// SPDX-License-Identifier: GPL-2.0-only
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <linux/sysctl.h>
#include <linux/net.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/mpls.h>
#include <linux/netconf.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/percpu.h>
#include <net/gso.h>
#include <net/ip.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/netevent.h>
#include <net/ip_tunnels.h>
#include <net/netns/generic.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#endif
#include <net/ipv6_stubs.h>
#include <net/rtnh.h>
#include "internal.h"

/* max memory we will use for mpls_route */
#define MAX_MPLS_ROUTE_MEM 4096

/* Maximum number of labels to look ahead at when selecting a path of
 * a multipath route
 */
#define MAX_MP_SELECT_LABELS 4

#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)

static int label_limit = (1 << 20) - 1;
static int ttl_max = 255;
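/* An MPLS label stack entry ("shim header", RFC 3032) packs four fields
 * into 32 bits on the wire:
 *
 *   | label (20 bits) | TC (3 bits) | S (1 bit) | TTL (8 bits) |
 *
 * hence the 20-bit label_limit above.  For illustration, encoding label
 * 100 with TTL 64, TC 0 and bottom-of-stack set via the
 * mpls_entry_encode() helper used below comes out as
 * (100 << 12) | (1 << 8) | 64 = 0x00064140, stored in network byte order.
 */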
#if IS_ENABLED(CONFIG_NET_IP_TUNNEL)
static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
{
	return sizeof(struct mpls_shim_hdr);
}

static const struct ip_tunnel_encap_ops mpls_iptun_ops = {
	.encap_hlen = ipgre_mpls_encap_hlen,
};

static int ipgre_tunnel_encap_add_mpls_ops(void)
{
	return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
}

static void ipgre_tunnel_encap_del_mpls_ops(void)
{
	ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
}
#else
static int ipgre_tunnel_encap_add_mpls_ops(void)
{
	return 0;
}

static void ipgre_tunnel_encap_del_mpls_ops(void)
{
}
#endif

static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
		       struct nlmsghdr *nlh, struct net *net, u32 portid,
		       unsigned int nlm_flags);

static struct mpls_route *mpls_route_input(struct net *net, unsigned int index)
{
	struct mpls_route __rcu **platform_label;

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	return mpls_dereference(net, platform_label[index]);
}

static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index)
{
	struct mpls_route __rcu **platform_label;

	if (index >= net->mpls.platform_labels)
		return NULL;

	platform_label = rcu_dereference(net->mpls.platform_label);
	return rcu_dereference(platform_label[index]);
}

bool mpls_output_possible(const struct net_device *dev)
{
	return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
}
EXPORT_SYMBOL_GPL(mpls_output_possible);

static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
{
	return (u8 *)nh + rt->rt_via_offset;
}

static const u8 *mpls_nh_via(const struct mpls_route *rt,
			     const struct mpls_nh *nh)
{
	return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh);
}

static unsigned int mpls_nh_header_size(const struct mpls_nh *nh)
{
	/* The size of the layer 2.5 labels to be added for this route */
	return nh->nh_labels * sizeof(struct mpls_shim_hdr);
}

unsigned int mpls_dev_mtu(const struct net_device *dev)
{
	/* The amount of data the layer 2 frame can hold */
	return dev->mtu;
}
EXPORT_SYMBOL_GPL(mpls_dev_mtu);

bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(mpls_pkt_too_big);

void mpls_stats_inc_outucastpkts(struct net *net,
				 struct net_device *dev,
				 const struct sk_buff *skb)
{
	struct mpls_dev *mdev;

	if (skb->protocol == htons(ETH_P_MPLS_UC)) {
		mdev = mpls_dev_rcu(dev);
		if (mdev)
			MPLS_INC_STATS_LEN(mdev, skb->len,
					   tx_packets,
					   tx_bytes);
	} else if (skb->protocol == htons(ETH_P_IP)) {
		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct inet6_dev *in6dev = in6_dev_rcu(dev);

		if (in6dev)
			IP6_UPD_PO_STATS(net, in6dev,
					 IPSTATS_MIB_OUT, skb->len);
#endif
	}
}
EXPORT_SYMBOL_GPL(mpls_stats_inc_outucastpkts);

static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
{
	struct mpls_entry_decoded dec;
	unsigned int mpls_hdr_len = 0;
	struct mpls_shim_hdr *hdr;
	bool eli_seen = false;
	int label_index;
	u32 hash = 0;

	for (label_index = 0; label_index < MAX_MP_SELECT_LABELS;
	     label_index++) {
		mpls_hdr_len += sizeof(*hdr);
		if (!pskb_may_pull(skb, mpls_hdr_len))
			break;

		/* Read and decode the current label */
		hdr = mpls_hdr(skb) + label_index;
		dec = mpls_entry_decode(hdr);

		/* RFC6790 - reserved labels MUST NOT be used as keys
		 * for the load-balancing function
		 */
		if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) {
			hash = jhash_1word(dec.label, hash);

			/* The entropy label follows the entropy label
			 * indicator, so this means that the entropy
			 * label was just added to the hash - no need to
			 * go any deeper either in the label stack or in the
			 * payload
			 */
			if (eli_seen)
				break;
		} else if (dec.label == MPLS_LABEL_ENTROPY) {
			eli_seen = true;
		}

		if (!dec.bos)
			continue;

		/* found bottom label; does skb have room for a header? */
		if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) {
			const struct iphdr *v4hdr;

			v4hdr = (const struct iphdr *)(hdr + 1);
			if (v4hdr->version == 4) {
				hash = jhash_3words(ntohl(v4hdr->saddr),
						    ntohl(v4hdr->daddr),
						    v4hdr->protocol, hash);
			} else if (v4hdr->version == 6 &&
				   pskb_may_pull(skb, mpls_hdr_len +
						 sizeof(struct ipv6hdr))) {
				const struct ipv6hdr *v6hdr;

				v6hdr = (const struct ipv6hdr *)(hdr + 1);
				hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
				hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
				hash = jhash_1word(v6hdr->nexthdr, hash);
			}
		}

		break;
	}

	return hash;
}
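/* Worked example: for a stack of
 *   [ label 1000 | ELI (7) | entropy label 12345, BoS ]
 * the loop hashes 1000, notes the entropy label indicator, hashes 12345
 * and then stops without touching the payload, since per RFC 6790 the
 * entropy label alone is enough to spread flows.  Without an ELI the
 * loop walks at most MAX_MP_SELECT_LABELS entries and then mixes in the
 * IPv4/IPv6 addresses and protocol of the payload, if one is present.
 */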
static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index)
{
	return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size);
}

/* number of alive nexthops (rt->rt_nhn_alive) and the flags for
 * a next hop (nh->nh_flags) are modified by netdev event handlers.
 * Since those fields can change at any moment, use READ_ONCE to
 * access both.
 */
static const struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
						   struct sk_buff *skb)
{
	u32 hash = 0;
	int nh_index = 0;
	int n = 0;
	u8 alive;

	/* No need to look further into packet if there's only
	 * one path
	 */
	if (rt->rt_nhn == 1)
		return rt->rt_nh;

	alive = READ_ONCE(rt->rt_nhn_alive);
	if (alive == 0)
		return NULL;

	hash = mpls_multipath_hash(rt, skb);
	nh_index = hash % alive;
	if (alive == rt->rt_nhn)
		goto out;
	for_nexthops(rt) {
		unsigned int nh_flags = READ_ONCE(nh->nh_flags);

		if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
			continue;
		if (n == nh_index)
			return nh;
		n++;
	} endfor_nexthops(rt);

out:
	return mpls_get_nexthop(rt, nh_index);
}
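/* Example: with rt_nhn == 4 but only 3 nexthops alive, a packet hashing
 * to 0x1234567 selects nh_index = 0x1234567 % 3 = 1, i.e. the second
 * *usable* nexthop found while skipping DEAD/LINKDOWN entries.  Only
 * when every nexthop is alive can the index be used directly as an
 * offset into the nexthop array via mpls_get_nexthop().
 */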
static bool mpls_egress(struct net *net, struct mpls_route *rt,
			struct sk_buff *skb, struct mpls_entry_decoded dec)
{
	enum mpls_payload_type payload_type;
	bool success = false;

	/* The IPv4 code below accesses through the IPv4 header
	 * checksum, which is 12 bytes into the packet.
	 * The IPv6 code below accesses through the IPv6 hop limit
	 * which is 8 bytes into the packet.
	 *
	 * For all supported cases there should always be at least 12
	 * bytes of packet data present. The IPv4 header is 20 bytes
	 * without options and the IPv6 header is always 40 bytes
	 * long.
	 */
	if (!pskb_may_pull(skb, 12))
		return false;

	payload_type = rt->rt_payload_type;
	if (payload_type == MPT_UNSPEC)
		payload_type = ip_hdr(skb)->version;

	switch (payload_type) {
	case MPT_IPV4: {
		struct iphdr *hdr4 = ip_hdr(skb);
		u8 new_ttl;
		skb->protocol = htons(ETH_P_IP);

		/* If propagating TTL, take the decremented TTL from
		 * the incoming MPLS header, otherwise decrement the
		 * TTL, but only if not 0 to avoid underflow.
		 */
		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
		     net->mpls.ip_ttl_propagate))
			new_ttl = dec.ttl;
		else
			new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;

		csum_replace2(&hdr4->check,
			      htons(hdr4->ttl << 8),
			      htons(new_ttl << 8));
		hdr4->ttl = new_ttl;
		success = true;
		break;
	}
	case MPT_IPV6: {
		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
		skb->protocol = htons(ETH_P_IPV6);

		/* If propagating TTL, take the decremented TTL from
		 * the incoming MPLS header, otherwise decrement the
		 * hop limit, but only if not 0 to avoid underflow.
		 */
		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
		     net->mpls.ip_ttl_propagate))
			hdr6->hop_limit = dec.ttl;
		else if (hdr6->hop_limit)
			hdr6->hop_limit = hdr6->hop_limit - 1;
		success = true;
		break;
	}
	case MPT_UNSPEC:
		/* Should have decided which protocol it is by now */
		break;
	}

	return success;
}
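/* Example of the incremental checksum update above: the TTL occupies the
 * high byte of the 16-bit word it shares with the protocol field, so
 * rewriting TTL 64 -> 63 passes htons(64 << 8) = htons(0x4000) as the
 * old value and htons(0x3F00) as the new one, and csum_replace2() folds
 * the 16-bit difference into hdr4->check (RFC 1624 style) without
 * recomputing the checksum over the whole IPv4 header.
 */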
static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
			struct packet_type *pt, struct net_device *orig_dev)
{
	struct net *net = dev_net_rcu(dev);
	struct mpls_shim_hdr *hdr;
	const struct mpls_nh *nh;
	struct mpls_route *rt;
	struct mpls_entry_decoded dec;
	struct net_device *out_dev;
	struct mpls_dev *out_mdev;
	struct mpls_dev *mdev;
	unsigned int hh_len;
	unsigned int new_header_size;
	unsigned int mtu;
	int err;

	/* Careful this entire function runs inside of an rcu critical section */

	mdev = mpls_dev_rcu(dev);
	if (!mdev)
		goto drop;

	MPLS_INC_STATS_LEN(mdev, skb->len, rx_packets,
			   rx_bytes);

	if (!mdev->input_enabled) {
		MPLS_INC_STATS(mdev, rx_dropped);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto err;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto err;

	if (!pskb_may_pull(skb, sizeof(*hdr)))
		goto err;

	skb_dst_drop(skb);

	/* Read and decode the label */
	hdr = mpls_hdr(skb);
	dec = mpls_entry_decode(hdr);

	rt = mpls_route_input_rcu(net, dec.label);
	if (!rt) {
		MPLS_INC_STATS(mdev, rx_noroute);
		goto drop;
	}

	nh = mpls_select_multipath(rt, skb);
	if (!nh)
		goto err;

	/* Pop the label */
	skb_pull(skb, sizeof(*hdr));
	skb_reset_network_header(skb);

	skb_orphan(skb);

	if (skb_warn_if_lro(skb))
		goto err;

	skb_forward_csum(skb);

	/* Verify ttl is valid */
	if (dec.ttl <= 1)
		goto err;

	/* Find the output device */
	out_dev = nh->nh_dev;
	if (!mpls_output_possible(out_dev))
		goto tx_err;

	/* Verify the destination can hold the packet */
	new_header_size = mpls_nh_header_size(nh);
	mtu = mpls_dev_mtu(out_dev);
	if (mpls_pkt_too_big(skb, mtu - new_header_size))
		goto tx_err;

	hh_len = LL_RESERVED_SPACE(out_dev);
	if (!out_dev->header_ops)
		hh_len = 0;

	/* Ensure there is enough space for the headers in the skb */
	if (skb_cow(skb, hh_len + new_header_size))
		goto tx_err;

	skb->dev = out_dev;
	skb->protocol = htons(ETH_P_MPLS_UC);

	dec.ttl -= 1;
	if (unlikely(!new_header_size && dec.bos)) {
		/* Penultimate hop popping */
		if (!mpls_egress(net, rt, skb, dec))
			goto err;
	} else {
		bool bos;
		int i;
		skb_push(skb, new_header_size);
		skb_reset_network_header(skb);
		/* Push the new labels */
		hdr = mpls_hdr(skb);
		bos = dec.bos;
		for (i = nh->nh_labels - 1; i >= 0; i--) {
			hdr[i] = mpls_entry_encode(nh->nh_label[i],
						   dec.ttl, 0, bos);
			bos = false;
		}
	}

	mpls_stats_inc_outucastpkts(net, out_dev, skb);

	/* If via wasn't specified then send out using device address */
	if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC)
		err = neigh_xmit(NEIGH_LINK_TABLE, out_dev,
				 out_dev->dev_addr, skb);
	else
		err = neigh_xmit(nh->nh_via_table, out_dev,
				 mpls_nh_via(rt, nh), skb);
	if (err)
		net_dbg_ratelimited("%s: packet transmission failed: %d\n",
				    __func__, err);
	return 0;

tx_err:
	out_mdev = out_dev ? mpls_dev_rcu(out_dev) : NULL;
	if (out_mdev)
		MPLS_INC_STATS(out_mdev, tx_errors);
	goto drop;
err:
	MPLS_INC_STATS(mdev, rx_errors);
drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}
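/* Note on the label push loop above: hdr[0] is the outermost (top of
 * stack) entry, so nh_label[0] ends up on top of the wire format.  For
 * a swap to the two-label stack {100, 200} on a packet whose popped
 * label carried BoS, the loop writes hdr[1] = label 200 with the BoS
 * bit inherited from dec.bos and hdr[0] = label 100 with BoS clear,
 * both carrying the already-decremented TTL.
 */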
static struct packet_type mpls_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_MPLS_UC),
	.func = mpls_forward,
};

static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
	[RTA_DST] = { .type = NLA_U32 },
	[RTA_OIF] = { .type = NLA_U32 },
	[RTA_TTL_PROPAGATE] = { .type = NLA_U8 },
};

struct mpls_route_config {
	u32 rc_protocol;
	u32 rc_ifindex;
	u8 rc_via_table;
	u8 rc_via_alen;
	u8 rc_via[MAX_VIA_ALEN];
	u32 rc_label;
	u8 rc_ttl_propagate;
	u8 rc_output_labels;
	u32 rc_output_label[MAX_NEW_LABELS];
	u32 rc_nlflags;
	enum mpls_payload_type rc_payload_type;
	struct nl_info rc_nlinfo;
	struct rtnexthop *rc_mp;
	int rc_mp_len;
};

/* all nexthops within a route have the same size based on max
 * number of labels and max via length for a hop
 */
static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels)
{
	u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen);
	struct mpls_route *rt;
	size_t size;

	size = sizeof(*rt) + num_nh * nh_size;
	if (size > MAX_MPLS_ROUTE_MEM)
		return ERR_PTR(-EINVAL);

	rt = kzalloc(size, GFP_KERNEL);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	rt->rt_nhn = num_nh;
	rt->rt_nhn_alive = num_nh;
	rt->rt_nh_size = nh_size;
	rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels);

	return rt;
}
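/* Rough sketch of the resulting allocation, assuming the MPLS_NH_SIZE()
 * and MPLS_NH_VIA_OFF() macros from internal.h lay each slot out as the
 * mpls_nh header followed by the label array and then the via address:
 *
 *   +-------------------+
 *   | struct mpls_route |
 *   +-------------------+ <- rt->rt_nh
 *   | nh 0: mpls_nh | labels[max_labels] | via[max_alen] |
 *   +-------------------+ <- rt->rt_nh + rt_nh_size
 *   | nh 1: ...                                          |
 *   +-------------------+
 *
 * Every slot is rt_nh_size bytes, which is what lets mpls_get_nexthop()
 * and __mpls_nh_via() use plain offset arithmetic, and the whole route
 * is capped at MAX_MPLS_ROUTE_MEM (4096) bytes.
 */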
static void mpls_rt_free_rcu(struct rcu_head *head)
{
	struct mpls_route *rt;

	rt = container_of(head, struct mpls_route, rt_rcu);

	change_nexthops(rt) {
		netdev_put(nh->nh_dev, &nh->nh_dev_tracker);
	} endfor_nexthops(rt);

	kfree(rt);
}

static void mpls_rt_free(struct mpls_route *rt)
{
	if (rt)
		call_rcu(&rt->rt_rcu, mpls_rt_free_rcu);
}

static void mpls_notify_route(struct net *net, unsigned index,
			      struct mpls_route *old, struct mpls_route *new,
			      const struct nl_info *info)
{
	struct nlmsghdr *nlh = info ? info->nlh : NULL;
	unsigned portid = info ? info->portid : 0;
	int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
	struct mpls_route *rt = new ? new : old;
	unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
	/* Ignore reserved labels for now */
	if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED))
		rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
}

static void mpls_route_update(struct net *net, unsigned index,
			      struct mpls_route *new,
			      const struct nl_info *info)
{
	struct mpls_route __rcu **platform_label;
	struct mpls_route *rt;

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	rt = mpls_dereference(net, platform_label[index]);
	rcu_assign_pointer(platform_label[index], new);

	mpls_notify_route(net, index, rt, new, info);

	/* If we removed a route free it now */
	mpls_rt_free(rt);
}

static unsigned int find_free_label(struct net *net)
{
	unsigned int index;

	for (index = MPLS_LABEL_FIRST_UNRESERVED;
	     index < net->mpls.platform_labels;
	     index++) {
		if (!mpls_route_input(net, index))
			return index;
	}

	return LABEL_NOT_SPECIFIED;
}

#if IS_ENABLED(CONFIG_INET)
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      struct mpls_nh *nh,
					      const void *addr)
{
	struct net_device *dev;
	struct rtable *rt;
	struct in_addr daddr;

	memcpy(&daddr, addr, sizeof(struct in_addr));
	rt = ip_route_output(net, daddr.s_addr, 0, 0, 0, RT_SCOPE_UNIVERSE);
	if (IS_ERR(rt))
		return ERR_CAST(rt);

	dev = rt->dst.dev;
	netdev_hold(dev, &nh->nh_dev_tracker, GFP_KERNEL);
	ip_rt_put(rt);

	return dev;
}
#else
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      struct mpls_nh *nh,
					      const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       struct mpls_nh *nh,
					       const void *addr)
{
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!ipv6_stub)
		return ERR_PTR(-EAFNOSUPPORT);

	memset(&fl6, 0, sizeof(fl6));
	memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
	if (IS_ERR(dst))
		return ERR_CAST(dst);

	dev = dst->dev;
	netdev_hold(dev, &nh->nh_dev_tracker, GFP_KERNEL);
	dst_release(dst);

	return dev;
}
#else
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       struct mpls_nh *nh,
					       const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
#endif

static struct net_device *find_outdev(struct net *net,
				      struct mpls_route *rt,
				      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;

	if (!oif) {
		switch (nh->nh_via_table) {
		case NEIGH_ARP_TABLE:
			dev = inet_fib_lookup_dev(net, nh, mpls_nh_via(rt, nh));
			break;
		case NEIGH_ND_TABLE:
			dev = inet6_fib_lookup_dev(net, nh, mpls_nh_via(rt, nh));
			break;
		case NEIGH_LINK_TABLE:
			break;
		}
	} else {
		dev = netdev_get_by_index(net, oif,
					  &nh->nh_dev_tracker, GFP_KERNEL);
	}

	if (!dev)
		return ERR_PTR(-ENODEV);

	if (IS_ERR(dev))
		return dev;

	nh->nh_dev = dev;

	return dev;
}

static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
			      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;
	int err = -ENODEV;

	dev = find_outdev(net, rt, nh, oif);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		goto errout;
	}

	/* Ensure this is a supported device */
	err = -EINVAL;
	if (!mpls_dev_get(net, dev))
		goto errout_put;

	if ((nh->nh_via_table == NEIGH_LINK_TABLE) &&
	    (dev->addr_len != nh->nh_via_alen))
		goto errout_put;

	if (!(dev->flags & IFF_UP)) {
		nh->nh_flags |= RTNH_F_DEAD;
	} else {
		unsigned int flags;

		flags = netif_get_flags(dev);
		if (!(flags & (IFF_RUNNING | IFF_LOWER_UP)))
			nh->nh_flags |= RTNH_F_LINKDOWN;
	}

	return 0;

errout_put:
	netdev_put(nh->nh_dev, &nh->nh_dev_tracker);
	nh->nh_dev = NULL;
errout:
	return err;
}

static int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
		       u8 via_addr[], struct netlink_ext_ack *extack)
{
	struct rtvia *via = nla_data(nla);
	int err = -EINVAL;
	int alen;

	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Invalid attribute length for RTA_VIA");
		goto errout;
	}
	alen = nla_len(nla) -
	       offsetof(struct rtvia, rtvia_addr);
	if (alen > MAX_VIA_ALEN) {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Invalid address length for RTA_VIA");
		goto errout;
	}

	/* Validate the address family */
	switch (via->rtvia_family) {
	case AF_PACKET:
		*via_table = NEIGH_LINK_TABLE;
		break;
	case AF_INET:
		*via_table = NEIGH_ARP_TABLE;
		if (alen != 4)
			goto errout;
		break;
	case AF_INET6:
		*via_table = NEIGH_ND_TABLE;
		if (alen != 16)
			goto errout;
		break;
	default:
		/* Unsupported address family */
		goto errout;
	}

	memcpy(via_addr, via->rtvia_addr, alen);
	*via_alen = alen;
	err = 0;

errout:
	return err;
}
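/* Example RTA_VIA payload: struct rtvia starts with a 2-byte
 * rtvia_family followed by the raw address, so "via inet 10.1.1.1"
 * arrives as 6 bytes of payload (family AF_INET + 4 address bytes) and
 * decodes to via_table = NEIGH_ARP_TABLE, via_alen = 4.  An AF_INET6
 * via is 2 + 16 bytes, and AF_PACKET carries a link-layer address whose
 * length is only checked later against the device's addr_len.
 */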
static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
				  struct mpls_route *rt)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_nh *nh = rt->rt_nh;
	int err;
	int i;

	if (!nh)
		return -ENOMEM;

	nh->nh_labels = cfg->rc_output_labels;
	for (i = 0; i < nh->nh_labels; i++)
		nh->nh_label[i] = cfg->rc_output_label[i];

	nh->nh_via_table = cfg->rc_via_table;
	memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen);
	nh->nh_via_alen = cfg->rc_via_alen;

	err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex);
	if (err)
		goto errout;

	if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
		rt->rt_nhn_alive--;

	return 0;

errout:
	return err;
}

static int mpls_nh_build(struct net *net, struct mpls_route *rt,
			 struct mpls_nh *nh, int oif, struct nlattr *via,
			 struct nlattr *newdst, u8 max_labels,
			 struct netlink_ext_ack *extack)
{
	int err = -ENOMEM;

	if (!nh)
		goto errout;

	if (newdst) {
		err = nla_get_labels(newdst, max_labels, &nh->nh_labels,
				     nh->nh_label, extack);
		if (err)
			goto errout;
	}

	if (via) {
		err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
				  __mpls_nh_via(rt, nh), extack);
		if (err)
			goto errout;
	} else {
		nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC;
	}

	err = mpls_nh_assign_dev(net, rt, nh, oif);
	if (err)
		goto errout;

	return 0;

errout:
	return err;
}

static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
			      u8 cfg_via_alen, u8 *max_via_alen,
			      u8 *max_labels)
{
	int remaining = len;
	u8 nhs = 0;

	*max_via_alen = 0;
	*max_labels = 0;

	while (rtnh_ok(rtnh, remaining)) {
		struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
		int attrlen;
		u8 n_labels = 0;

		attrlen = rtnh_attrlen(rtnh);
		nla = nla_find(attrs, attrlen, RTA_VIA);
		if (nla && nla_len(nla) >=
		    offsetof(struct rtvia, rtvia_addr)) {
			int via_alen = nla_len(nla) -
				offsetof(struct rtvia, rtvia_addr);

			if (via_alen <= MAX_VIA_ALEN)
				*max_via_alen = max_t(u16, *max_via_alen,
						      via_alen);
		}

		nla = nla_find(attrs, attrlen, RTA_NEWDST);
		if (nla &&
		    nla_get_labels(nla, MAX_NEW_LABELS, &n_labels,
				   NULL, NULL) != 0)
			return 0;

		*max_labels = max_t(u8, *max_labels, n_labels);

		/* number of nexthops is tracked by a u8.
		 * Check for overflow.
		 */
		if (nhs == 255)
			return 0;
		nhs++;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}
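/* The RTA_MULTIPATH payload being walked above is a back-to-back array
 * of variable-length entries, each a struct rtnexthop immediately
 * followed by its own nested attributes:
 *
 *   | rtnexthop (len=L0) | RTA_VIA | RTA_NEWDST | rtnexthop (len=L1) | ...
 *
 * rtnh_ok()/rtnh_next() step through it using rtnh_len, so this first
 * pass can size the worst-case via address and label count before any
 * route memory is allocated.
 */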
static int mpls_nh_build_multi(struct mpls_route_config *cfg,
			       struct mpls_route *rt, u8 max_labels,
			       struct netlink_ext_ack *extack)
{
	struct rtnexthop *rtnh = cfg->rc_mp;
	struct nlattr *nla_via, *nla_newdst;
	int remaining = cfg->rc_mp_len;
	int err = 0;

	rt->rt_nhn = 0;

	change_nexthops(rt) {
		int attrlen;

		nla_via = NULL;
		nla_newdst = NULL;

		err = -EINVAL;
		if (!rtnh_ok(rtnh, remaining))
			goto errout;

		/* neither weighted multipath nor any flags
		 * are supported
		 */
		if (rtnh->rtnh_hops || rtnh->rtnh_flags)
			goto errout;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *attrs = rtnh_attrs(rtnh);

			nla_via = nla_find(attrs, attrlen, RTA_VIA);
			nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
		}

		err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
				    rtnh->rtnh_ifindex, nla_via, nla_newdst,
				    max_labels, extack);
		if (err)
			goto errout;

		if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
			rt->rt_nhn_alive--;

		rtnh = rtnh_next(rtnh, &remaining);
		rt->rt_nhn++;
	} endfor_nexthops(rt);

	return 0;

errout:
	return err;
}

static bool mpls_label_ok(struct net *net, unsigned int *index,
			  struct netlink_ext_ack *extack)
{
	/* Reserved labels may not be set */
	if (*index < MPLS_LABEL_FIRST_UNRESERVED) {
		NL_SET_ERR_MSG(extack,
			       "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher");
		return false;
	}

	/* The full 20 bit range may not be supported.
	 */
	if (*index >= net->mpls.platform_labels) {
		NL_SET_ERR_MSG(extack,
			       "Label >= configured maximum in platform_labels");
		return false;
	}

	*index = array_index_nospec(*index, net->mpls.platform_labels);

	return true;
}
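/* array_index_nospec() clamps the label under speculation: the bounds
 * check above can be bypassed by a mispredicted branch, and the index
 * comes straight from an untrusted netlink message and is later used
 * to address platform_label[].  The barrier makes a Spectre-v1 style
 * speculative out-of-bounds load of the route table impossible.
 */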
static int mpls_route_add(struct mpls_route_config *cfg,
			  struct netlink_ext_ack *extack)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_route *rt, *old;
	int err = -EINVAL;
	u8 max_via_alen;
	unsigned index;
	u8 max_labels;
	u8 nhs;

	index = cfg->rc_label;

	/* If a label was not specified during insert pick one */
	if ((index == LABEL_NOT_SPECIFIED) &&
	    (cfg->rc_nlflags & NLM_F_CREATE)) {
		index = find_free_label(net);
	}

	if (!mpls_label_ok(net, &index, extack))
		goto errout;

	/* Append makes no sense with mpls */
	err = -EOPNOTSUPP;
	if (cfg->rc_nlflags & NLM_F_APPEND) {
		NL_SET_ERR_MSG(extack, "MPLS does not support route append");
		goto errout;
	}

	err = -EEXIST;
	old = mpls_route_input(net, index);
	if ((cfg->rc_nlflags & NLM_F_EXCL) && old)
		goto errout;

	err = -EEXIST;
	if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old)
		goto errout;

	err = -ENOENT;
	if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
		goto errout;

	err = -EINVAL;
	if (cfg->rc_mp) {
		nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
					  cfg->rc_via_alen, &max_via_alen,
					  &max_labels);
	} else {
		max_via_alen = cfg->rc_via_alen;
		max_labels = cfg->rc_output_labels;
		nhs = 1;
	}

	if (nhs == 0) {
		NL_SET_ERR_MSG(extack, "Route does not contain a nexthop");
		goto errout;
	}

	rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto errout;
	}

	rt->rt_protocol = cfg->rc_protocol;
	rt->rt_payload_type = cfg->rc_payload_type;
	rt->rt_ttl_propagate = cfg->rc_ttl_propagate;

	if (cfg->rc_mp)
		err = mpls_nh_build_multi(cfg, rt, max_labels, extack);
	else
		err = mpls_nh_build_from_cfg(cfg, rt);
	if (err)
		goto freert;

	mpls_route_update(net, index, rt, &cfg->rc_nlinfo);

	return 0;

freert:
	mpls_rt_free(rt);
errout:
	return err;
}

static int mpls_route_del(struct mpls_route_config *cfg,
			  struct netlink_ext_ack *extack)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	unsigned index;
	int err = -EINVAL;

	index = cfg->rc_label;

	if (!mpls_label_ok(net, &index, extack))
		goto errout;

	mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);

	err = 0;
errout:
	return err;
}

static void mpls_get_stats(struct mpls_dev *mdev,
			   struct mpls_link_stats *stats)
{
	struct mpls_pcpu_stats *p;
	int i;

	memset(stats, 0, sizeof(*stats));

	for_each_possible_cpu(i) {
		struct mpls_link_stats local;
		unsigned int start;

		p = per_cpu_ptr(mdev->stats, i);
		do {
			start = u64_stats_fetch_begin(&p->syncp);
			local = p->stats;
		} while (u64_stats_fetch_retry(&p->syncp, start));

		stats->rx_packets += local.rx_packets;
		stats->rx_bytes += local.rx_bytes;
		stats->tx_packets += local.tx_packets;
		stats->tx_bytes += local.tx_bytes;
		stats->rx_errors += local.rx_errors;
		stats->tx_errors += local.tx_errors;
		stats->rx_dropped += local.rx_dropped;
		stats->tx_dropped += local.tx_dropped;
		stats->rx_noroute += local.rx_noroute;
	}
}
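/* The u64_stats_fetch_begin()/_retry() pair above is a seqcount read
 * loop: on 32-bit machines a writer bumps the sequence around its
 * non-atomic 64-bit update, so if the sequence changed while we copied
 * p->stats the snapshot may mix halves of two values and is retried.
 * On 64-bit machines the helpers compile away and the copy is taken
 * as-is.  The per-CPU snapshots are then summed into one
 * struct mpls_link_stats.
 */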
static int mpls_fill_stats_af(struct sk_buff *skb,
			      const struct net_device *dev)
{
	struct mpls_link_stats *stats;
	struct mpls_dev *mdev;
	struct nlattr *nla;

	mdev = mpls_dev_rcu(dev);
	if (!mdev)
		return -ENODATA;

	nla = nla_reserve_64bit(skb, MPLS_STATS_LINK,
				sizeof(struct mpls_link_stats),
				MPLS_STATS_UNSPEC);
	if (!nla)
		return -EMSGSIZE;

	stats = nla_data(nla);
	mpls_get_stats(mdev, stats);

	return 0;
}

static size_t mpls_get_stats_af_size(const struct net_device *dev)
{
	struct mpls_dev *mdev;

	mdev = mpls_dev_rcu(dev);
	if (!mdev)
		return 0;

	return nla_total_size_64bit(sizeof(struct mpls_link_stats));
}

static int mpls_netconf_fill_devconf(struct sk_buff *skb, struct mpls_dev *mdev,
				     u32 portid, u32 seq, int event,
				     unsigned int flags, int type)
{
	struct nlmsghdr *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_MPLS;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, mdev->dev->ifindex) < 0)
		goto nla_put_failure;

	if ((all || type == NETCONFA_INPUT) &&
	    nla_put_s32(skb, NETCONFA_INPUT,
			READ_ONCE(mdev->input_enabled)) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int mpls_netconf_msgsize_devconf(int type)
{
	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
			+ nla_total_size(4); /* NETCONFA_IFINDEX */
	bool all = false;

	if (type == NETCONFA_ALL)
		all = true;

	if (all || type == NETCONFA_INPUT)
		size += nla_total_size(4);

	return size;
}

static void mpls_netconf_notify_devconf(struct net *net, int event,
					int type, struct mpls_dev *mdev)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mpls_netconf_msgsize_devconf(type), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, 0, RTNLGRP_MPLS_NETCONF, NULL, GFP_KERNEL);
	return;
errout:
	rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
}

static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
	[NETCONFA_IFINDEX] = { .len = sizeof(int) },
};

static int mpls_netconf_valid_get_req(struct sk_buff *skb,
				      const struct nlmsghdr *nlh,
				      struct nlattr **tb,
				      struct netlink_ext_ack *extack)
{
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid header for netconf get request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
					      tb, NETCONFA_MAX,
					      devconf_mpls_policy, extack);

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
					    tb, NETCONFA_MAX,
					    devconf_mpls_policy, extack);
	if (err)
		return err;

	for (i = 0; i <= NETCONFA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case NETCONFA_IFINDEX:
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request");
			return -EINVAL;
		}
	}

	return 0;
}

static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX + 1];
	struct net_device *dev;
	struct mpls_dev *mdev;
	struct sk_buff *skb;
	int ifindex;
	int err;

	err = mpls_netconf_valid_get_req(in_skb, nlh, tb, extack);
	if (err < 0)
		goto errout;

	if (!tb[NETCONFA_IFINDEX]) {
		err = -EINVAL;
		goto errout;
	}

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);

	skb = nlmsg_new(mpls_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	rcu_read_lock();

	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		err = -EINVAL;
		goto errout_unlock;
	}

	mdev = mpls_dev_rcu(dev);
	if (!mdev) {
		err = -EINVAL;
		goto errout_unlock;
	}

	err = mpls_netconf_fill_devconf(skb, mdev,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		goto errout_unlock;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);

	rcu_read_unlock();
errout:
	return err;

errout_unlock:
	rcu_read_unlock();
	kfree_skb(skb);
	goto errout;
}

static int mpls_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	struct {
		unsigned long ifindex;
	} *ctx = (void *)cb->ctx;
	struct net_device *dev;
	struct mpls_dev *mdev;
	int err = 0;

	if (cb->strict_check) {
		struct netlink_ext_ack *extack = cb->extack;
		struct netconfmsg *ncm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
			return -EINVAL;
		}

		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request");
			return -EINVAL;
		}
	}

	rcu_read_lock();
	for_each_netdev_dump(net, dev, ctx->ifindex) {
		mdev = mpls_dev_rcu(dev);
		if (!mdev)
			continue;
		err = mpls_netconf_fill_devconf(skb, mdev,
						NETLINK_CB(cb->skb).portid,
						nlh->nlmsg_seq,
						RTM_NEWNETCONF,
						NLM_F_MULTI,
						NETCONFA_ALL);
		if (err < 0)
			break;
	}
	rcu_read_unlock();

	return err;
}

#define MPLS_PERDEV_SYSCTL_OFFSET(field) \
	(&((struct mpls_dev *)0)->field)
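/* MPLS_PERDEV_SYSCTL_OFFSET() is an offsetof() in disguise: taking the
 * address of a member on a NULL struct mpls_dev pointer yields the
 * member's byte offset.  The table below stores that offset in .data,
 * and mpls_dev_sysctl_register() later rebases it onto the real mdev:
 *
 *   table[i].data = (char *)mdev + (uintptr_t)table[i].data;
 *
 * so one static template can serve every device's private copy.
 */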
static int mpls_conf_proc(const struct ctl_table *ctl, int write,
			  void *buffer, size_t *lenp, loff_t *ppos)
{
	int oval = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write) {
		struct mpls_dev *mdev = ctl->extra1;
		int i = (int *)ctl->data - (int *)mdev;
		struct net *net = ctl->extra2;
		int val = *(int *)ctl->data;

		if (i == offsetof(struct mpls_dev, input_enabled) &&
		    val != oval) {
			mpls_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_INPUT, mdev);
		}
	}

	return ret;
}

static const struct ctl_table mpls_dev_table[] = {
	{
		.procname = "input",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = mpls_conf_proc,
		.data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
	},
};

static int mpls_dev_sysctl_register(struct net_device *dev,
				    struct mpls_dev *mdev)
{
	char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
	size_t table_size = ARRAY_SIZE(mpls_dev_table);
	struct net *net = dev_net(dev);
	struct ctl_table *table;
	int i;

	table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL);
	if (!table)
		goto out;

	/* Table data contains only offsets relative to the base of
	 * the mdev at this point, so make them absolute.
	 */
	for (i = 0; i < table_size; i++) {
		table[i].data = (char *)mdev + (uintptr_t)table[i].data;
		table[i].extra1 = mdev;
		table[i].extra2 = net;
	}

	snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);

	mdev->sysctl = register_net_sysctl_sz(net, path, table, table_size);
	if (!mdev->sysctl)
		goto free;

	mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev);
	return 0;

free:
	kfree(table);
out:
	mdev->sysctl = NULL;
	return -ENOBUFS;
}

static void mpls_dev_sysctl_unregister(struct net_device *dev,
				       struct mpls_dev *mdev)
{
	struct net *net = dev_net(dev);
	const struct ctl_table *table;

	if (!mdev->sysctl)
		return;

	table = mdev->sysctl->ctl_table_arg;
	unregister_net_sysctl_table(mdev->sysctl);
	kfree(table);

	mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev);
}

static struct mpls_dev *mpls_add_dev(struct net_device *dev)
{
	struct mpls_dev *mdev;
	int err = -ENOMEM;
	int i;

	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
	if (!mdev)
		return ERR_PTR(err);

	mdev->stats = alloc_percpu(struct mpls_pcpu_stats);
	if (!mdev->stats)
		goto free;

	for_each_possible_cpu(i) {
		struct mpls_pcpu_stats *mpls_stats;

		mpls_stats = per_cpu_ptr(mdev->stats, i);
		u64_stats_init(&mpls_stats->syncp);
	}

	mdev->dev = dev;

	err = mpls_dev_sysctl_register(dev, mdev);
	if (err)
		goto free;

	rcu_assign_pointer(dev->mpls_ptr, mdev);

	return mdev;

free:
	free_percpu(mdev->stats);
	kfree(mdev);
	return ERR_PTR(err);
}

static void mpls_dev_destroy_rcu(struct rcu_head *head)
{
	struct mpls_dev *mdev = container_of(head, struct mpls_dev, rcu);

	free_percpu(mdev->stats);
	kfree(mdev);
}

static int mpls_ifdown(struct net_device *dev, int event)
{
	struct net *net = dev_net(dev);
	unsigned int index;

	for (index = 0; index < net->mpls.platform_labels; index++) {
		struct mpls_route *rt;
		bool nh_del = false;
		u8 alive = 0;

		rt = mpls_route_input(net, index);
		if (!rt)
			continue;

		if (event == NETDEV_UNREGISTER) {
			u8 deleted = 0;

			for_nexthops(rt) {
				if (!nh->nh_dev || nh->nh_dev == dev)
					deleted++;
				if (nh->nh_dev == dev)
					nh_del = true;
			} endfor_nexthops(rt);

			/* if there are no more nexthops, delete the route */
			if (deleted == rt->rt_nhn) {
				mpls_route_update(net, index, NULL, NULL);
				continue;
			}

			if (nh_del) {
				size_t size = sizeof(*rt) + rt->rt_nhn *
					rt->rt_nh_size;
				struct mpls_route *orig = rt;

				rt = kmemdup(orig, size, GFP_KERNEL);
				if (!rt)
					return -ENOMEM;
			}
		}

		change_nexthops(rt) {
			unsigned int nh_flags = nh->nh_flags;

			if (nh->nh_dev != dev) {
				if (nh_del)
					netdev_hold(nh->nh_dev, &nh->nh_dev_tracker,
						    GFP_KERNEL);
				goto next;
			}

			switch (event) {
			case NETDEV_DOWN:
			case NETDEV_UNREGISTER:
				nh_flags |= RTNH_F_DEAD;
				fallthrough;
			case NETDEV_CHANGE:
				nh_flags |= RTNH_F_LINKDOWN;
				break;
			}
			if (event == NETDEV_UNREGISTER)
				nh->nh_dev = NULL;

			if (nh->nh_flags != nh_flags)
				WRITE_ONCE(nh->nh_flags, nh_flags);
next:
			if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
				alive++;
		} endfor_nexthops(rt);

		WRITE_ONCE(rt->rt_nhn_alive, alive);

		if (nh_del)
			mpls_route_update(net, index, rt, NULL);
	}

	return 0;
}
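/* Why the kmemdup() above: when a device is unregistered, the route
 * still holds a reference on it through nh->nh_dev, and concurrent RCU
 * readers may be walking the old route.  So a copy is modified (the
 * dead device's nexthop cleared, references on the surviving devices
 * re-taken for the copy) and swapped in via mpls_route_update(), which
 * frees the original after a grace period.
 */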
static void mpls_ifup(struct net_device *dev, unsigned int flags)
{
	struct net *net = dev_net(dev);
	unsigned int index;
	u8 alive;

	for (index = 0; index < net->mpls.platform_labels; index++) {
		struct mpls_route *rt;

		rt = mpls_route_input(net, index);
		if (!rt)
			continue;

		alive = 0;
		change_nexthops(rt) {
			unsigned int nh_flags = nh->nh_flags;

			if (!(nh_flags & flags)) {
				alive++;
				continue;
			}
			if (nh->nh_dev != dev)
				continue;
			alive++;
			nh_flags &= ~flags;
			WRITE_ONCE(nh->nh_flags, nh_flags);
		} endfor_nexthops(rt);

		WRITE_ONCE(rt->rt_nhn_alive, alive);
	}
}

static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mpls_dev *mdev;
	unsigned int flags;
	int err;

	mutex_lock(&net->mpls.platform_mutex);

	if (event == NETDEV_REGISTER) {
		mdev = mpls_add_dev(dev);
		if (IS_ERR(mdev)) {
			err = PTR_ERR(mdev);
			goto err;
		}

		goto out;
	}

	mdev = mpls_dev_get(net, dev);
	if (!mdev)
		goto out;

	switch (event) {

	case NETDEV_DOWN:
		err = mpls_ifdown(dev, event);
		if (err)
			goto err;
		break;
	case NETDEV_UP:
		flags = netif_get_flags(dev);
		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
			mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
		else
			mpls_ifup(dev, RTNH_F_DEAD);
		break;
	case NETDEV_CHANGE:
		flags = netif_get_flags(dev);
		if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
			mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
		} else {
			err = mpls_ifdown(dev, event);
			if (err)
				goto err;
		}
		break;
	case NETDEV_UNREGISTER:
		err = mpls_ifdown(dev, event);
		if (err)
			goto err;

		mdev = mpls_dev_get(net, dev);
		if (mdev) {
			mpls_dev_sysctl_unregister(dev, mdev);
			RCU_INIT_POINTER(dev->mpls_ptr, NULL);
			call_rcu(&mdev->rcu, mpls_dev_destroy_rcu);
		}
		break;
	case NETDEV_CHANGENAME:
		mdev = mpls_dev_get(net, dev);
		if (mdev) {
			mpls_dev_sysctl_unregister(dev, mdev);
			err = mpls_dev_sysctl_register(dev, mdev);
			if (err)
				goto err;
		}
		break;
	}

out:
	mutex_unlock(&net->mpls.platform_mutex);
	return NOTIFY_OK;

err:
	mutex_unlock(&net->mpls.platform_mutex);
	return notifier_from_errno(err);
}

static struct notifier_block mpls_dev_notifier = {
	.notifier_call = mpls_dev_notify,
};

static int nla_put_via(struct sk_buff *skb,
		       u8 table, const void *addr, int alen)
{
	static const int table_to_family[NEIGH_NR_TABLES + 1] = {
		AF_INET, AF_INET6, AF_PACKET,
	};
	struct nlattr *nla;
	struct rtvia *via;
	int family = AF_UNSPEC;

	nla = nla_reserve(skb, RTA_VIA, alen + 2);
	if (!nla)
		return -EMSGSIZE;

	if (table <= NEIGH_NR_TABLES)
		family = table_to_family[table];

	via = nla_data(nla);
	via->rtvia_family = family;
	memcpy(via->rtvia_addr, addr, alen);
	return 0;
}

int nla_put_labels(struct sk_buff *skb, int attrtype,
		   u8 labels, const u32 label[])
{
	struct nlattr *nla;
	struct mpls_shim_hdr *nla_label;
	bool bos;
	int i;
	nla = nla_reserve(skb, attrtype, labels*4);
	if (!nla)
		return -EMSGSIZE;

	nla_label = nla_data(nla);
	bos = true;
	for (i = labels - 1; i >= 0; i--) {
		nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos);
		bos = false;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(nla_put_labels);
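/* On the netlink side labels travel in the same shim format, minus TTL
 * and TC: the label stack {100, 200} becomes an 8-byte attribute
 * payload of 0x00064000 followed by 0x000c8100 (both big endian), the
 * bottom-of-stack bit set only on the last entry.  nla_get_labels()
 * below enforces exactly this shape when parsing.
 */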
int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
		   u32 label[], struct netlink_ext_ack *extack)
{
	unsigned len = nla_len(nla);
	struct mpls_shim_hdr *nla_label;
	u8 nla_labels;
	bool bos;
	int i;

	/* len needs to be an even multiple of 4 (the label size). Number
	 * of labels is a u8 so check for overflow.
	 */
	if (len & 3 || len / 4 > 255) {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Invalid length for labels attribute");
		return -EINVAL;
	}

	/* Limit the number of new labels allowed */
	nla_labels = len/4;
	if (nla_labels > max_labels) {
		NL_SET_ERR_MSG(extack, "Too many labels");
		return -EINVAL;
	}

	/* when label == NULL, caller wants number of labels */
	if (!label)
		goto out;

	nla_label = nla_data(nla);
	bos = true;
	for (i = nla_labels - 1; i >= 0; i--, bos = false) {
		struct mpls_entry_decoded dec;
		dec = mpls_entry_decode(nla_label + i);

		/* Ensure the bottom of stack flag is properly set
		 * and ttl and tc are both clear.
		 */
		if (dec.ttl) {
			NL_SET_ERR_MSG_ATTR(extack, nla,
					    "TTL in label must be 0");
			return -EINVAL;
		}

		if (dec.tc) {
			NL_SET_ERR_MSG_ATTR(extack, nla,
					    "Traffic class in label must be 0");
			return -EINVAL;
		}

		if (dec.bos != bos) {
			NL_SET_BAD_ATTR(extack, nla);
			if (bos) {
				NL_SET_ERR_MSG(extack,
					       "BOS bit must be set in first label");
			} else {
				NL_SET_ERR_MSG(extack,
					       "BOS bit can only be set in first label");
			}
			return -EINVAL;
		}

		switch (dec.label) {
		case MPLS_LABEL_IMPLNULL:
			/* RFC3032: This is a label that an LSR may
			 * assign and distribute, but which never
			 * actually appears in the encapsulation.
			 */
			NL_SET_ERR_MSG_ATTR(extack, nla,
					    "Implicit NULL Label (3) can not be used in encapsulation");
			return -EINVAL;
		}

		label[i] = dec.label;
	}
out:
	*labels = nla_labels;
	return 0;
}
EXPORT_SYMBOL_GPL(nla_get_labels);
static int rtm_to_route_config(struct sk_buff *skb,
			       struct nlmsghdr *nlh,
			       struct mpls_route_config *cfg,
			       struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int index;
	int err;

	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
				     rtm_mpls_policy, extack);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);

	if (rtm->rtm_family != AF_MPLS) {
		NL_SET_ERR_MSG(extack, "Invalid address family in rtmsg");
		goto errout;
	}
	if (rtm->rtm_dst_len != 20) {
		NL_SET_ERR_MSG(extack, "rtm_dst_len must be 20 for MPLS");
		goto errout;
	}
	if (rtm->rtm_src_len != 0) {
		NL_SET_ERR_MSG(extack, "rtm_src_len must be 0 for MPLS");
		goto errout;
	}
	if (rtm->rtm_tos != 0) {
		NL_SET_ERR_MSG(extack, "rtm_tos must be 0 for MPLS");
		goto errout;
	}
	if (rtm->rtm_table != RT_TABLE_MAIN) {
		NL_SET_ERR_MSG(extack,
			       "MPLS only supports the main route table");
		goto errout;
	}
	/* Any value is acceptable for rtm_protocol */

	/* As mpls uses destination specific addresses
	 * (or source specific address in the case of multicast)
	 * all addresses have universal scope.
	 */
	if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) {
		NL_SET_ERR_MSG(extack,
			       "Invalid route scope - MPLS only supports UNIVERSE");
		goto errout;
	}
	if (rtm->rtm_type != RTN_UNICAST) {
		NL_SET_ERR_MSG(extack,
			       "Invalid route type - MPLS only supports UNICAST");
		goto errout;
	}
	if (rtm->rtm_flags != 0) {
		NL_SET_ERR_MSG(extack, "rtm_flags must be 0 for MPLS");
		goto errout;
	}

	cfg->rc_label = LABEL_NOT_SPECIFIED;
	cfg->rc_protocol = rtm->rtm_protocol;
	cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
	cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
	cfg->rc_nlflags = nlh->nlmsg_flags;
	cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->rc_nlinfo.nlh = nlh;
	cfg->rc_nlinfo.nl_net = sock_net(skb->sk);

	for (index = 0; index <= RTA_MAX; index++) {
		struct nlattr *nla = tb[index];
		if (!nla)
			continue;

		switch (index) {
		case RTA_OIF:
			cfg->rc_ifindex = nla_get_u32(nla);
			break;
		case RTA_NEWDST:
			if (nla_get_labels(nla, MAX_NEW_LABELS,
					   &cfg->rc_output_labels,
					   cfg->rc_output_label, extack))
				goto errout;
			break;
		case RTA_DST:
		{
			u8 label_count;
			if (nla_get_labels(nla, 1, &label_count,
					   &cfg->rc_label, extack))
				goto errout;

			if (!mpls_label_ok(cfg->rc_nlinfo.nl_net,
					   &cfg->rc_label, extack))
				goto errout;
			break;
		}
		case RTA_GATEWAY:
			NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute");
			goto errout;
		case RTA_VIA:
		{
			if (nla_get_via(nla, &cfg->rc_via_alen,
					&cfg->rc_via_table, cfg->rc_via,
					extack))
				goto errout;
			break;
		}
		case RTA_MULTIPATH:
		{
			cfg->rc_mp = nla_data(nla);
			cfg->rc_mp_len = nla_len(nla);
			break;
		}
		case RTA_TTL_PROPAGATE:
		{
			u8 ttl_propagate = nla_get_u8(nla);

			if (ttl_propagate > 1) {
				NL_SET_ERR_MSG_ATTR(extack, nla,
						    "RTA_TTL_PROPAGATE can only be 0 or 1");
				goto errout;
			}
			cfg->rc_ttl_propagate = ttl_propagate ?
				MPLS_TTL_PROP_ENABLED :
				MPLS_TTL_PROP_DISABLED;
			break;
		}
		default:
			NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute");
			/* Unsupported attribute */
			goto errout;
		}
	}

	err = 0;
errout:
	return err;
}

static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct mpls_route_config *cfg;
	int err;

	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
	if (!cfg)
		return -ENOMEM;

	err = rtm_to_route_config(skb, nlh, cfg, extack);
	if (err < 0)
		goto out;

	mutex_lock(&net->mpls.platform_mutex);
	err = mpls_route_del(cfg, extack);
	mutex_unlock(&net->mpls.platform_mutex);
out:
	kfree(cfg);

	return err;
}


static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct mpls_route_config *cfg;
	int err;

	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
	if (!cfg)
		return -ENOMEM;

	err = rtm_to_route_config(skb, nlh, cfg, extack);
	if (err < 0)
		goto out;

	mutex_lock(&net->mpls.platform_mutex);
	err = mpls_route_add(cfg, extack);
	mutex_unlock(&net->mpls.platform_mutex);
out:
	kfree(cfg);

	return err;
}
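/* A typical request reaching mpls_rtm_newroute() comes from iproute2,
 * e.g. (illustrative command line, not taken from this file):
 *
 *   ip -f mpls route add 100 as 200/300 via inet 10.1.1.1 dev eth0
 *
 * which encodes RTA_DST = label 100, RTA_NEWDST = labels {200, 300},
 * RTA_VIA = AF_INET 10.1.1.1 and RTA_OIF, and is translated by
 * rtm_to_route_config() into a struct mpls_route_config.
 */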
static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
			   u32 label, struct mpls_route *rt, int flags)
{
	struct net_device *dev;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_MPLS;
	rtm->rtm_dst_len = 20;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	rtm->rtm_protocol = rt->rt_protocol;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;

	if (nla_put_labels(skb, RTA_DST, 1, &label))
		goto nla_put_failure;

	if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
		bool ttl_propagate =
			rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;

		if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
			       ttl_propagate))
			goto nla_put_failure;
	}
	if (rt->rt_nhn == 1) {
		const struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_labels &&
		    nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
				   nh->nh_label))
			goto nla_put_failure;
		if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
		    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
				nh->nh_via_alen))
			goto nla_put_failure;
		dev = nh->nh_dev;
		if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
			goto nla_put_failure;
		if (nh->nh_flags & RTNH_F_LINKDOWN)
			rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (nh->nh_flags & RTNH_F_DEAD)
			rtm->rtm_flags |= RTNH_F_DEAD;
	} else {
		struct rtnexthop *rtnh;
		struct nlattr *mp;
		u8 linkdown = 0;
		u8 dead = 0;

		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		for_nexthops(rt) {
			dev = nh->nh_dev;
			if (!dev)
				continue;

			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
			if (!rtnh)
				goto nla_put_failure;

			rtnh->rtnh_ifindex = dev->ifindex;
			if (nh->nh_flags & RTNH_F_LINKDOWN) {
				rtnh->rtnh_flags |= RTNH_F_LINKDOWN;
				linkdown++;
			}
			if (nh->nh_flags & RTNH_F_DEAD) {
				rtnh->rtnh_flags |= RTNH_F_DEAD;
				dead++;
			}

			if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
							    nh->nh_labels,
							    nh->nh_label))
				goto nla_put_failure;
			if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
			    nla_put_via(skb, nh->nh_via_table,
					mpls_nh_via(rt, nh),
					nh->nh_via_alen))
				goto nla_put_failure;

			/* length of rtnetlink header + attributes */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
		} endfor_nexthops(rt);

		if (linkdown == rt->rt_nhn)
			rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (dead == rt->rt_nhn)
			rtm->rtm_flags |= RTNH_F_DEAD;

		nla_nest_end(skb, mp);
	}

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
#if IS_ENABLED(CONFIG_INET)
static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
				   struct fib_dump_filter *filter,
				   struct netlink_callback *cb)
{
	return ip_valid_fib_dump_req(net, nlh, filter, cb);
}
#else
static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
				   struct fib_dump_filter *filter,
				   struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[RTA_MAX + 1];
	struct rtmsg *rtm;
	int err, i;

	rtm = nlmsg_payload(nlh, sizeof(*rtm));
	if (!rtm) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request");
		return -EINVAL;
	}

	if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
	    rtm->rtm_table || rtm->rtm_scope || rtm->rtm_type ||
	    rtm->rtm_flags) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for FIB dump request");
		return -EINVAL;
	}

	if (rtm->rtm_protocol) {
		filter->protocol = rtm->rtm_protocol;
		filter->filter_set = 1;
		cb->answer_flags = NLM_F_DUMP_FILTERED;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
					    rtm_mpls_policy, extack);
	if (err < 0)
		return err;

	for (i = 0; i <= RTA_MAX; ++i) {
		int ifindex;

		if (i == RTA_OIF) {
			ifindex = nla_get_u32(tb[i]);
			filter->dev = dev_get_by_index_rcu(net, ifindex);
			if (!filter->dev)
				return -ENODEV;
			filter->filter_set = 1;
		} else if (tb[i]) {
			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	return 0;
}
#endif

static bool mpls_rt_uses_dev(struct mpls_route *rt,
			     const struct net_device *dev)
{
	if (rt->rt_nhn == 1) {
		struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_dev == dev)
			return true;
	} else {
		for_nexthops(rt) {
			if (nh->nh_dev == dev)
				return true;
		} endfor_nexthops(rt);
	}

	return false;
}

static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	struct mpls_route __rcu **platform_label;
	struct fib_dump_filter filter = {
		.rtnl_held = false,
	};
	unsigned int flags = NLM_F_MULTI;
	size_t platform_labels;
	unsigned int index;
	int err;

	rcu_read_lock();

	if (cb->strict_check) {
		err = mpls_valid_fib_dump_req(net, nlh, &filter, cb);
		if (err < 0)
			goto err;

		/* for MPLS, there is only 1 table with fixed type and flags.
		 * If either are set in the filter then return nothing.
		 */
		if ((filter.table_id && filter.table_id != RT_TABLE_MAIN) ||
		    (filter.rt_type && filter.rt_type != RTN_UNICAST) ||
		    filter.flags)
			goto unlock;
	}

	index = cb->args[0];
	if (index < MPLS_LABEL_FIRST_UNRESERVED)
		index = MPLS_LABEL_FIRST_UNRESERVED;

	platform_label = rcu_dereference(net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;

	if (filter.filter_set)
		flags |= NLM_F_DUMP_FILTERED;

	for (; index < platform_labels; index++) {
		struct mpls_route *rt;

		rt = rcu_dereference(platform_label[index]);
		if (!rt)
			continue;

		if ((filter.dev && !mpls_rt_uses_dev(rt, filter.dev)) ||
		    (filter.protocol && rt->rt_protocol != filter.protocol))
			continue;

		if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
				    index, rt, flags) < 0)
			break;
	}
	cb->args[0] = index;

unlock:
	rcu_read_unlock();
	return skb->len;

err:
	rcu_read_unlock();
	return err;
}

static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
{
	size_t payload =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4) /* RTA_DST */
		+ nla_total_size(1); /* RTA_TTL_PROPAGATE */

	if (rt->rt_nhn == 1) {
		struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_dev)
			payload += nla_total_size(4); /* RTA_OIF */
		if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */
			payload += nla_total_size(2 + nh->nh_via_alen);
		if (nh->nh_labels) /* RTA_NEWDST */
			payload += nla_total_size(nh->nh_labels * 4);
	} else {
		/* each nexthop is packed in an attribute */
		size_t nhsize = 0;

		for_nexthops(rt) {
			if (!nh->nh_dev)
				continue;
			nhsize += nla_total_size(sizeof(struct rtnexthop));
			/* RTA_VIA */
			if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
				nhsize += nla_total_size(2 + nh->nh_via_alen);
			if (nh->nh_labels)
				nhsize += nla_total_size(nh->nh_labels * 4);
		} endfor_nexthops(rt);
		/* nested attribute */
		payload += nla_total_size(nhsize);
	}

	return payload;
}
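/* Sizing example: nla_total_size(n) is NLA_ALIGN(NLA_HDRLEN + n), so a
 * single-nexthop route over eth0 with a 2-label new destination and an
 * IPv4 via costs 8 (RTA_DST) + 8 (RTA_TTL_PROPAGATE) + 8 (RTA_OIF) +
 * 12 (RTA_VIA, 2 + 4 address bytes) + 12 (RTA_NEWDST, 2 * 4) bytes of
 * attributes on top of the aligned rtmsg header.  Overestimating by a
 * few padding bytes is fine; undershooting would make rtmsg_lfib()'s
 * nlmsg_new() allocation too small and trip the -EMSGSIZE WARN_ON.
 */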
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
		       struct nlmsghdr *nlh, struct net *net, u32 portid,
		       unsigned int nlm_flags)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;

	skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);

	return;
errout:
	rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
}

static int mpls_valid_getroute_req(struct sk_buff *skb,
				   const struct nlmsghdr *nlh,
				   struct nlattr **tb,
				   struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	int i, err;

	rtm = nlmsg_payload(nlh, sizeof(*rtm));
	if (!rtm) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid header for get route request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
					      rtm_mpls_policy, extack);

	if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) ||
	    rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table ||
	    rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
		return -EINVAL;
	}
	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid flags for get route request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
					    rtm_mpls_policy, extack);
	if (err)
		return err;

	if ((tb[RTA_DST] || tb[RTA_NEWDST]) && !rtm->rtm_dst_len) {
		NL_SET_ERR_MSG_MOD(extack, "rtm_dst_len must be 20 for MPLS");
		return -EINVAL;
	}

	for (i = 0; i <= RTA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case RTA_DST:
		case RTA_NEWDST:
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
			return -EINVAL;
		}
	}

	return 0;
}

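/* RTM_GETROUTE doit handler.  After looking up the route for the
 * incoming label, either the route itself is returned (RTM_F_FIB_MATCH)
 * or a scratch MPLS packet is built, with any RTA_NEWDST labels pushed
 * on, so that mpls_select_multipath() resolves the same nexthop the
 * forwarding path would use.  The label stack is then popped again and
 * the same skb is reused for the unicast netlink reply.
 */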
static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	u32 portid = NETLINK_CB(in_skb).portid;
	u32 in_label = LABEL_NOT_SPECIFIED;
	struct nlattr *tb[RTA_MAX + 1];
	struct mpls_route *rt = NULL;
	u32 labels[MAX_NEW_LABELS];
	struct mpls_shim_hdr *hdr;
	unsigned int hdr_size = 0;
	const struct mpls_nh *nh;
	struct net_device *dev;
	struct rtmsg *rtm, *r;
	struct nlmsghdr *nlh;
	struct sk_buff *skb;
	u8 n_labels;
	int err;

	mutex_lock(&net->mpls.platform_mutex);

	err = mpls_valid_getroute_req(in_skb, in_nlh, tb, extack);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(in_nlh);

	if (tb[RTA_DST]) {
		u8 label_count;

		if (nla_get_labels(tb[RTA_DST], 1, &label_count,
				   &in_label, extack)) {
			err = -EINVAL;
			goto errout;
		}

		if (!mpls_label_ok(net, &in_label, extack)) {
			err = -EINVAL;
			goto errout;
		}
	}

	if (in_label < net->mpls.platform_labels)
		rt = mpls_route_input(net, in_label);
	if (!rt) {
		err = -ENETUNREACH;
		goto errout;
	}

	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
		skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
		if (!skb) {
			err = -ENOBUFS;
			goto errout;
		}

		err = mpls_dump_route(skb, portid, in_nlh->nlmsg_seq,
				      RTM_NEWROUTE, in_label, rt, 0);
		if (err < 0) {
			/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
			WARN_ON(err == -EMSGSIZE);
			goto errout_free;
		}

		err = rtnl_unicast(skb, net, portid);
		goto errout;
	}

	if (tb[RTA_NEWDST]) {
		if (nla_get_labels(tb[RTA_NEWDST], MAX_NEW_LABELS, &n_labels,
				   labels, extack) != 0) {
			err = -EINVAL;
			goto errout;
		}

		hdr_size = n_labels * sizeof(struct mpls_shim_hdr);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	skb->protocol = htons(ETH_P_MPLS_UC);

	if (hdr_size) {
		bool bos;
		int i;

		if (skb_cow(skb, hdr_size)) {
			err = -ENOBUFS;
			goto errout_free;
		}

		skb_reserve(skb, hdr_size);
		skb_push(skb, hdr_size);
		skb_reset_network_header(skb);

		/* Push new labels */
		hdr = mpls_hdr(skb);
		bos = true;
		for (i = n_labels - 1; i >= 0; i--) {
			hdr[i] = mpls_entry_encode(labels[i], 1, 0, bos);
			bos = false;
		}
	}

	nh = mpls_select_multipath(rt, skb);
	if (!nh) {
		err = -ENETUNREACH;
		goto errout_free;
	}

	if (hdr_size) {
		skb_pull(skb, hdr_size);
		skb_reset_network_header(skb);
	}

	nlh = nlmsg_put(skb, portid, in_nlh->nlmsg_seq,
			RTM_NEWROUTE, sizeof(*r), 0);
	if (!nlh) {
		err = -EMSGSIZE;
		goto errout_free;
	}

	r = nlmsg_data(nlh);
	r->rtm_family = AF_MPLS;
	r->rtm_dst_len = 20;
	r->rtm_src_len = 0;
	r->rtm_table = RT_TABLE_MAIN;
	r->rtm_type = RTN_UNICAST;
	r->rtm_scope = RT_SCOPE_UNIVERSE;
	r->rtm_protocol = rt->rt_protocol;
	r->rtm_flags = 0;

	if (nla_put_labels(skb, RTA_DST, 1, &in_label))
		goto nla_put_failure;

	if (nh->nh_labels &&
	    nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
			   nh->nh_label))
		goto nla_put_failure;

	if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
	    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
			nh->nh_via_alen))
		goto nla_put_failure;
	dev = nh->nh_dev;
	if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	err = rtnl_unicast(skb, net, portid);
errout:
	mutex_unlock(&net->mpls.platform_mutex);
	return err;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	err = -EMSGSIZE;
errout_free:
	mutex_unlock(&net->mpls.platform_mutex);
	kfree_skb(skb);
	return err;
}

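/* Resize the per-namespace platform label table to hold @limit entries.
 * Routes at or above the new limit are deleted, the surviving pointers
 * are copied across, and the IPv4/IPv6 explicit NULL labels are
 * populated as kernel routes via the loopback device when they fit in
 * the new table for the first time.  The old table is freed only after
 * an RCU grace period has elapsed.
 */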
static int resize_platform_label_table(struct net *net, size_t limit)
{
	size_t size = sizeof(struct mpls_route *) * limit;
	size_t old_limit;
	size_t cp_size;
	struct mpls_route __rcu **labels = NULL, **old;
	struct mpls_route *rt0 = NULL, *rt2 = NULL;
	unsigned int index;

	if (size) {
		labels = kvzalloc(size, GFP_KERNEL);
		if (!labels)
			goto nolabels;
	}

	/* In case the predefined labels need to be populated */
	if (limit > MPLS_LABEL_IPV4NULL) {
		struct net_device *lo = net->loopback_dev;

		rt0 = mpls_rt_alloc(1, lo->addr_len, 0);
		if (IS_ERR(rt0))
			goto nort0;

		rt0->rt_nh->nh_dev = lo;
		netdev_hold(lo, &rt0->rt_nh->nh_dev_tracker, GFP_KERNEL);
		rt0->rt_protocol = RTPROT_KERNEL;
		rt0->rt_payload_type = MPT_IPV4;
		rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
		rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt0->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}
	if (limit > MPLS_LABEL_IPV6NULL) {
		struct net_device *lo = net->loopback_dev;

		rt2 = mpls_rt_alloc(1, lo->addr_len, 0);
		if (IS_ERR(rt2))
			goto nort2;

		rt2->rt_nh->nh_dev = lo;
		netdev_hold(lo, &rt2->rt_nh->nh_dev_tracker, GFP_KERNEL);
		rt2->rt_protocol = RTPROT_KERNEL;
		rt2->rt_payload_type = MPT_IPV6;
		rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
		rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt2->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}

	mutex_lock(&net->mpls.platform_mutex);

	/* Remember the original table */
	old = mpls_dereference(net, net->mpls.platform_label);
	old_limit = net->mpls.platform_labels;

	/* Free any labels beyond the new table */
	for (index = limit; index < old_limit; index++)
		mpls_route_update(net, index, NULL, NULL);

	/* Copy over the old labels */
	cp_size = size;
	if (old_limit < limit)
		cp_size = old_limit * sizeof(struct mpls_route *);

	memcpy(labels, old, cp_size);

	/* If needed set the predefined labels */
	if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
	    (limit > MPLS_LABEL_IPV6NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
		rt2 = NULL;
	}

	if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
	    (limit > MPLS_LABEL_IPV4NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
		rt0 = NULL;
	}

	/* Update the global pointers */
	net->mpls.platform_labels = limit;
	rcu_assign_pointer(net->mpls.platform_label, labels);

	mutex_unlock(&net->mpls.platform_mutex);

	mpls_rt_free(rt2);
	mpls_rt_free(rt0);

	if (old) {
		synchronize_rcu();
		kvfree(old);
	}
	return 0;

nort2:
	mpls_rt_free(rt0);
nort0:
	kvfree(labels);
nolabels:
	return -ENOMEM;
}

static int mpls_platform_labels(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = table->data;
	int platform_labels = net->mpls.platform_labels;
	int ret;
	struct ctl_table tmp = {
		.procname	= table->procname,
		.data		= &platform_labels,
		.maxlen		= sizeof(int),
		.mode		= table->mode,
		.extra1		= SYSCTL_ZERO,
		.extra2		= &label_limit,
	};

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0)
		ret = resize_platform_label_table(net, platform_labels);

	return ret;
}

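/* Per-namespace sysctl knobs under net/mpls.  The .data fields hold
 * offsets from the start of struct net (NULL, i.e. offset 0, for
 * "platform_labels", and MPLS_NS_SYSCTL_OFFSET for the rest);
 * mpls_net_init() turns them into absolute pointers in each
 * namespace's private copy of this table.
 */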
#define MPLS_NS_SYSCTL_OFFSET(field)	\
	(&((struct net *)0)->field)

static const struct ctl_table mpls_table[] = {
	{
		.procname	= "platform_labels",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= mpls_platform_labels,
	},
	{
		.procname	= "ip_ttl_propagate",
		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "default_ttl",
		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &ttl_max,
	},
};

static __net_init int mpls_net_init(struct net *net)
{
	size_t table_size = ARRAY_SIZE(mpls_table);
	struct ctl_table *table;
	int i;

	mutex_init(&net->mpls.platform_mutex);
	net->mpls.platform_labels = 0;
	net->mpls.platform_label = NULL;
	net->mpls.ip_ttl_propagate = 1;
	net->mpls.default_ttl = 255;

	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
	if (table == NULL)
		return -ENOMEM;

	/* Table data contains only offsets relative to the base of
	 * the struct net at this point, so make them absolute.
	 */
	for (i = 0; i < table_size; i++)
		table[i].data = (char *)net + (uintptr_t)table[i].data;

	net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table,
					       table_size);
	if (net->mpls.ctl == NULL) {
		kfree(table);
		return -ENOMEM;
	}

	return 0;
}

static __net_exit void mpls_net_exit(struct net *net)
{
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;
	const struct ctl_table *table;
	unsigned int index;

	table = net->mpls.ctl->ctl_table_arg;
	unregister_net_sysctl_table(net->mpls.ctl);
	kfree(table);

	/* An rcu grace period has passed since the last device in this
	 * network namespace (and thus the last in-flight packet) left
	 * it.  This is because unregister_netdevice_many and
	 * netdev_run_todo have completed for each network device that
	 * was in this network namespace.
	 *
	 * As such no additional rcu synchronization is necessary when
	 * freeing the platform_label table.
	 */
	mutex_lock(&net->mpls.platform_mutex);

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;

	for (index = 0; index < platform_labels; index++) {
		struct mpls_route *rt;

		rt = mpls_dereference(net, platform_label[index]);
		mpls_notify_route(net, index, rt, NULL, NULL);
		mpls_rt_free(rt);
	}

	mutex_unlock(&net->mpls.platform_mutex);

	kvfree(platform_label);
}

static struct pernet_operations mpls_net_ops = {
	.init = mpls_net_init,
	.exit = mpls_net_exit,
};

static struct rtnl_af_ops mpls_af_ops __read_mostly = {
	.family		   = AF_MPLS,
	.fill_stats_af	   = mpls_fill_stats_af,
	.get_stats_af_size = mpls_get_stats_af_size,
};

static const struct rtnl_msg_handler mpls_rtnl_msg_handlers[] __initconst_or_module = {
	{THIS_MODULE, PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL,
	 RTNL_FLAG_DOIT_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL,
	 RTNL_FLAG_DOIT_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
	 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
	 mpls_netconf_get_devconf, mpls_netconf_dump_devconf,
	 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
};

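/* Module init.  Registration order matters: per-netns state is set up
 * first, then the netdevice notifier, the MPLS packet type, the
 * rtnetlink address family and finally the rtnetlink message handlers;
 * the error labels below unwind in the reverse order.
 */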
static int __init mpls_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);

	err = register_pernet_subsys(&mpls_net_ops);
	if (err)
		goto out;

	err = register_netdevice_notifier(&mpls_dev_notifier);
	if (err)
		goto out_unregister_pernet;

	dev_add_pack(&mpls_packet_type);

	err = rtnl_af_register(&mpls_af_ops);
	if (err)
		goto out_unregister_dev_type;

	err = rtnl_register_many(mpls_rtnl_msg_handlers);
	if (err)
		goto out_unregister_rtnl_af;

	err = ipgre_tunnel_encap_add_mpls_ops();
	if (err) {
		pr_err("Can't add mpls over gre tunnel ops\n");
		goto out_unregister_rtnl;
	}

	err = 0;
out:
	return err;

out_unregister_rtnl:
	rtnl_unregister_many(mpls_rtnl_msg_handlers);
out_unregister_rtnl_af:
	rtnl_af_unregister(&mpls_af_ops);
out_unregister_dev_type:
	dev_remove_pack(&mpls_packet_type);
	unregister_netdevice_notifier(&mpls_dev_notifier);
out_unregister_pernet:
	unregister_pernet_subsys(&mpls_net_ops);
	goto out;
}
module_init(mpls_init);

static void __exit mpls_exit(void)
{
	rtnl_unregister_all(PF_MPLS);
	rtnl_af_unregister(&mpls_af_ops);
	dev_remove_pack(&mpls_packet_type);
	unregister_netdevice_notifier(&mpls_dev_notifier);
	unregister_pernet_subsys(&mpls_net_ops);
	ipgre_tunnel_encap_del_mpls_ops();
}
module_exit(mpls_exit);

MODULE_DESCRIPTION("MultiProtocol Label Switching");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_NETPROTO(PF_MPLS);