1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux INET6 implementation 4 * Forwarding Information Database 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Changes: 10 * Yuji SEKIYA @USAGI: Support default route on router node; 11 * remove ip6_null_entry from the top of 12 * routing table. 13 * Ville Nuorvala: Fixed routing subtrees. 14 */ 15 16 #define pr_fmt(fmt) "IPv6: " fmt 17 18 #include <linux/errno.h> 19 #include <linux/types.h> 20 #include <linux/net.h> 21 #include <linux/route.h> 22 #include <linux/netdevice.h> 23 #include <linux/in6.h> 24 #include <linux/init.h> 25 #include <linux/list.h> 26 #include <linux/slab.h> 27 28 #include <net/ip.h> 29 #include <net/ipv6.h> 30 #include <net/ndisc.h> 31 #include <net/addrconf.h> 32 #include <net/lwtunnel.h> 33 #include <net/fib_notifier.h> 34 35 #include <net/ip6_fib.h> 36 #include <net/ip6_route.h> 37 38 static struct kmem_cache *fib6_node_kmem __read_mostly; 39 40 struct fib6_cleaner { 41 struct fib6_walker w; 42 struct net *net; 43 int (*func)(struct fib6_info *, void *arg); 44 int sernum; 45 void *arg; 46 bool skip_notify; 47 }; 48 49 #ifdef CONFIG_IPV6_SUBTREES 50 #define FWS_INIT FWS_S 51 #else 52 #define FWS_INIT FWS_L 53 #endif 54 55 static struct fib6_info *fib6_find_prefix(struct net *net, 56 struct fib6_table *table, 57 struct fib6_node *fn); 58 static struct fib6_node *fib6_repair_tree(struct net *net, 59 struct fib6_table *table, 60 struct fib6_node *fn); 61 static int fib6_walk(struct net *net, struct fib6_walker *w); 62 static int fib6_walk_continue(struct fib6_walker *w); 63 64 /* 65 * A routing update causes an increase of the serial number on the 66 * affected subtree. This allows for cached routes to be asynchronously 67 * tested when modifications are made to the destination cache as a 68 * result of redirects, path MTU changes, etc. 
69 */ 70 71 static void fib6_gc_timer_cb(struct timer_list *t); 72 73 #define FOR_WALKERS(net, w) \ 74 list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh) 75 76 static void fib6_walker_link(struct net *net, struct fib6_walker *w) 77 { 78 write_lock_bh(&net->ipv6.fib6_walker_lock); 79 list_add(&w->lh, &net->ipv6.fib6_walkers); 80 write_unlock_bh(&net->ipv6.fib6_walker_lock); 81 } 82 83 static void fib6_walker_unlink(struct net *net, struct fib6_walker *w) 84 { 85 write_lock_bh(&net->ipv6.fib6_walker_lock); 86 list_del(&w->lh); 87 write_unlock_bh(&net->ipv6.fib6_walker_lock); 88 } 89 90 static int fib6_new_sernum(struct net *net) 91 { 92 int new, old; 93 94 do { 95 old = atomic_read(&net->ipv6.fib6_sernum); 96 new = old < INT_MAX ? old + 1 : 1; 97 } while (atomic_cmpxchg(&net->ipv6.fib6_sernum, 98 old, new) != old); 99 return new; 100 } 101 102 enum { 103 FIB6_NO_SERNUM_CHANGE = 0, 104 }; 105 106 void fib6_update_sernum(struct net *net, struct fib6_info *f6i) 107 { 108 struct fib6_node *fn; 109 110 fn = rcu_dereference_protected(f6i->fib6_node, 111 lockdep_is_held(&f6i->fib6_table->tb6_lock)); 112 if (fn) 113 fn->fn_sernum = fib6_new_sernum(net); 114 } 115 116 /* 117 * Auxiliary address test functions for the radix tree. 118 * 119 * These assume a 32bit processor (although it will work on 120 * 64bit processors) 121 */ 122 123 /* 124 * test bit 125 */ 126 #if defined(__LITTLE_ENDIAN) 127 # define BITOP_BE32_SWIZZLE (0x1F & ~7) 128 #else 129 # define BITOP_BE32_SWIZZLE 0 130 #endif 131 132 static __be32 addr_bit_set(const void *token, int fn_bit) 133 { 134 const __be32 *addr = token; 135 /* 136 * Here, 137 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) 138 * is optimized version of 139 * htonl(1 << ((~fn_bit)&0x1F)) 140 * See include/asm-generic/bitops/le.h. 
141 */ 142 return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & 143 addr[fn_bit >> 5]; 144 } 145 146 struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) 147 { 148 struct fib6_info *f6i; 149 size_t sz = sizeof(*f6i); 150 151 if (with_fib6_nh) 152 sz += sizeof(struct fib6_nh); 153 154 f6i = kzalloc(sz, gfp_flags); 155 if (!f6i) 156 return NULL; 157 158 /* fib6_siblings is a union with nh_list, so this initializes both */ 159 INIT_LIST_HEAD(&f6i->fib6_siblings); 160 refcount_set(&f6i->fib6_ref, 1); 161 162 return f6i; 163 } 164 165 void fib6_info_destroy_rcu(struct rcu_head *head) 166 { 167 struct fib6_info *f6i = container_of(head, struct fib6_info, rcu); 168 169 WARN_ON(f6i->fib6_node); 170 171 if (f6i->nh) 172 nexthop_put(f6i->nh); 173 else 174 fib6_nh_release(f6i->fib6_nh); 175 176 ip_fib_metrics_put(f6i->fib6_metrics); 177 kfree(f6i); 178 } 179 EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu); 180 181 static struct fib6_node *node_alloc(struct net *net) 182 { 183 struct fib6_node *fn; 184 185 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); 186 if (fn) 187 net->ipv6.rt6_stats->fib_nodes++; 188 189 return fn; 190 } 191 192 static void node_free_immediate(struct net *net, struct fib6_node *fn) 193 { 194 kmem_cache_free(fib6_node_kmem, fn); 195 net->ipv6.rt6_stats->fib_nodes--; 196 } 197 198 static void node_free_rcu(struct rcu_head *head) 199 { 200 struct fib6_node *fn = container_of(head, struct fib6_node, rcu); 201 202 kmem_cache_free(fib6_node_kmem, fn); 203 } 204 205 static void node_free(struct net *net, struct fib6_node *fn) 206 { 207 call_rcu(&fn->rcu, node_free_rcu); 208 net->ipv6.rt6_stats->fib_nodes--; 209 } 210 211 static void fib6_free_table(struct fib6_table *table) 212 { 213 inetpeer_invalidate_tree(&table->tb6_peers); 214 kfree(table); 215 } 216 217 static void fib6_link_table(struct net *net, struct fib6_table *tb) 218 { 219 unsigned int h; 220 221 /* 222 * Initialize table lock at a single place to give lockdep a 
key, 223 * tables aren't visible prior to being linked to the list. 224 */ 225 spin_lock_init(&tb->tb6_lock); 226 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); 227 228 /* 229 * No protection necessary, this is the only list mutatation 230 * operation, tables never disappear once they exist. 231 */ 232 hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]); 233 } 234 235 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 236 237 static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) 238 { 239 struct fib6_table *table; 240 241 table = kzalloc(sizeof(*table), GFP_ATOMIC); 242 if (table) { 243 table->tb6_id = id; 244 rcu_assign_pointer(table->tb6_root.leaf, 245 net->ipv6.fib6_null_entry); 246 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 247 inet_peer_base_init(&table->tb6_peers); 248 } 249 250 return table; 251 } 252 253 struct fib6_table *fib6_new_table(struct net *net, u32 id) 254 { 255 struct fib6_table *tb; 256 257 if (id == 0) 258 id = RT6_TABLE_MAIN; 259 tb = fib6_get_table(net, id); 260 if (tb) 261 return tb; 262 263 tb = fib6_alloc_table(net, id); 264 if (tb) 265 fib6_link_table(net, tb); 266 267 return tb; 268 } 269 EXPORT_SYMBOL_GPL(fib6_new_table); 270 271 struct fib6_table *fib6_get_table(struct net *net, u32 id) 272 { 273 struct fib6_table *tb; 274 struct hlist_head *head; 275 unsigned int h; 276 277 if (id == 0) 278 id = RT6_TABLE_MAIN; 279 h = id & (FIB6_TABLE_HASHSZ - 1); 280 rcu_read_lock(); 281 head = &net->ipv6.fib_table_hash[h]; 282 hlist_for_each_entry_rcu(tb, head, tb6_hlist) { 283 if (tb->tb6_id == id) { 284 rcu_read_unlock(); 285 return tb; 286 } 287 } 288 rcu_read_unlock(); 289 290 return NULL; 291 } 292 EXPORT_SYMBOL_GPL(fib6_get_table); 293 294 static void __net_init fib6_tables_init(struct net *net) 295 { 296 fib6_link_table(net, net->ipv6.fib6_main_tbl); 297 fib6_link_table(net, net->ipv6.fib6_local_tbl); 298 } 299 #else 300 301 struct fib6_table *fib6_new_table(struct net *net, u32 id) 302 { 303 return 
fib6_get_table(net, id); 304 } 305 306 struct fib6_table *fib6_get_table(struct net *net, u32 id) 307 { 308 return net->ipv6.fib6_main_tbl; 309 } 310 311 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, 312 const struct sk_buff *skb, 313 int flags, pol_lookup_t lookup) 314 { 315 struct rt6_info *rt; 316 317 rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); 318 if (rt->dst.error == -EAGAIN) { 319 ip6_rt_put(rt); 320 rt = net->ipv6.ip6_null_entry; 321 dst_hold(&rt->dst); 322 } 323 324 return &rt->dst; 325 } 326 327 /* called with rcu lock held; no reference taken on fib6_info */ 328 int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, 329 struct fib6_result *res, int flags) 330 { 331 return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, 332 res, flags); 333 } 334 335 static void __net_init fib6_tables_init(struct net *net) 336 { 337 fib6_link_table(net, net->ipv6.fib6_main_tbl); 338 } 339 340 #endif 341 342 unsigned int fib6_tables_seq_read(struct net *net) 343 { 344 unsigned int h, fib_seq = 0; 345 346 rcu_read_lock(); 347 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 348 struct hlist_head *head = &net->ipv6.fib_table_hash[h]; 349 struct fib6_table *tb; 350 351 hlist_for_each_entry_rcu(tb, head, tb6_hlist) 352 fib_seq += tb->fib_seq; 353 } 354 rcu_read_unlock(); 355 356 return fib_seq; 357 } 358 359 static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, 360 enum fib_event_type event_type, 361 struct fib6_info *rt) 362 { 363 struct fib6_entry_notifier_info info = { 364 .rt = rt, 365 }; 366 367 return call_fib6_notifier(nb, net, event_type, &info.info); 368 } 369 370 int call_fib6_entry_notifiers(struct net *net, 371 enum fib_event_type event_type, 372 struct fib6_info *rt, 373 struct netlink_ext_ack *extack) 374 { 375 struct fib6_entry_notifier_info info = { 376 .info.extack = extack, 377 .rt = rt, 378 }; 379 380 rt->fib6_table->fib_seq++; 381 return call_fib6_notifiers(net, event_type, 
&info.info); 382 } 383 384 struct fib6_dump_arg { 385 struct net *net; 386 struct notifier_block *nb; 387 }; 388 389 static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) 390 { 391 if (rt == arg->net->ipv6.fib6_null_entry) 392 return; 393 call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt); 394 } 395 396 static int fib6_node_dump(struct fib6_walker *w) 397 { 398 struct fib6_info *rt; 399 400 for_each_fib6_walker_rt(w) 401 fib6_rt_dump(rt, w->args); 402 w->leaf = NULL; 403 return 0; 404 } 405 406 static void fib6_table_dump(struct net *net, struct fib6_table *tb, 407 struct fib6_walker *w) 408 { 409 w->root = &tb->tb6_root; 410 spin_lock_bh(&tb->tb6_lock); 411 fib6_walk(net, w); 412 spin_unlock_bh(&tb->tb6_lock); 413 } 414 415 /* Called with rcu_read_lock() */ 416 int fib6_tables_dump(struct net *net, struct notifier_block *nb) 417 { 418 struct fib6_dump_arg arg; 419 struct fib6_walker *w; 420 unsigned int h; 421 422 w = kzalloc(sizeof(*w), GFP_ATOMIC); 423 if (!w) 424 return -ENOMEM; 425 426 w->func = fib6_node_dump; 427 arg.net = net; 428 arg.nb = nb; 429 w->args = &arg; 430 431 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 432 struct hlist_head *head = &net->ipv6.fib_table_hash[h]; 433 struct fib6_table *tb; 434 435 hlist_for_each_entry_rcu(tb, head, tb6_hlist) 436 fib6_table_dump(net, tb, w); 437 } 438 439 kfree(w); 440 441 return 0; 442 } 443 444 static int fib6_dump_node(struct fib6_walker *w) 445 { 446 int res; 447 struct fib6_info *rt; 448 449 for_each_fib6_walker_rt(w) { 450 res = rt6_dump_route(rt, w->args); 451 if (res < 0) { 452 /* Frame is full, suspend walking */ 453 w->leaf = rt; 454 return 1; 455 } 456 457 /* Multipath routes are dumped in one route with the 458 * RTA_MULTIPATH attribute. 
Jump 'rt' to point to the 459 * last sibling of this route (no need to dump the 460 * sibling routes again) 461 */ 462 if (rt->fib6_nsiblings) 463 rt = list_last_entry(&rt->fib6_siblings, 464 struct fib6_info, 465 fib6_siblings); 466 } 467 w->leaf = NULL; 468 return 0; 469 } 470 471 static void fib6_dump_end(struct netlink_callback *cb) 472 { 473 struct net *net = sock_net(cb->skb->sk); 474 struct fib6_walker *w = (void *)cb->args[2]; 475 476 if (w) { 477 if (cb->args[4]) { 478 cb->args[4] = 0; 479 fib6_walker_unlink(net, w); 480 } 481 cb->args[2] = 0; 482 kfree(w); 483 } 484 cb->done = (void *)cb->args[3]; 485 cb->args[1] = 3; 486 } 487 488 static int fib6_dump_done(struct netlink_callback *cb) 489 { 490 fib6_dump_end(cb); 491 return cb->done ? cb->done(cb) : 0; 492 } 493 494 static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, 495 struct netlink_callback *cb) 496 { 497 struct net *net = sock_net(skb->sk); 498 struct fib6_walker *w; 499 int res; 500 501 w = (void *)cb->args[2]; 502 w->root = &table->tb6_root; 503 504 if (cb->args[4] == 0) { 505 w->count = 0; 506 w->skip = 0; 507 508 spin_lock_bh(&table->tb6_lock); 509 res = fib6_walk(net, w); 510 spin_unlock_bh(&table->tb6_lock); 511 if (res > 0) { 512 cb->args[4] = 1; 513 cb->args[5] = w->root->fn_sernum; 514 } 515 } else { 516 if (cb->args[5] != w->root->fn_sernum) { 517 /* Begin at the root if the tree changed */ 518 cb->args[5] = w->root->fn_sernum; 519 w->state = FWS_INIT; 520 w->node = w->root; 521 w->skip = w->count; 522 } else 523 w->skip = 0; 524 525 spin_lock_bh(&table->tb6_lock); 526 res = fib6_walk_continue(w); 527 spin_unlock_bh(&table->tb6_lock); 528 if (res <= 0) { 529 fib6_walker_unlink(net, w); 530 cb->args[4] = 0; 531 } 532 } 533 534 return res; 535 } 536 537 static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 538 { 539 const struct nlmsghdr *nlh = cb->nlh; 540 struct net *net = sock_net(skb->sk); 541 struct rt6_rtnl_dump_arg arg = {}; 542 unsigned int 
h, s_h; 543 unsigned int e = 0, s_e; 544 struct fib6_walker *w; 545 struct fib6_table *tb; 546 struct hlist_head *head; 547 int res = 0; 548 549 if (cb->strict_check) { 550 int err; 551 552 err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb); 553 if (err < 0) 554 return err; 555 } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { 556 struct rtmsg *rtm = nlmsg_data(nlh); 557 558 arg.filter.flags = rtm->rtm_flags & (RTM_F_PREFIX|RTM_F_CLONED); 559 } 560 561 /* fib entries are never clones */ 562 if (arg.filter.flags & RTM_F_CLONED) 563 goto out; 564 565 w = (void *)cb->args[2]; 566 if (!w) { 567 /* New dump: 568 * 569 * 1. hook callback destructor. 570 */ 571 cb->args[3] = (long)cb->done; 572 cb->done = fib6_dump_done; 573 574 /* 575 * 2. allocate and initialize walker. 576 */ 577 w = kzalloc(sizeof(*w), GFP_ATOMIC); 578 if (!w) 579 return -ENOMEM; 580 w->func = fib6_dump_node; 581 cb->args[2] = (long)w; 582 } 583 584 arg.skb = skb; 585 arg.cb = cb; 586 arg.net = net; 587 w->args = &arg; 588 589 if (arg.filter.table_id) { 590 tb = fib6_get_table(net, arg.filter.table_id); 591 if (!tb) { 592 if (arg.filter.dump_all_families) 593 goto out; 594 595 NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); 596 return -ENOENT; 597 } 598 599 if (!cb->args[0]) { 600 res = fib6_dump_table(tb, skb, cb); 601 if (!res) 602 cb->args[0] = 1; 603 } 604 goto out; 605 } 606 607 s_h = cb->args[0]; 608 s_e = cb->args[1]; 609 610 rcu_read_lock(); 611 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { 612 e = 0; 613 head = &net->ipv6.fib_table_hash[h]; 614 hlist_for_each_entry_rcu(tb, head, tb6_hlist) { 615 if (e < s_e) 616 goto next; 617 res = fib6_dump_table(tb, skb, cb); 618 if (res != 0) 619 goto out_unlock; 620 next: 621 e++; 622 } 623 } 624 out_unlock: 625 rcu_read_unlock(); 626 cb->args[1] = e; 627 cb->args[0] = h; 628 out: 629 res = res < 0 ? 
res : skb->len; 630 if (res <= 0) 631 fib6_dump_end(cb); 632 return res; 633 } 634 635 void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) 636 { 637 if (!f6i) 638 return; 639 640 if (f6i->fib6_metrics == &dst_default_metrics) { 641 struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC); 642 643 if (!p) 644 return; 645 646 refcount_set(&p->refcnt, 1); 647 f6i->fib6_metrics = p; 648 } 649 650 f6i->fib6_metrics->metrics[metric - 1] = val; 651 } 652 653 /* 654 * Routing Table 655 * 656 * return the appropriate node for a routing tree "add" operation 657 * by either creating and inserting or by returning an existing 658 * node. 659 */ 660 661 static struct fib6_node *fib6_add_1(struct net *net, 662 struct fib6_table *table, 663 struct fib6_node *root, 664 struct in6_addr *addr, int plen, 665 int offset, int allow_create, 666 int replace_required, 667 struct netlink_ext_ack *extack) 668 { 669 struct fib6_node *fn, *in, *ln; 670 struct fib6_node *pn = NULL; 671 struct rt6key *key; 672 int bit; 673 __be32 dir = 0; 674 675 RT6_TRACE("fib6_add_1\n"); 676 677 /* insert node in tree */ 678 679 fn = root; 680 681 do { 682 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, 683 lockdep_is_held(&table->tb6_lock)); 684 key = (struct rt6key *)((u8 *)leaf + offset); 685 686 /* 687 * Prefix match 688 */ 689 if (plen < fn->fn_bit || 690 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { 691 if (!allow_create) { 692 if (replace_required) { 693 NL_SET_ERR_MSG(extack, 694 "Can not replace route - no match found"); 695 pr_warn("Can't replace route, no match found\n"); 696 return ERR_PTR(-ENOENT); 697 } 698 pr_warn("NLM_F_CREATE should be set when creating new route\n"); 699 } 700 goto insert_above; 701 } 702 703 /* 704 * Exact match ? 
705 */ 706 707 if (plen == fn->fn_bit) { 708 /* clean up an intermediate node */ 709 if (!(fn->fn_flags & RTN_RTINFO)) { 710 RCU_INIT_POINTER(fn->leaf, NULL); 711 fib6_info_release(leaf); 712 /* remove null_entry in the root node */ 713 } else if (fn->fn_flags & RTN_TL_ROOT && 714 rcu_access_pointer(fn->leaf) == 715 net->ipv6.fib6_null_entry) { 716 RCU_INIT_POINTER(fn->leaf, NULL); 717 } 718 719 return fn; 720 } 721 722 /* 723 * We have more bits to go 724 */ 725 726 /* Try to walk down on tree. */ 727 dir = addr_bit_set(addr, fn->fn_bit); 728 pn = fn; 729 fn = dir ? 730 rcu_dereference_protected(fn->right, 731 lockdep_is_held(&table->tb6_lock)) : 732 rcu_dereference_protected(fn->left, 733 lockdep_is_held(&table->tb6_lock)); 734 } while (fn); 735 736 if (!allow_create) { 737 /* We should not create new node because 738 * NLM_F_REPLACE was specified without NLM_F_CREATE 739 * I assume it is safe to require NLM_F_CREATE when 740 * REPLACE flag is used! Later we may want to remove the 741 * check for replace_required, because according 742 * to netlink specification, NLM_F_CREATE 743 * MUST be specified if new route is created. 744 * That would keep IPv6 consistent with IPv4 745 */ 746 if (replace_required) { 747 NL_SET_ERR_MSG(extack, 748 "Can not replace route - no match found"); 749 pr_warn("Can't replace route, no match found\n"); 750 return ERR_PTR(-ENOENT); 751 } 752 pr_warn("NLM_F_CREATE should be set when creating new route\n"); 753 } 754 /* 755 * We walked to the bottom of tree. 756 * Create new leaf node without children. 757 */ 758 759 ln = node_alloc(net); 760 761 if (!ln) 762 return ERR_PTR(-ENOMEM); 763 ln->fn_bit = plen; 764 RCU_INIT_POINTER(ln->parent, pn); 765 766 if (dir) 767 rcu_assign_pointer(pn->right, ln); 768 else 769 rcu_assign_pointer(pn->left, ln); 770 771 return ln; 772 773 774 insert_above: 775 /* 776 * split since we don't have a common prefix anymore or 777 * we have a less significant route. 
778 * we've to insert an intermediate node on the list 779 * this new node will point to the one we need to create 780 * and the current 781 */ 782 783 pn = rcu_dereference_protected(fn->parent, 784 lockdep_is_held(&table->tb6_lock)); 785 786 /* find 1st bit in difference between the 2 addrs. 787 788 See comment in __ipv6_addr_diff: bit may be an invalid value, 789 but if it is >= plen, the value is ignored in any case. 790 */ 791 792 bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr)); 793 794 /* 795 * (intermediate)[in] 796 * / \ 797 * (new leaf node)[ln] (old node)[fn] 798 */ 799 if (plen > bit) { 800 in = node_alloc(net); 801 ln = node_alloc(net); 802 803 if (!in || !ln) { 804 if (in) 805 node_free_immediate(net, in); 806 if (ln) 807 node_free_immediate(net, ln); 808 return ERR_PTR(-ENOMEM); 809 } 810 811 /* 812 * new intermediate node. 813 * RTN_RTINFO will 814 * be off since that an address that chooses one of 815 * the branches would not match less specific routes 816 * in the other branch 817 */ 818 819 in->fn_bit = bit; 820 821 RCU_INIT_POINTER(in->parent, pn); 822 in->leaf = fn->leaf; 823 fib6_info_hold(rcu_dereference_protected(in->leaf, 824 lockdep_is_held(&table->tb6_lock))); 825 826 /* update parent pointer */ 827 if (dir) 828 rcu_assign_pointer(pn->right, in); 829 else 830 rcu_assign_pointer(pn->left, in); 831 832 ln->fn_bit = plen; 833 834 RCU_INIT_POINTER(ln->parent, in); 835 rcu_assign_pointer(fn->parent, in); 836 837 if (addr_bit_set(addr, bit)) { 838 rcu_assign_pointer(in->right, ln); 839 rcu_assign_pointer(in->left, fn); 840 } else { 841 rcu_assign_pointer(in->left, ln); 842 rcu_assign_pointer(in->right, fn); 843 } 844 } else { /* plen <= bit */ 845 846 /* 847 * (new leaf node)[ln] 848 * / \ 849 * (old node)[fn] NULL 850 */ 851 852 ln = node_alloc(net); 853 854 if (!ln) 855 return ERR_PTR(-ENOMEM); 856 857 ln->fn_bit = plen; 858 859 RCU_INIT_POINTER(ln->parent, pn); 860 861 if (addr_bit_set(&key->addr, plen)) 862 RCU_INIT_POINTER(ln->right, 
fn); 863 else 864 RCU_INIT_POINTER(ln->left, fn); 865 866 rcu_assign_pointer(fn->parent, ln); 867 868 if (dir) 869 rcu_assign_pointer(pn->right, ln); 870 else 871 rcu_assign_pointer(pn->left, ln); 872 } 873 return ln; 874 } 875 876 static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, 877 const struct fib6_info *match, 878 const struct fib6_table *table) 879 { 880 int cpu; 881 882 if (!fib6_nh->rt6i_pcpu) 883 return; 884 885 /* release the reference to this fib entry from 886 * all of its cached pcpu routes 887 */ 888 for_each_possible_cpu(cpu) { 889 struct rt6_info **ppcpu_rt; 890 struct rt6_info *pcpu_rt; 891 892 ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); 893 pcpu_rt = *ppcpu_rt; 894 895 /* only dropping the 'from' reference if the cached route 896 * is using 'match'. The cached pcpu_rt->from only changes 897 * from a fib6_info to NULL (ip6_dst_destroy); it can never 898 * change from one fib6_info reference to another 899 */ 900 if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) { 901 struct fib6_info *from; 902 903 from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); 904 fib6_info_release(from); 905 } 906 } 907 } 908 909 struct fib6_nh_pcpu_arg { 910 struct fib6_info *from; 911 const struct fib6_table *table; 912 }; 913 914 static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg) 915 { 916 struct fib6_nh_pcpu_arg *arg = _arg; 917 918 __fib6_drop_pcpu_from(nh, arg->from, arg->table); 919 return 0; 920 } 921 922 static void fib6_drop_pcpu_from(struct fib6_info *f6i, 923 const struct fib6_table *table) 924 { 925 /* Make sure rt6_make_pcpu_route() wont add other percpu routes 926 * while we are cleaning them here. 
927 */ 928 f6i->fib6_destroying = 1; 929 mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */ 930 931 if (f6i->nh) { 932 struct fib6_nh_pcpu_arg arg = { 933 .from = f6i, 934 .table = table 935 }; 936 937 nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, 938 &arg); 939 } else { 940 struct fib6_nh *fib6_nh; 941 942 fib6_nh = f6i->fib6_nh; 943 __fib6_drop_pcpu_from(fib6_nh, f6i, table); 944 } 945 } 946 947 static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, 948 struct net *net) 949 { 950 struct fib6_table *table = rt->fib6_table; 951 952 fib6_drop_pcpu_from(rt, table); 953 954 if (rt->nh && !list_empty(&rt->nh_list)) 955 list_del_init(&rt->nh_list); 956 957 if (refcount_read(&rt->fib6_ref) != 1) { 958 /* This route is used as dummy address holder in some split 959 * nodes. It is not leaked, but it still holds other resources, 960 * which must be released in time. So, scan ascendant nodes 961 * and replace dummy references to this route with references 962 * to still alive ones. 963 */ 964 while (fn) { 965 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, 966 lockdep_is_held(&table->tb6_lock)); 967 struct fib6_info *new_leaf; 968 if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) { 969 new_leaf = fib6_find_prefix(net, table, fn); 970 fib6_info_hold(new_leaf); 971 972 rcu_assign_pointer(fn->leaf, new_leaf); 973 fib6_info_release(rt); 974 } 975 fn = rcu_dereference_protected(fn->parent, 976 lockdep_is_held(&table->tb6_lock)); 977 } 978 } 979 } 980 981 /* 982 * Insert routing information in a node. 
983 */ 984 985 static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, 986 struct nl_info *info, 987 struct netlink_ext_ack *extack) 988 { 989 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, 990 lockdep_is_held(&rt->fib6_table->tb6_lock)); 991 struct fib6_info *iter = NULL; 992 struct fib6_info __rcu **ins; 993 struct fib6_info __rcu **fallback_ins = NULL; 994 int replace = (info->nlh && 995 (info->nlh->nlmsg_flags & NLM_F_REPLACE)); 996 int add = (!info->nlh || 997 (info->nlh->nlmsg_flags & NLM_F_CREATE)); 998 int found = 0; 999 bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); 1000 u16 nlflags = NLM_F_EXCL; 1001 int err; 1002 1003 if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND)) 1004 nlflags |= NLM_F_APPEND; 1005 1006 ins = &fn->leaf; 1007 1008 for (iter = leaf; iter; 1009 iter = rcu_dereference_protected(iter->fib6_next, 1010 lockdep_is_held(&rt->fib6_table->tb6_lock))) { 1011 /* 1012 * Search for duplicates 1013 */ 1014 1015 if (iter->fib6_metric == rt->fib6_metric) { 1016 /* 1017 * Same priority level 1018 */ 1019 if (info->nlh && 1020 (info->nlh->nlmsg_flags & NLM_F_EXCL)) 1021 return -EEXIST; 1022 1023 nlflags &= ~NLM_F_EXCL; 1024 if (replace) { 1025 if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { 1026 found++; 1027 break; 1028 } 1029 if (rt_can_ecmp) 1030 fallback_ins = fallback_ins ?: ins; 1031 goto next_iter; 1032 } 1033 1034 if (rt6_duplicate_nexthop(iter, rt)) { 1035 if (rt->fib6_nsiblings) 1036 rt->fib6_nsiblings = 0; 1037 if (!(iter->fib6_flags & RTF_EXPIRES)) 1038 return -EEXIST; 1039 if (!(rt->fib6_flags & RTF_EXPIRES)) 1040 fib6_clean_expires(iter); 1041 else 1042 fib6_set_expires(iter, rt->expires); 1043 1044 if (rt->fib6_pmtu) 1045 fib6_metric_set(iter, RTAX_MTU, 1046 rt->fib6_pmtu); 1047 return -EEXIST; 1048 } 1049 /* If we have the same destination and the same metric, 1050 * but not the same gateway, then the route we try to 1051 * add is sibling to this route, increment our counter 1052 * of siblings, and 
later we will add our route to the 1053 * list. 1054 * Only static routes (which don't have flag 1055 * RTF_EXPIRES) are used for ECMPv6. 1056 * 1057 * To avoid long list, we only had siblings if the 1058 * route have a gateway. 1059 */ 1060 if (rt_can_ecmp && 1061 rt6_qualify_for_ecmp(iter)) 1062 rt->fib6_nsiblings++; 1063 } 1064 1065 if (iter->fib6_metric > rt->fib6_metric) 1066 break; 1067 1068 next_iter: 1069 ins = &iter->fib6_next; 1070 } 1071 1072 if (fallback_ins && !found) { 1073 /* No ECMP-able route found, replace first non-ECMP one */ 1074 ins = fallback_ins; 1075 iter = rcu_dereference_protected(*ins, 1076 lockdep_is_held(&rt->fib6_table->tb6_lock)); 1077 found++; 1078 } 1079 1080 /* Reset round-robin state, if necessary */ 1081 if (ins == &fn->leaf) 1082 fn->rr_ptr = NULL; 1083 1084 /* Link this route to others same route. */ 1085 if (rt->fib6_nsiblings) { 1086 unsigned int fib6_nsiblings; 1087 struct fib6_info *sibling, *temp_sibling; 1088 1089 /* Find the first route that have the same metric */ 1090 sibling = leaf; 1091 while (sibling) { 1092 if (sibling->fib6_metric == rt->fib6_metric && 1093 rt6_qualify_for_ecmp(sibling)) { 1094 list_add_tail(&rt->fib6_siblings, 1095 &sibling->fib6_siblings); 1096 break; 1097 } 1098 sibling = rcu_dereference_protected(sibling->fib6_next, 1099 lockdep_is_held(&rt->fib6_table->tb6_lock)); 1100 } 1101 /* For each sibling in the list, increment the counter of 1102 * siblings. BUG() if counters does not match, list of siblings 1103 * is broken! 
1104 */ 1105 fib6_nsiblings = 0; 1106 list_for_each_entry_safe(sibling, temp_sibling, 1107 &rt->fib6_siblings, fib6_siblings) { 1108 sibling->fib6_nsiblings++; 1109 BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings); 1110 fib6_nsiblings++; 1111 } 1112 BUG_ON(fib6_nsiblings != rt->fib6_nsiblings); 1113 rt6_multipath_rebalance(temp_sibling); 1114 } 1115 1116 /* 1117 * insert node 1118 */ 1119 if (!replace) { 1120 if (!add) 1121 pr_warn("NLM_F_CREATE should be set when creating new route\n"); 1122 1123 add: 1124 nlflags |= NLM_F_CREATE; 1125 1126 err = call_fib6_entry_notifiers(info->nl_net, 1127 FIB_EVENT_ENTRY_ADD, 1128 rt, extack); 1129 if (err) 1130 return err; 1131 1132 rcu_assign_pointer(rt->fib6_next, iter); 1133 fib6_info_hold(rt); 1134 rcu_assign_pointer(rt->fib6_node, fn); 1135 rcu_assign_pointer(*ins, rt); 1136 if (!info->skip_notify) 1137 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); 1138 info->nl_net->ipv6.rt6_stats->fib_rt_entries++; 1139 1140 if (!(fn->fn_flags & RTN_RTINFO)) { 1141 info->nl_net->ipv6.rt6_stats->fib_route_nodes++; 1142 fn->fn_flags |= RTN_RTINFO; 1143 } 1144 1145 } else { 1146 int nsiblings; 1147 1148 if (!found) { 1149 if (add) 1150 goto add; 1151 pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); 1152 return -ENOENT; 1153 } 1154 1155 err = call_fib6_entry_notifiers(info->nl_net, 1156 FIB_EVENT_ENTRY_REPLACE, 1157 rt, extack); 1158 if (err) 1159 return err; 1160 1161 fib6_info_hold(rt); 1162 rcu_assign_pointer(rt->fib6_node, fn); 1163 rt->fib6_next = iter->fib6_next; 1164 rcu_assign_pointer(*ins, rt); 1165 if (!info->skip_notify) 1166 inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); 1167 if (!(fn->fn_flags & RTN_RTINFO)) { 1168 info->nl_net->ipv6.rt6_stats->fib_route_nodes++; 1169 fn->fn_flags |= RTN_RTINFO; 1170 } 1171 nsiblings = iter->fib6_nsiblings; 1172 iter->fib6_node = NULL; 1173 fib6_purge_rt(iter, fn, info->nl_net); 1174 if (rcu_access_pointer(fn->rr_ptr) == iter) 1175 fn->rr_ptr = NULL; 1176 
fib6_info_release(iter); 1177 1178 if (nsiblings) { 1179 /* Replacing an ECMP route, remove all siblings */ 1180 ins = &rt->fib6_next; 1181 iter = rcu_dereference_protected(*ins, 1182 lockdep_is_held(&rt->fib6_table->tb6_lock)); 1183 while (iter) { 1184 if (iter->fib6_metric > rt->fib6_metric) 1185 break; 1186 if (rt6_qualify_for_ecmp(iter)) { 1187 *ins = iter->fib6_next; 1188 iter->fib6_node = NULL; 1189 fib6_purge_rt(iter, fn, info->nl_net); 1190 if (rcu_access_pointer(fn->rr_ptr) == iter) 1191 fn->rr_ptr = NULL; 1192 fib6_info_release(iter); 1193 nsiblings--; 1194 info->nl_net->ipv6.rt6_stats->fib_rt_entries--; 1195 } else { 1196 ins = &iter->fib6_next; 1197 } 1198 iter = rcu_dereference_protected(*ins, 1199 lockdep_is_held(&rt->fib6_table->tb6_lock)); 1200 } 1201 WARN_ON(nsiblings != 0); 1202 } 1203 } 1204 1205 return 0; 1206 } 1207 1208 static void fib6_start_gc(struct net *net, struct fib6_info *rt) 1209 { 1210 if (!timer_pending(&net->ipv6.ip6_fib_timer) && 1211 (rt->fib6_flags & RTF_EXPIRES)) 1212 mod_timer(&net->ipv6.ip6_fib_timer, 1213 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 1214 } 1215 1216 void fib6_force_start_gc(struct net *net) 1217 { 1218 if (!timer_pending(&net->ipv6.ip6_fib_timer)) 1219 mod_timer(&net->ipv6.ip6_fib_timer, 1220 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 1221 } 1222 1223 static void __fib6_update_sernum_upto_root(struct fib6_info *rt, 1224 int sernum) 1225 { 1226 struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, 1227 lockdep_is_held(&rt->fib6_table->tb6_lock)); 1228 1229 /* paired with smp_rmb() in rt6_get_cookie_safe() */ 1230 smp_wmb(); 1231 while (fn) { 1232 fn->fn_sernum = sernum; 1233 fn = rcu_dereference_protected(fn->parent, 1234 lockdep_is_held(&rt->fib6_table->tb6_lock)); 1235 } 1236 } 1237 1238 void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt) 1239 { 1240 __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net)); 1241 } 1242 1243 /* allow ipv4 to update sernum via ipv6_stub 
*/
void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
{
	/* Takes and releases the table lock around the update. */
	spin_lock_bh(&f6i->fib6_table->tb6_lock);
	fib6_update_sernum_upto_root(net, f6i);
	spin_unlock_bh(&f6i->fib6_table->tb6_lock);
}

/*
 *	Add routing information to the routing tree.
 *	<destination addr>/<source addr>
 *	with source addr info in sub-trees
 *	Need to own table->tb6_lock
 *
 *	@root:   tree root handed to fib6_add_1() for the destination key
 *	@rt:     route to insert; rt->fib6_table is the table operated on
 *	@info:   netlink context.  When info->nlh is set, a missing
 *	         NLM_F_CREATE clears allow_create and NLM_F_REPLACE sets
 *	         replace_required; without nlh creation is allowed and
 *	         replacement is not required.
 *	@extack: passed through to fib6_add_1() and fib6_add_rt2node()
 *
 *	Returns the result of fib6_add_rt2node() when the node lookup
 *	succeeded, the PTR_ERR() of a failing fib6_add_1(), or -ENOMEM
 *	when the subtree root cannot be allocated.
 */

int fib6_add(struct fib6_node *root, struct fib6_info *rt,
	     struct nl_info *info, struct netlink_ext_ack *extack)
{
	struct fib6_table *table = rt->fib6_table;
	struct fib6_node *fn, *pn = NULL;
	int err = -ENOMEM;
	int allow_create = 1;
	int replace_required = 0;
	int sernum = fib6_new_sernum(info->nl_net);

	if (info->nlh) {
		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
			allow_create = 0;
		if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
			replace_required = 1;
	}
	if (!allow_create && !replace_required)
		pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");

	fn = fib6_add_1(info->nl_net, table, root,
			&rt->fib6_dst.addr, rt->fib6_dst.plen,
			offsetof(struct fib6_info, fib6_dst), allow_create,
			replace_required, extack);
	if (IS_ERR(fn)) {
		err = PTR_ERR(fn);
		/* fn == NULL makes the failure path skip fib6_repair_tree() */
		fn = NULL;
		goto out;
	}

	/* pn remembers the main-tree node; fn may move into a subtree below */
	pn = fn;

#ifdef CONFIG_IPV6_SUBTREES
	if (rt->fib6_src.plen) {
		struct fib6_node *sn;

		if (!rcu_access_pointer(fn->subtree)) {
			struct fib6_node *sfn;

			/*
			 * Create subtree.
			 *
			 *		fn[main tree]
			 *		|
			 *		sfn[subtree root]
			 *		   \
			 *		    sn[new leaf node]
			 */

			/* Create subtree root node */
			sfn = node_alloc(info->nl_net);
			if (!sfn)
				goto failure;	/* err is still -ENOMEM here */

			fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
			rcu_assign_pointer(sfn->leaf,
					   info->nl_net->ipv6.fib6_null_entry);
			sfn->fn_flags = RTN_ROOT;

			/* Now add the first leaf node to new subtree */

			sn = fib6_add_1(info->nl_net, table, sfn,
					&rt->fib6_src.addr, rt->fib6_src.plen,
					offsetof(struct fib6_info, fib6_src),
					allow_create, replace_required, extack);

			if (IS_ERR(sn)) {
				/* If it is failed, discard just allocated
				   root, and then (in failure) stale node
				   in main tree.
				 */
				node_free_immediate(info->nl_net, sfn);
				err = PTR_ERR(sn);
				goto failure;
			}

			/* Now link new subtree to main tree */
			rcu_assign_pointer(sfn->parent, fn);
			rcu_assign_pointer(fn->subtree, sfn);
		} else {
			sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
					&rt->fib6_src.addr, rt->fib6_src.plen,
					offsetof(struct fib6_info, fib6_src),
					allow_create, replace_required, extack);

			if (IS_ERR(sn)) {
				err = PTR_ERR(sn);
				goto failure;
			}
		}

		if (!rcu_access_pointer(fn->leaf)) {
			if (fn->fn_flags & RTN_TL_ROOT) {
				/* put back null_entry for root node */
				rcu_assign_pointer(fn->leaf,
					    info->nl_net->ipv6.fib6_null_entry);
			} else {
				fib6_info_hold(rt);
				rcu_assign_pointer(fn->leaf, rt);
			}
		}
		fn = sn;
	}
#endif

	err = fib6_add_rt2node(fn, rt, info, extack);
	if (!err) {
		if (rt->nh)
			list_add(&rt->nh_list, &rt->nh->f6i_list);
		/* sernum was drawn once at function entry */
		__fib6_update_sernum_upto_root(rt, sernum);
		fib6_start_gc(info->nl_net, rt);
	}

out:
	if (err) {
#ifdef CONFIG_IPV6_SUBTREES
		/*
		 * If fib6_add_1 has cleared the old leaf pointer in the
		 * super-tree leaf node we have to find a new one for it.
		 */
		if (pn != fn) {
			struct fib6_info *pn_leaf =
				rcu_dereference_protected(pn->leaf,
				    lockdep_is_held(&table->tb6_lock));
			if (pn_leaf == rt) {
				/* undo the fib6_info_hold(rt) taken above */
				pn_leaf = NULL;
				RCU_INIT_POINTER(pn->leaf, NULL);
				fib6_info_release(rt);
			}
			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
				pn_leaf = fib6_find_prefix(info->nl_net, table,
							   pn);
#if RT6_DEBUG >= 2
				if (!pn_leaf) {
					WARN_ON(!pn_leaf);
					pn_leaf =
					    info->nl_net->ipv6.fib6_null_entry;
				}
#endif
				fib6_info_hold(pn_leaf);
				rcu_assign_pointer(pn->leaf, pn_leaf);
			}
		}
#endif
		goto failure;
	}
	return err;

failure:
	/* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
	 * 1. fn is an intermediate node and we failed to add the new
	 * route to it in both subtree creation failure and fib6_add_rt2node()
	 * failure case.
	 * 2. fn is the root node in the table and we fail to add the first
	 * default route to it.
	 */
	if (fn &&
	    (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
	     (fn->fn_flags & RTN_TL_ROOT &&
	      !rcu_access_pointer(fn->leaf))))
		fib6_repair_tree(info->nl_net, table, fn);
	return err;
}

/*
 *	Routing tree lookup
 *
 */

/*
 * One entry per lookup stage.  fib6_node_lookup() passes an array of
 * these terminated by an entry whose offset is 0; fib6_node_lookup_1()
 * returns NULL as soon as it is handed such an entry.
 */
struct lookup_args {
	int			offset;		/* key offset on fib6_info */
	const struct in6_addr	*addr;		/* search key */
};

/*
 * Descend from @root following the bits of args->addr until there is no
 * child in the chosen direction, then walk back up through ->parent.
 * On the way up a node is a candidate when it has a subtree or carries
 * RTN_RTINFO, has a leaf, and the leaf's key (located at args->offset)
 * matches args->addr for key->plen bits.  With CONFIG_IPV6_SUBTREES a
 * candidate with a subtree is resolved recursively using args + 1.
 *
 * Returns the matching node with RTN_RTINFO set, or NULL when the walk
 * reaches an RTN_ROOT node without a match or args->offset is 0.
 */
static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root,
					    struct lookup_args *args)
{
	struct fib6_node *fn;
	__be32 dir;

	if (unlikely(args->offset == 0))
		return NULL;

	/*
	 *	Descend on a tree
	 */

	fn = root;

	for (;;) {
		struct fib6_node *next;

		dir = addr_bit_set(args->addr, fn->fn_bit);

		next = dir ? rcu_dereference(fn->right) :
			     rcu_dereference(fn->left);

		if (next) {
			fn = next;
			continue;
		}
		break;
	}

	/* Backtrack towards the root looking for the first matching node */
	while (fn) {
		struct fib6_node *subtree = FIB6_SUBTREE(fn);

		if (subtree || fn->fn_flags & RTN_RTINFO) {
			struct fib6_info *leaf = rcu_dereference(fn->leaf);
			struct rt6key *key;

			if (!leaf)
				goto backtrack;

			key = (struct rt6key *) ((u8 *)leaf + args->offset);

			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
#ifdef CONFIG_IPV6_SUBTREES
				if (subtree) {
					struct fib6_node *sfn;
					sfn = fib6_node_lookup_1(subtree,
								 args + 1);
					if (!sfn)
						goto backtrack;
					fn = sfn;
				}
#endif
				if (fn->fn_flags & RTN_RTINFO)
					return fn;
			}
		}
backtrack:
		if (fn->fn_flags & RTN_ROOT)
			break;

		fn = rcu_dereference(fn->parent);
	}

	return NULL;
}

/* called with rcu_read_lock() held
 *
 * Looks up @daddr (and, with CONFIG_IPV6_SUBTREES, @saddr) starting at
 * @root.  A NULL @daddr starts the lookup at the second args entry.
 * Returns @root when nothing matches or when the match is a node with
 * RTN_TL_ROOT set, so the result is only NULL if @root is.
 */
struct fib6_node *fib6_node_lookup(struct fib6_node *root,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr)
{
	struct fib6_node *fn;
	struct lookup_args args[] = {
		{
			.offset = offsetof(struct fib6_info, fib6_dst),
			.addr = daddr,
		},
#ifdef CONFIG_IPV6_SUBTREES
		{
			.offset = offsetof(struct fib6_info, fib6_src),
			.addr = saddr,
		},
#endif
		{
			.offset = 0,	/* sentinel */
		}
	};

	fn = fib6_node_lookup_1(root, daddr ? args : args + 1);
	if (!fn || fn->fn_flags & RTN_TL_ROOT)
		fn = root;

	return fn;
}

/*
 *	Get node with specified destination prefix (and source prefix,
 *	if subtrees are used)
 *	exact_match == true means we try to find fn with exact match of
 *	the passed in prefix addr
 *	exact_match == false means we try to find fn with longest prefix
 *	match of the passed in prefix addr.
This is useful for finding fn
 *	for cached route as it will be stored in the exception table under
 *	the node with longest prefix length.
 */


/*
 * Walk down from @root along the bits of @addr.  @offset is the byte
 * offset of the rt6key inside struct fib6_info that is compared.
 * Returns the node whose fn_bit equals @plen when its leaf key matches;
 * otherwise NULL when @exact_match is true, or the deepest node that did
 * match (possibly NULL) when it is false.
 */
static struct fib6_node *fib6_locate_1(struct fib6_node *root,
				       const struct in6_addr *addr,
				       int plen, int offset,
				       bool exact_match)
{
	struct fib6_node *fn, *prev = NULL;

	for (fn = root; fn ; ) {
		struct fib6_info *leaf = rcu_dereference(fn->leaf);
		struct rt6key *key;

		/* This node is being deleted */
		if (!leaf) {
			if (plen <= fn->fn_bit)
				goto out;
			else
				goto next;
		}

		key = (struct rt6key *)((u8 *)leaf + offset);

		/*
		 *	Prefix match
		 */
		if (plen < fn->fn_bit ||
		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
			goto out;

		if (plen == fn->fn_bit)
			return fn;

		/* remember the deepest matching node for the non-exact case */
		prev = fn;

next:
		/*
		 *	We have more bits to go
		 */
		if (addr_bit_set(addr, fn->fn_bit))
			fn = rcu_dereference(fn->right);
		else
			fn = rcu_dereference(fn->left);
	}
out:
	if (exact_match)
		return NULL;
	else
		return prev;
}

/*
 * Locate the node for @daddr/@dst_len and, with CONFIG_IPV6_SUBTREES and
 * a non-zero @src_len, continue into that node's subtree (if it has one)
 * with @saddr/@src_len.  Only a node with RTN_RTINFO set is returned;
 * anything else yields NULL.
 */
struct fib6_node *fib6_locate(struct fib6_node *root,
			      const struct in6_addr *daddr, int dst_len,
			      const struct in6_addr *saddr, int src_len,
			      bool exact_match)
{
	struct fib6_node *fn;

	fn = fib6_locate_1(root, daddr, dst_len,
			   offsetof(struct fib6_info, fib6_dst),
			   exact_match);

#ifdef CONFIG_IPV6_SUBTREES
	if (src_len) {
		WARN_ON(saddr == NULL);
		if (fn) {
			struct fib6_node *subtree = FIB6_SUBTREE(fn);

			if (subtree) {
				fn = fib6_locate_1(subtree, saddr, src_len,
					   offsetof(struct fib6_info, fib6_src),
					   exact_match);
			}
		}
	}
#endif

	if (fn && fn->fn_flags & RTN_RTINFO)
		return fn;

	return NULL;
}


/*
 *	Deletion
 *
 */

/*
 * Pick a leaf that can stand in for @fn's own: the null entry for any
 * node with RTN_ROOT set, otherwise the leaf of the left child, else the
 * leaf of the right child.  A node without children is replaced by its
 * FIB6_SUBTREE() and the search repeats.  Returns NULL if the search
 * runs out of nodes.  No reference is taken on the returned entry.
 */
static struct fib6_info *fib6_find_prefix(struct net *net,
					 struct fib6_table *table,
					 struct fib6_node *fn)
{
	struct fib6_node *child_left, *child_right;

	if (fn->fn_flags & RTN_ROOT)
		return net->ipv6.fib6_null_entry;

	while (fn) {
		child_left = rcu_dereference_protected(fn->left,
				    lockdep_is_held(&table->tb6_lock));
		child_right = rcu_dereference_protected(fn->right,
				    lockdep_is_held(&table->tb6_lock));
		if (child_left)
			return rcu_dereference_protected(child_left->leaf,
					lockdep_is_held(&table->tb6_lock));
		if (child_right)
			return rcu_dereference_protected(child_right->leaf,
					lockdep_is_held(&table->tb6_lock));

		fn = FIB6_SUBTREE(fn);
	}
	return NULL;
}

/*
 *	Called to trim the tree of intermediate nodes when possible. "fn"
 *	is the node we want to try and remove.
 *	Need to own table->tb6_lock
 *
 *	Returns fn itself when it is the top-level root (its leaf is reset
 *	to the null entry), otherwise the parent of the last node that was
 *	examined.
 */

static struct fib6_node *fib6_repair_tree(struct net *net,
					  struct fib6_table *table,
					  struct fib6_node *fn)
{
	int children;
	int nstate;
	struct fib6_node *child;
	struct fib6_walker *w;
	int iter = 0;

	/* Set fn->leaf to null_entry for root node. */
	if (fn->fn_flags & RTN_TL_ROOT) {
		rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry);
		return fn;
	}

	/* Each pass removes fn and, if its parent became empty, moves up */
	for (;;) {
		struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_node *pn = rcu_dereference_protected(fn->parent,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_info *new_fn_leaf;

		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
		iter++;

		WARN_ON(fn->fn_flags & RTN_RTINFO);
		WARN_ON(fn->fn_flags & RTN_TL_ROOT);
		WARN_ON(fn_leaf);

		/* children is a bitmask: 1 = right child, 2 = left child */
		children = 0;
		child = NULL;
		if (fn_r)
			child = fn_r, children |= 1;
		if (fn_l)
			child = fn_l, children |= 2;

		if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
		    /* Subtree root (i.e. fn) may have one child */
		    || (children && fn->fn_flags & RTN_ROOT)
#endif
		    ) {
			/* fn has to stay: give it a replacement leaf */
			new_fn_leaf = fib6_find_prefix(net, table, fn);
#if RT6_DEBUG >= 2
			if (!new_fn_leaf) {
				WARN_ON(!new_fn_leaf);
				new_fn_leaf = net->ipv6.fib6_null_entry;
			}
#endif
			fib6_info_hold(new_fn_leaf);
			rcu_assign_pointer(fn->leaf, new_fn_leaf);
			return pn;
		}

		/* Unlink fn from its parent, splicing in its only child if any */
#ifdef CONFIG_IPV6_SUBTREES
		if (FIB6_SUBTREE(pn) == fn) {
			WARN_ON(!(fn->fn_flags & RTN_ROOT));
			RCU_INIT_POINTER(pn->subtree, NULL);
			nstate = FWS_L;
		} else {
			WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
			if (pn_r == fn)
				rcu_assign_pointer(pn->right, child);
			else if (pn_l == fn)
				rcu_assign_pointer(pn->left, child);
#if RT6_DEBUG >= 2
			else
				WARN_ON(1);
#endif
			if (child)
				rcu_assign_pointer(child->parent, pn);
			nstate = FWS_R;
#ifdef CONFIG_IPV6_SUBTREES
		}
#endif

		/* Move any walker parked on fn to a node that still exists */
		read_lock(&net->ipv6.fib6_walker_lock);
		FOR_WALKERS(net, w) {
			if (!child) {
				if (w->node == fn) {
					RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
					w->node = pn;
					w->state = nstate;
				}
			} else {
				if (w->node == fn) {
					w->node = child;
					if (children&2) {
						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
						w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
					} else {
						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
						w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
					}
				}
			}
		}
		read_unlock(&net->ipv6.fib6_walker_lock);

		node_free(net, fn);
		if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
			return pn;

		/* Parent holds no routes and no subtree: repair it as well */
		RCU_INIT_POINTER(pn->leaf, NULL);
		fib6_info_release(pn_leaf);
		fn = pn;
	}
}

/*
 * Remove the route that *@rtp points at from node @fn: unlink it, update
 * the statistics, flush its exceptions, detach it from its multipath
 * siblings, fix up walkers that were positioned on it, repair the tree
 * if @fn lost its last route, then send the notifications and drop one
 * reference on the route.
 */
static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
			   struct fib6_info __rcu **rtp, struct nl_info *info)
{
	struct fib6_walker *w;
	struct fib6_info *rt = rcu_dereference_protected(*rtp,
				    lockdep_is_held(&table->tb6_lock));
	struct net *net = info->nl_net;

	RT6_TRACE("fib6_del_route\n");

	/* Unlink it */
	*rtp = rt->fib6_next;
	rt->fib6_node = NULL;
	net->ipv6.rt6_stats->fib_rt_entries--;
	net->ipv6.rt6_stats->fib_discarded_routes++;

	/* Flush all cached dst in exception table */
	rt6_flush_exceptions(rt);

	/* Reset round-robin state, if necessary */
	if (rcu_access_pointer(fn->rr_ptr) == rt)
		fn->rr_ptr = NULL;

	/* Remove this entry from other siblings */
	if (rt->fib6_nsiblings) {
		struct fib6_info *sibling, *next_sibling;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings, fib6_siblings)
			sibling->fib6_nsiblings--;
		rt->fib6_nsiblings = 0;
		list_del_init(&rt->fib6_siblings);
		rt6_multipath_rebalance(next_sibling);
	}

	/* Adjust walkers */
	read_lock(&net->ipv6.fib6_walker_lock);
	FOR_WALKERS(net, w) {
		if (w->state == FWS_C && w->leaf == rt) {
			RT6_TRACE("walker %p adjusted by delroute\n", w);
			w->leaf = rcu_dereference_protected(rt->fib6_next,
					    lockdep_is_held(&table->tb6_lock));
			if (!w->leaf)
				w->state = FWS_U;
		}
	}
	read_unlock(&net->ipv6.fib6_walker_lock);

	/* If it was last route, call fib6_repair_tree() to:
	 * 1. For root node, put back null_entry as how the table was created.
	 * 2. For other nodes, expunge its radix tree node.
	 */
	if (!rcu_access_pointer(fn->leaf)) {
		if (!(fn->fn_flags & RTN_TL_ROOT)) {
			fn->fn_flags &= ~RTN_RTINFO;
			net->ipv6.rt6_stats->fib_route_nodes--;
		}
		fn = fib6_repair_tree(net, table, fn);
	}

	fib6_purge_rt(rt, fn, net);

	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
	if (!info->skip_notify)
		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
	fib6_info_release(rt);
}

/* Need to own table->tb6_lock
 *
 * Delete @rt from the node it is attached to.  Returns 0 when the route
 * was found in the node's leaf list and removed, -ENOENT when @rt has no
 * node, is the null entry, or is not on that list.
 */
int fib6_del(struct fib6_info *rt, struct nl_info *info)
{
	struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
				    lockdep_is_held(&rt->fib6_table->tb6_lock));
	struct fib6_table *table = rt->fib6_table;
	struct net *net = info->nl_net;
	struct fib6_info __rcu **rtp;
	struct fib6_info __rcu **rtp_next;

	if (!fn || rt == net->ipv6.fib6_null_entry)
		return -ENOENT;

	WARN_ON(!(fn->fn_flags & RTN_RTINFO));

	/*
	 *	Walk the leaf entries looking for ourself
	 */

	for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
		struct fib6_info *cur = rcu_dereference_protected(*rtp,
					lockdep_is_held(&table->tb6_lock));
		if (rt == cur) {
			/* rtp is the link that points at rt, so it can be rewritten */
			fib6_del_route(table, fn, rtp, info);
			return 0;
		}
		rtp_next = &cur->fib6_next;
	}
	return -ENOENT;
}

/*
 *	Tree traversal function.
 *
 *	Certainly, it is not interrupt safe.
 *	However, it is internally reenterable wrt itself and fib6_add/fib6_del.
 *	It means, that we can modify tree during walking
 *	and use this function for garbage collection, clone pruning,
 *	cleaning tree when a device goes down etc. etc.
 *
 *	It guarantees that every node will be traversed,
 *	and that it will be traversed only once.
 *
 *	Callback function w->func may return:
 *	0 -> continue walking.
*	positive value -> walking is suspended (used by tree dumps,
 *	and probably by gc, if it will be split to several slices)
 *	negative value -> terminate walking.
 *
 *	The function itself returns:
 *	0   -> walk is complete.
 *	>0  -> walk is incomplete (i.e. suspended)
 *	<0  -> walk is terminated by an error.
 *
 *	This function is called with tb6_lock held.
 *
 *	The walker is a state machine over w->state: FWS_S (only with
 *	CONFIG_IPV6_SUBTREES) enters a node's subtree, FWS_L descends to
 *	the left child, FWS_R to the right child, FWS_C invokes w->func on
 *	a node that has a leaf and RTN_RTINFO, and FWS_U climbs to the
 *	parent.  While w->skip is non-zero, nodes that would reach w->func
 *	are skipped instead and w->skip is decremented.
 */

static int fib6_walk_continue(struct fib6_walker *w)
{
	struct fib6_node *fn, *pn, *left, *right;

	/* w->root should always be table->tb6_root */
	WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));

	for (;;) {
		fn = w->node;
		if (!fn)
			return 0;

		switch (w->state) {
#ifdef CONFIG_IPV6_SUBTREES
		case FWS_S:
			if (FIB6_SUBTREE(fn)) {
				w->node = FIB6_SUBTREE(fn);
				continue;
			}
			w->state = FWS_L;
#endif
			/* fall through */
		case FWS_L:
			left = rcu_dereference_protected(fn->left, 1);
			if (left) {
				w->node = left;
				w->state = FWS_INIT;
				continue;
			}
			w->state = FWS_R;
			/* fall through */
		case FWS_R:
			right = rcu_dereference_protected(fn->right, 1);
			if (right) {
				w->node = right;
				w->state = FWS_INIT;
				continue;
			}
			w->state = FWS_C;
			w->leaf = rcu_dereference_protected(fn->leaf, 1);
			/* fall through */
		case FWS_C:
			if (w->leaf && fn->fn_flags & RTN_RTINFO) {
				int err;

				if (w->skip) {
					w->skip--;
					goto skip;
				}

				/* any non-zero result ends this call */
				err = w->func(w);
				if (err)
					return err;

				w->count++;
				continue;
			}
skip:
			w->state = FWS_U;
			/* fall through */
		case FWS_U:
			if (fn == w->root)
				return 0;
			pn = rcu_dereference_protected(fn->parent, 1);
			left = rcu_dereference_protected(pn->left, 1);
			right = rcu_dereference_protected(pn->right, 1);
			w->node = pn;
			/* next state depends on which link of pn led to fn */
#ifdef CONFIG_IPV6_SUBTREES
			if (FIB6_SUBTREE(pn) == fn) {
				WARN_ON(!(fn->fn_flags & RTN_ROOT));
				w->state = FWS_L;
				continue;
			}
#endif
			if (left == fn) {
				w->state = FWS_R;
				continue;
			}
			if (right == fn) {
				w->state = FWS_C;
				w->leaf = rcu_dereference_protected(w->node->leaf, 1);
				continue;
			}
#if RT6_DEBUG >= 2
			WARN_ON(1);
#endif
		}
	}
}

/*
 * Start a walk at w->root: reset the walker state, put the walker on the
 * per-netns walker list and run fib6_walk_continue().  The walker is
 * taken off the list again unless the walk was suspended (result > 0),
 * in which case it stays linked.  Returns the fib6_walk_continue()
 * result.
 */
static int fib6_walk(struct net *net, struct fib6_walker *w)
{
	int res;

	w->state = FWS_INIT;
	w->node = w->root;

	fib6_walker_link(net, w);
	res = fib6_walk_continue(w);
	if (res <= 0)
		fib6_walker_unlink(net, w);
	return res;
}

/*
 * Walker callback installed by fib6_clean_tree().  Optionally stamps the
 * cleaner's serial number on the current node, then calls c->func on the
 * node's routes starting at w->leaf:
 *   -1 -> the route is deleted with fib6_del()
 *   -2 -> jump to the last entry of the route's sibling list
 *    0 -> move on to the next route
 * Always returns 0, so this callback never suspends or aborts the walk.
 */
static int fib6_clean_node(struct fib6_walker *w)
{
	int res;
	struct fib6_info *rt;
	struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
	struct nl_info info = {
		.nl_net = c->net,
		.skip_notify = c->skip_notify,
	};

	if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
	    w->node->fn_sernum != c->sernum)
		w->node->fn_sernum = c->sernum;

	/* No callback: this walk only exists to update serial numbers */
	if (!c->func) {
		WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
		w->leaf = NULL;
		return 0;
	}

	for_each_fib6_walker_rt(w) {
		res = c->func(rt, c->arg);
		if (res == -1) {
			/* fib6_del_route() only adjusts walkers whose leaf is
			 * the deleted route; presumably that is why w->leaf is
			 * pointed at rt first.
			 */
			w->leaf = rt;
			res = fib6_del(rt, &info);
			if (res) {
#if RT6_DEBUG >= 2
				pr_debug("%s: del failed: rt=%p@%p err=%d\n",
					 __func__, rt,
					 rcu_access_pointer(rt->fib6_node),
					 res);
#endif
				continue;
			}
			return 0;
		} else if (res == -2) {
			if (WARN_ON(!rt->fib6_nsiblings))
				continue;
			rt = list_last_entry(&rt->fib6_siblings,
					     struct fib6_info, fib6_siblings);
			continue;
		}
		WARN_ON(res != 0);
	}
	w->leaf = rt;
	return 0;
}

/*
 *	Convenient frontend to tree walker.
 *
 *	func is called on each route.
 *		It may return -2 -> skip multipath route.
 *			      -1 -> delete this route.
*			       0 -> continue walking
 *
 *	A sernum other than FIB6_NO_SERNUM_CHANGE is written to every
 *	visited node by fib6_clean_node().  The result of fib6_walk() is
 *	not examined.
 */

static void fib6_clean_tree(struct net *net, struct fib6_node *root,
			    int (*func)(struct fib6_info *, void *arg),
			    int sernum, void *arg, bool skip_notify)
{
	struct fib6_cleaner c;

	c.w.root = root;
	c.w.func = fib6_clean_node;
	c.w.count = 0;
	c.w.skip = 0;
	c.func = func;
	c.sernum = sernum;
	c.arg = arg;
	c.net = net;
	c.skip_notify = skip_notify;

	fib6_walk(net, &c.w);
}

/*
 * Run fib6_clean_tree() on every table found in net's table hash.  The
 * hash is iterated under rcu_read_lock(); each table is cleaned with its
 * tb6_lock held and bottom halves disabled.
 */
static void __fib6_clean_all(struct net *net,
			     int (*func)(struct fib6_info *, void *),
			     int sernum, void *arg, bool skip_notify)
{
	struct fib6_table *table;
	struct hlist_head *head;
	unsigned int h;

	rcu_read_lock();
	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
		head = &net->ipv6.fib_table_hash[h];
		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
			spin_lock_bh(&table->tb6_lock);
			fib6_clean_tree(net, &table->tb6_root,
					func, sernum, arg, skip_notify);
			spin_unlock_bh(&table->tb6_lock);
		}
	}
	rcu_read_unlock();
}

/* Call @func on every route of every table; serial numbers are left alone. */
void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
		    void *arg)
{
	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
}

/*
 * Like fib6_clean_all(), but deletions requested by @func are carried out
 * with skip_notify set in the nl_info handed to fib6_del().
 */
void fib6_clean_all_skip_notify(struct net *net,
				int (*func)(struct fib6_info *, void *),
				void *arg)
{
	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
}

/*
 * Draw a new serial number and write it to the nodes of every table by
 * walking them without a per-route callback.
 */
static void fib6_flush_trees(struct net *net)
{
	int new_sernum = fib6_new_sernum(net);

	__fib6_clean_all(net, NULL, new_sernum, NULL, false);
}

/*
 *	Garbage collection
 */

/*
 * Per-route GC callback for fib6_clean_all(); @arg is a struct
 * fib6_gc_args.  Returns -1 (delete) for a route with RTF_EXPIRES whose
 * non-zero expiry time has passed, otherwise 0.  Routes with a pending
 * expiry are counted in gc_args->more.
 */
static int fib6_age(struct fib6_info *rt, void *arg)
{
	struct fib6_gc_args *gc_args = arg;
	unsigned long now = jiffies;

	/*
	 *	check addrconf expiration here.
	 *	Routes are expired even if they are in use.
	 */

	if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
		if (time_after(now, rt->expires)) {
			RT6_TRACE("expiring %p\n", rt);
			return -1;
		}
		gc_args->more++;
	}

	/*	Also age clones in the exception table.
	 *	Note, that clones are aged out
	 *	only if they are not in use now.
	 */
	rt6_age_exceptions(rt, gc_args, now);

	return 0;
}

/*
 * Run one garbage-collection pass over all tables of @net.
 *
 * @expires: value stored in gc_args.timeout; 0 selects the
 *           ip6_rt_gc_interval sysctl instead
 * @force:   when true, wait for fib6_gc_lock; when false, only try the
 *           lock and, if it is busy, re-arm the GC timer for jiffies + HZ
 *           and return without collecting
 *
 * Afterwards the GC timer is re-armed if fib6_age() reported remaining
 * work (gc_args.more), and deleted otherwise.
 */
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
	struct fib6_gc_args gc_args;
	unsigned long now;

	if (force) {
		spin_lock_bh(&net->ipv6.fib6_gc_lock);
	} else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
		mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
		return;
	}
	gc_args.timeout = expires ? (int)expires :
			  net->ipv6.sysctl.ip6_rt_gc_interval;
	gc_args.more = 0;

	fib6_clean_all(net, fib6_age, &gc_args);
	now = jiffies;
	net->ipv6.ip6_rt_last_gc = now;

	if (gc_args.more)
		mod_timer(&net->ipv6.ip6_fib_timer,
			  round_jiffies(now
					+ net->ipv6.sysctl.ip6_rt_gc_interval));
	else
		del_timer(&net->ipv6.ip6_fib_timer);
	spin_unlock_bh(&net->ipv6.fib6_gc_lock);
}

/* GC timer callback: forced run with the default (sysctl) timeout. */
static void fib6_gc_timer_cb(struct timer_list *t)
{
	struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);

	fib6_run_gc(0, arg, true);
}

/*
 * Per-netns setup: notifier, GC lock, walker lock and list, GC timer,
 * statistics block, table hash, the main table and (with
 * CONFIG_IPV6_MULTIPLE_TABLES) the local table.  Both table roots start
 * with the null entry as their leaf.
 *
 * Returns 0 on success, the fib6_notifier_init() error, or -ENOMEM when
 * an allocation fails; allocations made so far are released and the
 * notifier is torn down on the error path.
 */
static int __net_init fib6_net_init(struct net *net)
{
	size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
	int err;

	err = fib6_notifier_init(net);
	if (err)
		return err;

	spin_lock_init(&net->ipv6.fib6_gc_lock);
	rwlock_init(&net->ipv6.fib6_walker_lock);
	INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
	timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0);

	net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
	if (!net->ipv6.rt6_stats)
		goto out_timer;

	/* Avoid false sharing : Use at least a full cache line */
	size = max_t(size_t, size, L1_CACHE_BYTES);

	net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
	if (!net->ipv6.fib_table_hash)
		goto out_rt6_stats;

	net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
					  GFP_KERNEL);
	if (!net->ipv6.fib6_main_tbl)
		goto out_fib_table_hash;

	net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
	rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
			   net->ipv6.fib6_null_entry);
	net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
	inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
					   GFP_KERNEL);
	if (!net->ipv6.fib6_local_tbl)
		goto out_fib6_main_tbl;
	net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
	rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
			   net->ipv6.fib6_null_entry);
	net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
	inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
#endif
	fib6_tables_init(net);

	return 0;

	/* Error unwinding, in reverse order of setup */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_fib6_main_tbl:
	kfree(net->ipv6.fib6_main_tbl);
#endif
out_fib_table_hash:
	kfree(net->ipv6.fib_table_hash);
out_rt6_stats:
	kfree(net->ipv6.rt6_stats);
out_timer:
	fib6_notifier_exit(net);
	return -ENOMEM;
}

/*
 * Per-netns teardown: stop the GC timer (waiting for a running callback),
 * unhash and free every table, then free the hash and the statistics and
 * shut down the notifier.
 */
static void fib6_net_exit(struct net *net)
{
	unsigned int i;

	del_timer_sync(&net->ipv6.ip6_fib_timer);

	for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
		struct hlist_head *head = &net->ipv6.fib_table_hash[i];
		struct hlist_node *tmp;
		struct fib6_table *tb;

		hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
			hlist_del(&tb->tb6_hlist);
			fib6_free_table(tb);
		}
	}

	kfree(net->ipv6.fib_table_hash);
	kfree(net->ipv6.rt6_stats);
	fib6_notifier_exit(net);
}

/* Per-network-namespace init/exit hooks for the FIB. */
static struct pernet_operations fib6_net_ops = {
	.init = fib6_net_init,
	.exit = fib6_net_exit,
};

/*
 * Subsystem init: create the fib6_node slab cache, register the pernet
 * operations and the RTM_GETROUTE dump handler, then publish
 * fib6_flush_trees through __fib6_flush_trees.  Returns 0 on success or
 * a negative errno, with the earlier steps undone on failure.
 */
int __init fib6_init(void)
{
	int ret = -ENOMEM;

	fib6_node_kmem = kmem_cache_create("fib6_nodes",
					   sizeof(struct fib6_node),
					   0, SLAB_HWCACHE_ALIGN,
					   NULL);
	if (!fib6_node_kmem)
		goto out;

	ret = register_pernet_subsys(&fib6_net_ops);
	if (ret)
		goto out_kmem_cache_create;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
				   inet6_dump_fib, 0);
	if (ret)
		goto out_unregister_subsys;

	__fib6_flush_trees = fib6_flush_trees;
out:
	return ret;

out_unregister_subsys:
	unregister_pernet_subsys(&fib6_net_ops);
out_kmem_cache_create:
	kmem_cache_destroy(fib6_node_kmem);
	goto out;
}

/* Unregister the pernet operations and destroy the fib6_node slab cache. */
void fib6_gc_cleanup(void)
{
	unregister_pernet_subsys(&fib6_net_ops);
	kmem_cache_destroy(fib6_node_kmem);
}

#ifdef CONFIG_PROC_FS
/*
 * seq_file ->show: print one route as a single line: destination
 * address and prefix length, source address and prefix length (zeros
 * without CONFIG_IPV6_SUBTREES), gateway address (zeros when the nexthop
 * has no gateway), metric, reference count, a constant 0, flags and the
 * device name.  RTF_GATEWAY is added to the printed flags when a gateway
 * is present.  Clears iter->w.leaf afterwards.
 */
static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
	struct fib6_info *rt = v;
	struct ipv6_route_iter *iter = seq->private;
	struct fib6_nh *fib6_nh = rt->fib6_nh;
	unsigned int flags = rt->fib6_flags;
	const struct net_device *dev;

	/* a nexthop object, when present, overrides the embedded fib6_nh */
	if (rt->nh)
		fib6_nh = nexthop_fib6_nh(rt->nh);

	seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
#else
	seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
	if (fib6_nh->fib_nh_gw_family) {
		flags |= RTF_GATEWAY;
		seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
	} else {
		seq_puts(seq, "00000000000000000000000000000000");
	}

	dev = fib6_nh->fib_nh_dev;
	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
		   rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
		   flags, dev ? dev->name : "");
	iter->w.leaf = NULL;
	return 0;
}

/*
 * Walker callback for the seq_file dump.  Returns 1 to suspend the walk
 * with iter->w.leaf pointing at the route to show.  While iter->skip is
 * non-zero, routes on the current node are stepped over (decrementing
 * iter->skip once per step); 0 is returned when the node's route list is
 * exhausted first, which lets the walk continue.
 */
static int ipv6_route_yield(struct fib6_walker *w)
{
	struct ipv6_route_iter *iter = w->args;

	if (!iter->skip)
		return 1;

	do {
		iter->w.leaf = rcu_dereference_protected(
				iter->w.leaf->fib6_next,
				lockdep_is_held(&iter->tbl->tb6_lock));
		iter->skip--;
		if (!iter->skip && iter->w.leaf)
			return 1;
	} while (iter->w.leaf);

	return 0;
}

/*
 * Reset the iterator's embedded walker to start at the root of iter->tbl,
 * record the root's current serial number and link the walker into the
 * per-netns walker list.
 */
static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
				      struct net *net)
{
	memset(&iter->w, 0, sizeof(iter->w));
	iter->w.func = ipv6_route_yield;
	iter->w.root = &iter->tbl->tb6_root;
	iter->w.state = FWS_INIT;
	iter->w.node = iter->w.root;
	iter->w.args = iter;
	iter->sernum = iter->w.root->fn_sernum;
	INIT_LIST_HEAD(&iter->w.lh);
	fib6_walker_link(net, &iter->w);
}

/*
 * Return the table that follows @tbl when iterating net's table hash, or
 * the first table when @tbl is NULL.  Returns NULL when no table is left.
 */
static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
						    struct net *net)
{
	unsigned int h;
	struct hlist_node *node;

	if (tbl) {
		/* presumably tables are hashed by tb6_id, so h is the bucket
		 * after tbl's own -- confirm against the table-hash code
		 */
		h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
		node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
	} else {
		h = 0;
		node = NULL;
	}

	while (!node && h < FIB6_TABLE_HASHSZ) {
		node = rcu_dereference_bh(
			hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
	}
	return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
}

/*
 * If the root's serial number differs from the one recorded in the
 * iterator, restart the walk from the root and set w.skip to w.count,
 * the number of successful callback invocations so far.
 */
static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
{
	if (iter->sernum != iter->w.root->fn_sernum) {
		iter->sernum = iter->w.root->fn_sernum;
		iter->w.state = FWS_INIT;
		iter->w.node = iter->w.root;
		WARN_ON(iter->w.skip);
		iter->w.skip = iter->w.count;
	}
}

/*
 * seq_file ->next: return the route after @v.  The next entry on the
 * same node's list is returned directly; otherwise the tree walk is
 * resumed under tb6_lock and, once a table is exhausted, restarted on
 * the following table.  Returns NULL when the walk fails or no table is
 * left.  *pos is not advanced on the NULL returns or when @v is NULL.
 */
static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int r;
	struct fib6_info *n;
	struct net *net = seq_file_net(seq);
	struct ipv6_route_iter *iter = seq->private;

	if (!v)
		goto iter_table;

	n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
	if (n) {
		++*pos;
		return n;
	}

iter_table:
	ipv6_route_check_sernum(iter);
	spin_lock_bh(&iter->tbl->tb6_lock);
	r = fib6_walk_continue(&iter->w);
	spin_unlock_bh(&iter->tbl->tb6_lock);
	if (r > 0) {
		/* walk suspended by ipv6_route_yield(): a route is ready */
		if (v)
			++*pos;
		return iter->w.leaf;
	} else if (r < 0) {
		fib6_walker_unlink(net, &iter->w);
		return NULL;
	}
	/* r == 0: this table is done, move on to the next one */
	fib6_walker_unlink(net, &iter->w);

	iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
	if (!iter->tbl)
		return NULL;

	ipv6_route_seq_setup_walk(iter, net);
	goto iter_table;
}

/*
 * seq_file ->start: enter the RCU-bh read side (left again in ->stop),
 * pick the first table, set iter->skip to *pos and fetch the first route
 * through ipv6_route_seq_next().  Returns NULL when there is no table.
 */
static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU_BH)
{
	struct net *net = seq_file_net(seq);
	struct ipv6_route_iter *iter = seq->private;

	rcu_read_lock_bh();
	iter->tbl = ipv6_route_seq_next_table(NULL, net);
	iter->skip = *pos;

	if (iter->tbl) {
		ipv6_route_seq_setup_walk(iter, net);
		return ipv6_route_seq_next(seq, NULL, pos);
	} else {
		return NULL;
	}
}

/*
 * True while the walker has a current node and is not in state FWS_U at
 * its root.
 */
static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
{
	struct fib6_walker *w = &iter->w;
	return w->node && !(w->state == FWS_U && w->node == w->root);
}

/*
 * seq_file ->stop: unlink the walker if it is still active, then leave
 * the RCU-bh read side entered in ->start.
 */
static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU_BH)
{
	struct net *net = seq_file_net(seq);
	struct ipv6_route_iter *iter = seq->private;

	if (ipv6_route_iter_active(iter))
		fib6_walker_unlink(net, &iter->w);

	rcu_read_unlock_bh();
}

/* seq_file operations for the route dump defined above. */
const struct seq_operations ipv6_route_seq_ops = {
	.start	= ipv6_route_seq_start,
	.next	= ipv6_route_seq_next,
	.stop	= ipv6_route_seq_stop,
	.show	= ipv6_route_seq_show
};
#endif /* CONFIG_PROC_FS */