1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: semantics. 7 * 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16 #include <asm/uaccess.h> 17 #include <asm/system.h> 18 #include <linux/bitops.h> 19 #include <linux/types.h> 20 #include <linux/kernel.h> 21 #include <linux/jiffies.h> 22 #include <linux/mm.h> 23 #include <linux/string.h> 24 #include <linux/socket.h> 25 #include <linux/sockios.h> 26 #include <linux/errno.h> 27 #include <linux/in.h> 28 #include <linux/inet.h> 29 #include <linux/inetdevice.h> 30 #include <linux/netdevice.h> 31 #include <linux/if_arp.h> 32 #include <linux/proc_fs.h> 33 #include <linux/skbuff.h> 34 #include <linux/init.h> 35 36 #include <net/arp.h> 37 #include <net/ip.h> 38 #include <net/protocol.h> 39 #include <net/route.h> 40 #include <net/tcp.h> 41 #include <net/sock.h> 42 #include <net/ip_fib.h> 43 #include <net/netlink.h> 44 #include <net/nexthop.h> 45 46 #include "fib_lookup.h" 47 48 static DEFINE_SPINLOCK(fib_info_lock); 49 static struct hlist_head *fib_info_hash; 50 static struct hlist_head *fib_info_laddrhash; 51 static unsigned int fib_hash_size; 52 static unsigned int fib_info_cnt; 53 54 #define DEVINDEX_HASHBITS 8 55 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 56 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 57 58 #ifdef CONFIG_IP_ROUTE_MULTIPATH 59 60 static DEFINE_SPINLOCK(fib_multipath_lock); 61 62 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 63 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 64 65 #define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \ 66 for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++) 67 68 #else /* CONFIG_IP_ROUTE_MULTIPATH */ 69 70 /* Hope, that gcc will optimize it to get rid of dummy loop */ 71 72 #define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ 73 for (nhsel=0; nhsel < 1; nhsel++) 74 75 #define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 76 for (nhsel=0; nhsel < 1; nhsel++) 77 78 #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 79 80 #define endfor_nexthops(fi) } 81 82 83 static const struct 84 { 85 int error; 86 u8 scope; 87 } fib_props[RTN_MAX + 1] = { 88 { 89 .error = 0, 90 .scope = RT_SCOPE_NOWHERE, 91 }, /* RTN_UNSPEC */ 92 { 93 .error = 0, 94 .scope = RT_SCOPE_UNIVERSE, 95 }, /* RTN_UNICAST */ 96 { 97 .error = 0, 98 .scope = RT_SCOPE_HOST, 99 }, /* RTN_LOCAL */ 100 { 101 .error = 0, 102 .scope = RT_SCOPE_LINK, 103 }, /* RTN_BROADCAST */ 104 { 105 .error = 0, 106 .scope = RT_SCOPE_LINK, 107 }, /* RTN_ANYCAST */ 108 { 109 .error = 0, 110 .scope = RT_SCOPE_UNIVERSE, 111 }, /* RTN_MULTICAST */ 112 { 113 .error = -EINVAL, 114 .scope = RT_SCOPE_UNIVERSE, 115 }, /* RTN_BLACKHOLE */ 116 { 117 .error = -EHOSTUNREACH, 118 .scope = RT_SCOPE_UNIVERSE, 119 }, /* RTN_UNREACHABLE */ 120 { 121 .error = -EACCES, 122 .scope = RT_SCOPE_UNIVERSE, 123 }, /* RTN_PROHIBIT */ 124 { 125 .error = -EAGAIN, 126 .scope = RT_SCOPE_UNIVERSE, 127 }, /* RTN_THROW */ 128 { 129 .error = -EINVAL, 130 .scope = RT_SCOPE_NOWHERE, 131 }, /* RTN_NAT */ 132 { 133 .error = -EINVAL, 134 .scope = RT_SCOPE_NOWHERE, 135 }, /* RTN_XRESOLVE */ 136 }; 137 138 139 /* Release a nexthop info record */ 140 141 void free_fib_info(struct fib_info *fi) 142 { 143 if (fi->fib_dead == 0) { 144 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi); 145 return; 146 } 147 change_nexthops(fi) { 148 if (nexthop_nh->nh_dev) 149 dev_put(nexthop_nh->nh_dev); 150 nexthop_nh->nh_dev = NULL; 151 } endfor_nexthops(fi); 152 fib_info_cnt--; 153 release_net(fi->fib_net); 154 kfree(fi); 155 } 156 157 void fib_release_info(struct fib_info *fi) 158 { 159 spin_lock_bh(&fib_info_lock); 160 if (fi && --fi->fib_treeref == 0) { 161 hlist_del(&fi->fib_hash); 162 if (fi->fib_prefsrc) 163 hlist_del(&fi->fib_lhash); 164 change_nexthops(fi) { 165 if (!nexthop_nh->nh_dev) 166 continue; 167 hlist_del(&nexthop_nh->nh_hash); 168 } endfor_nexthops(fi) 169 fi->fib_dead = 1; 170 fib_info_put(fi); 171 } 172 spin_unlock_bh(&fib_info_lock); 173 } 174 175 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 176 { 177 const struct fib_nh *onh = ofi->fib_nh; 178 179 for_nexthops(fi) { 180 if (nh->nh_oif != onh->nh_oif || 181 nh->nh_gw != onh->nh_gw || 182 nh->nh_scope != onh->nh_scope || 183 #ifdef CONFIG_IP_ROUTE_MULTIPATH 184 nh->nh_weight != onh->nh_weight || 185 #endif 186 #ifdef CONFIG_NET_CLS_ROUTE 187 nh->nh_tclassid != onh->nh_tclassid || 188 #endif 189 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 190 return -1; 191 onh++; 192 } endfor_nexthops(fi); 193 return 0; 194 } 195 196 static inline unsigned int fib_devindex_hashfn(unsigned int val) 197 { 198 unsigned int mask = DEVINDEX_HASHSIZE - 1; 199 200 return (val ^ 201 (val >> DEVINDEX_HASHBITS) ^ 202 (val >> (DEVINDEX_HASHBITS * 2))) & mask; 203 } 204 205 static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 206 { 207 unsigned int mask = (fib_hash_size - 1); 208 unsigned int val = fi->fib_nhs; 209 210 val ^= fi->fib_protocol; 211 val ^= (__force u32)fi->fib_prefsrc; 212 val ^= fi->fib_priority; 213 for_nexthops(fi) { 214 val ^= fib_devindex_hashfn(nh->nh_oif); 215 } endfor_nexthops(fi) 216 217 return (val ^ (val >> 7) ^ (val >> 12)) & mask; 218 } 219 220 static struct fib_info *fib_find_info(const struct fib_info *nfi) 221 { 222 struct hlist_head *head; 223 struct hlist_node *node; 224 struct fib_info *fi; 225 unsigned int hash; 226 227 hash = fib_info_hashfn(nfi); 228 head = &fib_info_hash[hash]; 229 230 hlist_for_each_entry(fi, node, head, fib_hash) { 231 if (!net_eq(fi->fib_net, nfi->fib_net)) 232 continue; 233 if (fi->fib_nhs != nfi->fib_nhs) 234 continue; 235 if (nfi->fib_protocol == fi->fib_protocol && 236 nfi->fib_prefsrc == fi->fib_prefsrc && 237 nfi->fib_priority == fi->fib_priority && 238 memcmp(nfi->fib_metrics, fi->fib_metrics, 239 sizeof(fi->fib_metrics)) == 0 && 240 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 241 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 242 return fi; 243 } 244 245 return NULL; 246 } 247 248 /* Check, that the gateway is already configured. 249 Used only by redirect accept routine. 250 */ 251 252 int ip_fib_check_default(__be32 gw, struct net_device *dev) 253 { 254 struct hlist_head *head; 255 struct hlist_node *node; 256 struct fib_nh *nh; 257 unsigned int hash; 258 259 spin_lock(&fib_info_lock); 260 261 hash = fib_devindex_hashfn(dev->ifindex); 262 head = &fib_info_devhash[hash]; 263 hlist_for_each_entry(nh, node, head, nh_hash) { 264 if (nh->nh_dev == dev && 265 nh->nh_gw == gw && 266 !(nh->nh_flags&RTNH_F_DEAD)) { 267 spin_unlock(&fib_info_lock); 268 return 0; 269 } 270 } 271 272 spin_unlock(&fib_info_lock); 273 274 return -1; 275 } 276 277 static inline size_t fib_nlmsg_size(struct fib_info *fi) 278 { 279 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 280 + nla_total_size(4) /* RTA_TABLE */ 281 + nla_total_size(4) /* RTA_DST */ 282 + nla_total_size(4) /* RTA_PRIORITY */ 283 + nla_total_size(4); /* RTA_PREFSRC */ 284 285 /* space for nested metrics */ 286 payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 287 288 if (fi->fib_nhs) { 289 /* Also handles the special case fib_nhs == 1 */ 290 291 /* each nexthop is packed in an attribute */ 292 size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 293 294 /* may contain flow and gateway attribute */ 295 nhsize += 2 * nla_total_size(4); 296 297 /* all nexthops are packed in a nested attribute */ 298 payload += nla_total_size(fi->fib_nhs * nhsize); 299 } 300 301 return payload; 302 } 303 304 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 305 int dst_len, u32 tb_id, struct nl_info *info, 306 unsigned int nlm_flags) 307 { 308 struct sk_buff *skb; 309 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 310 int err = -ENOBUFS; 311 312 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 313 if (skb == NULL) 314 goto errout; 315 316 err = fib_dump_info(skb, info->pid, seq, event, tb_id, 317 fa->fa_type, fa->fa_scope, key, dst_len, 318 fa->fa_tos, fa->fa_info, nlm_flags); 319 if (err < 0) { 320 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 321 WARN_ON(err == -EMSGSIZE); 322 kfree_skb(skb); 323 goto errout; 324 } 325 rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, 326 info->nlh, GFP_KERNEL); 327 return; 328 errout: 329 if (err < 0) 330 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 331 } 332 333 /* Return the first fib alias matching TOS with 334 * priority less than or equal to PRIO. 335 */ 336 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 337 { 338 if (fah) { 339 struct fib_alias *fa; 340 list_for_each_entry(fa, fah, fa_list) { 341 if (fa->fa_tos > tos) 342 continue; 343 if (fa->fa_info->fib_priority >= prio || 344 fa->fa_tos < tos) 345 return fa; 346 } 347 } 348 return NULL; 349 } 350 351 int fib_detect_death(struct fib_info *fi, int order, 352 struct fib_info **last_resort, int *last_idx, int dflt) 353 { 354 struct neighbour *n; 355 int state = NUD_NONE; 356 357 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 358 if (n) { 359 state = n->nud_state; 360 neigh_release(n); 361 } 362 if (state == NUD_REACHABLE) 363 return 0; 364 if ((state&NUD_VALID) && order != dflt) 365 return 0; 366 if ((state&NUD_VALID) || 367 (*last_idx<0 && order > dflt)) { 368 *last_resort = fi; 369 *last_idx = order; 370 } 371 return 1; 372 } 373 374 #ifdef CONFIG_IP_ROUTE_MULTIPATH 375 376 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) 377 { 378 int nhs = 0; 379 380 while (rtnh_ok(rtnh, remaining)) { 381 nhs++; 382 rtnh = rtnh_next(rtnh, &remaining); 383 } 384 385 /* leftover implies invalid nexthop configuration, discard it */ 386 return remaining > 0 ? 0 : nhs; 387 } 388 389 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 390 int remaining, struct fib_config *cfg) 391 { 392 change_nexthops(fi) { 393 int attrlen; 394 395 if (!rtnh_ok(rtnh, remaining)) 396 return -EINVAL; 397 398 nexthop_nh->nh_flags = 399 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 400 nexthop_nh->nh_oif = rtnh->rtnh_ifindex; 401 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; 402 403 attrlen = rtnh_attrlen(rtnh); 404 if (attrlen > 0) { 405 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 406 407 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 408 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; 409 #ifdef CONFIG_NET_CLS_ROUTE 410 nla = nla_find(attrs, attrlen, RTA_FLOW); 411 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 412 #endif 413 } 414 415 rtnh = rtnh_next(rtnh, &remaining); 416 } endfor_nexthops(fi); 417 418 return 0; 419 } 420 421 #endif 422 423 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) 424 { 425 #ifdef CONFIG_IP_ROUTE_MULTIPATH 426 struct rtnexthop *rtnh; 427 int remaining; 428 #endif 429 430 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 431 return 1; 432 433 if (cfg->fc_oif || cfg->fc_gw) { 434 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 435 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 436 return 0; 437 return 1; 438 } 439 440 #ifdef CONFIG_IP_ROUTE_MULTIPATH 441 if (cfg->fc_mp == NULL) 442 return 0; 443 444 rtnh = cfg->fc_mp; 445 remaining = cfg->fc_mp_len; 446 447 for_nexthops(fi) { 448 int attrlen; 449 450 if (!rtnh_ok(rtnh, remaining)) 451 return -EINVAL; 452 453 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) 454 return 1; 455 456 attrlen = rtnh_attrlen(rtnh); 457 if (attrlen < 0) { 458 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 459 460 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 461 if (nla && nla_get_be32(nla) != nh->nh_gw) 462 return 1; 463 #ifdef CONFIG_NET_CLS_ROUTE 464 nla = nla_find(attrs, attrlen, RTA_FLOW); 465 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 466 return 1; 467 #endif 468 } 469 470 rtnh = rtnh_next(rtnh, &remaining); 471 } endfor_nexthops(fi); 472 #endif 473 return 0; 474 } 475 476 477 /* 478 Picture 479 ------- 480 481 Semantics of nexthop is very messy by historical reasons. 482 We have to take into account, that: 483 a) gateway can be actually local interface address, 484 so that gatewayed route is direct. 485 b) gateway must be on-link address, possibly 486 described not by an ifaddr, but also by a direct route. 487 c) If both gateway and interface are specified, they should not 488 contradict. 489 d) If we use tunnel routes, gateway could be not on-link. 490 491 Attempt to reconcile all of these (alas, self-contradictory) conditions 492 results in pretty ugly and hairy code with obscure logic. 493 494 I chose to generalized it instead, so that the size 495 of code does not increase practically, but it becomes 496 much more general. 497 Every prefix is assigned a "scope" value: "host" is local address, 498 "link" is direct route, 499 [ ... "site" ... "interior" ... ] 500 and "universe" is true gateway route with global meaning. 501 502 Every prefix refers to a set of "nexthop"s (gw, oif), 503 where gw must have narrower scope. This recursion stops 504 when gw has LOCAL scope or if "nexthop" is declared ONLINK, 505 which means that gw is forced to be on link. 506 507 Code is still hairy, but now it is apparently logically 508 consistent and very flexible. F.e. as by-product it allows 509 to co-exists in peace independent exterior and interior 510 routing processes. 511 512 Normally it looks as following. 513 514 {universe prefix} -> (gw, oif) [scope link] 515 | 516 |-> {link prefix} -> (gw, oif) [scope local] 517 | 518 |-> {local prefix} (terminal node) 519 */ 520 521 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 522 struct fib_nh *nh) 523 { 524 int err; 525 struct net *net; 526 527 net = cfg->fc_nlinfo.nl_net; 528 if (nh->nh_gw) { 529 struct fib_result res; 530 531 if (nh->nh_flags&RTNH_F_ONLINK) { 532 struct net_device *dev; 533 534 if (cfg->fc_scope >= RT_SCOPE_LINK) 535 return -EINVAL; 536 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) 537 return -EINVAL; 538 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) 539 return -ENODEV; 540 if (!(dev->flags&IFF_UP)) 541 return -ENETDOWN; 542 nh->nh_dev = dev; 543 dev_hold(dev); 544 nh->nh_scope = RT_SCOPE_LINK; 545 return 0; 546 } 547 { 548 struct flowi fl = { 549 .nl_u = { 550 .ip4_u = { 551 .daddr = nh->nh_gw, 552 .scope = cfg->fc_scope + 1, 553 }, 554 }, 555 .oif = nh->nh_oif, 556 }; 557 558 /* It is not necessary, but requires a bit of thinking */ 559 if (fl.fl4_scope < RT_SCOPE_LINK) 560 fl.fl4_scope = RT_SCOPE_LINK; 561 if ((err = fib_lookup(net, &fl, &res)) != 0) 562 return err; 563 } 564 err = -EINVAL; 565 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 566 goto out; 567 nh->nh_scope = res.scope; 568 nh->nh_oif = FIB_RES_OIF(res); 569 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 570 goto out; 571 dev_hold(nh->nh_dev); 572 err = -ENETDOWN; 573 if (!(nh->nh_dev->flags & IFF_UP)) 574 goto out; 575 err = 0; 576 out: 577 fib_res_put(&res); 578 return err; 579 } else { 580 struct in_device *in_dev; 581 582 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 583 return -EINVAL; 584 585 in_dev = inetdev_by_index(net, nh->nh_oif); 586 if (in_dev == NULL) 587 return -ENODEV; 588 if (!(in_dev->dev->flags&IFF_UP)) { 589 in_dev_put(in_dev); 590 return -ENETDOWN; 591 } 592 nh->nh_dev = in_dev->dev; 593 dev_hold(nh->nh_dev); 594 nh->nh_scope = RT_SCOPE_HOST; 595 in_dev_put(in_dev); 596 } 597 return 0; 598 } 599 600 static inline unsigned int fib_laddr_hashfn(__be32 val) 601 { 602 unsigned int mask = (fib_hash_size - 1); 603 604 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; 605 } 606 607 static struct hlist_head *fib_hash_alloc(int bytes) 608 { 609 if (bytes <= PAGE_SIZE) 610 return kzalloc(bytes, GFP_KERNEL); 611 else 612 return (struct hlist_head *) 613 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes)); 614 } 615 616 static void fib_hash_free(struct hlist_head *hash, int bytes) 617 { 618 if (!hash) 619 return; 620 621 if (bytes <= PAGE_SIZE) 622 kfree(hash); 623 else 624 free_pages((unsigned long) hash, get_order(bytes)); 625 } 626 627 static void fib_hash_move(struct hlist_head *new_info_hash, 628 struct hlist_head *new_laddrhash, 629 unsigned int new_size) 630 { 631 struct hlist_head *old_info_hash, *old_laddrhash; 632 unsigned int old_size = fib_hash_size; 633 unsigned int i, bytes; 634 635 spin_lock_bh(&fib_info_lock); 636 old_info_hash = fib_info_hash; 637 old_laddrhash = fib_info_laddrhash; 638 fib_hash_size = new_size; 639 640 for (i = 0; i < old_size; i++) { 641 struct hlist_head *head = &fib_info_hash[i]; 642 struct hlist_node *node, *n; 643 struct fib_info *fi; 644 645 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { 646 struct hlist_head *dest; 647 unsigned int new_hash; 648 649 hlist_del(&fi->fib_hash); 650 651 new_hash = fib_info_hashfn(fi); 652 dest = &new_info_hash[new_hash]; 653 hlist_add_head(&fi->fib_hash, dest); 654 } 655 } 656 fib_info_hash = new_info_hash; 657 658 for (i = 0; i < old_size; i++) { 659 struct hlist_head *lhead = &fib_info_laddrhash[i]; 660 struct hlist_node *node, *n; 661 struct fib_info *fi; 662 663 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { 664 struct hlist_head *ldest; 665 unsigned int new_hash; 666 667 hlist_del(&fi->fib_lhash); 668 669 new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 670 ldest = &new_laddrhash[new_hash]; 671 hlist_add_head(&fi->fib_lhash, ldest); 672 } 673 } 674 fib_info_laddrhash = new_laddrhash; 675 676 spin_unlock_bh(&fib_info_lock); 677 678 bytes = old_size * sizeof(struct hlist_head *); 679 fib_hash_free(old_info_hash, bytes); 680 fib_hash_free(old_laddrhash, bytes); 681 } 682 683 struct fib_info *fib_create_info(struct fib_config *cfg) 684 { 685 int err; 686 struct fib_info *fi = NULL; 687 struct fib_info *ofi; 688 int nhs = 1; 689 struct net *net = cfg->fc_nlinfo.nl_net; 690 691 /* Fast check to catch the most weird cases */ 692 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 693 goto err_inval; 694 695 #ifdef CONFIG_IP_ROUTE_MULTIPATH 696 if (cfg->fc_mp) { 697 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); 698 if (nhs == 0) 699 goto err_inval; 700 } 701 #endif 702 703 err = -ENOBUFS; 704 if (fib_info_cnt >= fib_hash_size) { 705 unsigned int new_size = fib_hash_size << 1; 706 struct hlist_head *new_info_hash; 707 struct hlist_head *new_laddrhash; 708 unsigned int bytes; 709 710 if (!new_size) 711 new_size = 1; 712 bytes = new_size * sizeof(struct hlist_head *); 713 new_info_hash = fib_hash_alloc(bytes); 714 new_laddrhash = fib_hash_alloc(bytes); 715 if (!new_info_hash || !new_laddrhash) { 716 fib_hash_free(new_info_hash, bytes); 717 fib_hash_free(new_laddrhash, bytes); 718 } else 719 fib_hash_move(new_info_hash, new_laddrhash, new_size); 720 721 if (!fib_hash_size) 722 goto failure; 723 } 724 725 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 726 if (fi == NULL) 727 goto failure; 728 fib_info_cnt++; 729 730 fi->fib_net = hold_net(net); 731 fi->fib_protocol = cfg->fc_protocol; 732 fi->fib_flags = cfg->fc_flags; 733 fi->fib_priority = cfg->fc_priority; 734 fi->fib_prefsrc = cfg->fc_prefsrc; 735 736 fi->fib_nhs = nhs; 737 change_nexthops(fi) { 738 nexthop_nh->nh_parent = fi; 739 } endfor_nexthops(fi) 740 741 if (cfg->fc_mx) { 742 struct nlattr *nla; 743 int remaining; 744 745 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 746 int type = nla_type(nla); 747 748 if (type) { 749 if (type > RTAX_MAX) 750 goto err_inval; 751 fi->fib_metrics[type - 1] = nla_get_u32(nla); 752 } 753 } 754 } 755 756 if (cfg->fc_mp) { 757 #ifdef CONFIG_IP_ROUTE_MULTIPATH 758 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); 759 if (err != 0) 760 goto failure; 761 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) 762 goto err_inval; 763 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 764 goto err_inval; 765 #ifdef CONFIG_NET_CLS_ROUTE 766 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 767 goto err_inval; 768 #endif 769 #else 770 goto err_inval; 771 #endif 772 } else { 773 struct fib_nh *nh = fi->fib_nh; 774 775 nh->nh_oif = cfg->fc_oif; 776 nh->nh_gw = cfg->fc_gw; 777 nh->nh_flags = cfg->fc_flags; 778 #ifdef CONFIG_NET_CLS_ROUTE 779 nh->nh_tclassid = cfg->fc_flow; 780 #endif 781 #ifdef CONFIG_IP_ROUTE_MULTIPATH 782 nh->nh_weight = 1; 783 #endif 784 } 785 786 if (fib_props[cfg->fc_type].error) { 787 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 788 goto err_inval; 789 goto link_it; 790 } 791 792 if (cfg->fc_scope > RT_SCOPE_HOST) 793 goto err_inval; 794 795 if (cfg->fc_scope == RT_SCOPE_HOST) { 796 struct fib_nh *nh = fi->fib_nh; 797 798 /* Local address is added. */ 799 if (nhs != 1 || nh->nh_gw) 800 goto err_inval; 801 nh->nh_scope = RT_SCOPE_NOWHERE; 802 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); 803 err = -ENODEV; 804 if (nh->nh_dev == NULL) 805 goto failure; 806 } else { 807 change_nexthops(fi) { 808 if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0) 809 goto failure; 810 } endfor_nexthops(fi) 811 } 812 813 if (fi->fib_prefsrc) { 814 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 815 fi->fib_prefsrc != cfg->fc_dst) 816 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) 817 goto err_inval; 818 } 819 820 link_it: 821 if ((ofi = fib_find_info(fi)) != NULL) { 822 fi->fib_dead = 1; 823 free_fib_info(fi); 824 ofi->fib_treeref++; 825 return ofi; 826 } 827 828 fi->fib_treeref++; 829 atomic_inc(&fi->fib_clntref); 830 spin_lock_bh(&fib_info_lock); 831 hlist_add_head(&fi->fib_hash, 832 &fib_info_hash[fib_info_hashfn(fi)]); 833 if (fi->fib_prefsrc) { 834 struct hlist_head *head; 835 836 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 837 hlist_add_head(&fi->fib_lhash, head); 838 } 839 change_nexthops(fi) { 840 struct hlist_head *head; 841 unsigned int hash; 842 843 if (!nexthop_nh->nh_dev) 844 continue; 845 hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); 846 head = &fib_info_devhash[hash]; 847 hlist_add_head(&nexthop_nh->nh_hash, head); 848 } endfor_nexthops(fi) 849 spin_unlock_bh(&fib_info_lock); 850 return fi; 851 852 err_inval: 853 err = -EINVAL; 854 855 failure: 856 if (fi) { 857 fi->fib_dead = 1; 858 free_fib_info(fi); 859 } 860 861 return ERR_PTR(err); 862 } 863 864 /* Note! fib_semantic_match intentionally uses RCU list functions. */ 865 int fib_semantic_match(struct list_head *head, const struct flowi *flp, 866 struct fib_result *res, int prefixlen) 867 { 868 struct fib_alias *fa; 869 int nh_sel = 0; 870 871 list_for_each_entry_rcu(fa, head, fa_list) { 872 int err; 873 874 if (fa->fa_tos && 875 fa->fa_tos != flp->fl4_tos) 876 continue; 877 878 if (fa->fa_scope < flp->fl4_scope) 879 continue; 880 881 fa->fa_state |= FA_S_ACCESSED; 882 883 err = fib_props[fa->fa_type].error; 884 if (err == 0) { 885 struct fib_info *fi = fa->fa_info; 886 887 if (fi->fib_flags & RTNH_F_DEAD) 888 continue; 889 890 switch (fa->fa_type) { 891 case RTN_UNICAST: 892 case RTN_LOCAL: 893 case RTN_BROADCAST: 894 case RTN_ANYCAST: 895 case RTN_MULTICAST: 896 for_nexthops(fi) { 897 if (nh->nh_flags&RTNH_F_DEAD) 898 continue; 899 if (!flp->oif || flp->oif == nh->nh_oif) 900 break; 901 } 902 #ifdef CONFIG_IP_ROUTE_MULTIPATH 903 if (nhsel < fi->fib_nhs) { 904 nh_sel = nhsel; 905 goto out_fill_res; 906 } 907 #else 908 if (nhsel < 1) { 909 goto out_fill_res; 910 } 911 #endif 912 endfor_nexthops(fi); 913 continue; 914 915 default: 916 printk(KERN_WARNING "fib_semantic_match bad type %#x\n", 917 fa->fa_type); 918 return -EINVAL; 919 } 920 } 921 return err; 922 } 923 return 1; 924 925 out_fill_res: 926 res->prefixlen = prefixlen; 927 res->nh_sel = nh_sel; 928 res->type = fa->fa_type; 929 res->scope = fa->fa_scope; 930 res->fi = fa->fa_info; 931 atomic_inc(&res->fi->fib_clntref); 932 return 0; 933 } 934 935 /* Find appropriate source address to this destination */ 936 937 __be32 __fib_res_prefsrc(struct fib_result *res) 938 { 939 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); 940 } 941 942 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 943 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, 944 struct fib_info *fi, unsigned int flags) 945 { 946 struct nlmsghdr *nlh; 947 struct rtmsg *rtm; 948 949 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); 950 if (nlh == NULL) 951 return -EMSGSIZE; 952 953 rtm = nlmsg_data(nlh); 954 rtm->rtm_family = AF_INET; 955 rtm->rtm_dst_len = dst_len; 956 rtm->rtm_src_len = 0; 957 rtm->rtm_tos = tos; 958 if (tb_id < 256) 959 rtm->rtm_table = tb_id; 960 else 961 rtm->rtm_table = RT_TABLE_COMPAT; 962 NLA_PUT_U32(skb, RTA_TABLE, tb_id); 963 rtm->rtm_type = type; 964 rtm->rtm_flags = fi->fib_flags; 965 rtm->rtm_scope = scope; 966 rtm->rtm_protocol = fi->fib_protocol; 967 968 if (rtm->rtm_dst_len) 969 NLA_PUT_BE32(skb, RTA_DST, dst); 970 971 if (fi->fib_priority) 972 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); 973 974 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 975 goto nla_put_failure; 976 977 if (fi->fib_prefsrc) 978 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); 979 980 if (fi->fib_nhs == 1) { 981 if (fi->fib_nh->nh_gw) 982 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); 983 984 if (fi->fib_nh->nh_oif) 985 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 986 #ifdef CONFIG_NET_CLS_ROUTE 987 if (fi->fib_nh[0].nh_tclassid) 988 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 989 #endif 990 } 991 #ifdef CONFIG_IP_ROUTE_MULTIPATH 992 if (fi->fib_nhs > 1) { 993 struct rtnexthop *rtnh; 994 struct nlattr *mp; 995 996 mp = nla_nest_start(skb, RTA_MULTIPATH); 997 if (mp == NULL) 998 goto nla_put_failure; 999 1000 for_nexthops(fi) { 1001 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 1002 if (rtnh == NULL) 1003 goto nla_put_failure; 1004 1005 rtnh->rtnh_flags = nh->nh_flags & 0xFF; 1006 rtnh->rtnh_hops = nh->nh_weight - 1; 1007 rtnh->rtnh_ifindex = nh->nh_oif; 1008 1009 if (nh->nh_gw) 1010 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 1011 #ifdef CONFIG_NET_CLS_ROUTE 1012 if (nh->nh_tclassid) 1013 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 1014 #endif 1015 /* length of rtnetlink header + attributes */ 1016 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; 1017 } endfor_nexthops(fi); 1018 1019 nla_nest_end(skb, mp); 1020 } 1021 #endif 1022 return nlmsg_end(skb, nlh); 1023 1024 nla_put_failure: 1025 nlmsg_cancel(skb, nlh); 1026 return -EMSGSIZE; 1027 } 1028 1029 /* 1030 Update FIB if: 1031 - local address disappeared -> we must delete all the entries 1032 referring to it. 1033 - device went down -> we must shutdown all nexthops going via it. 1034 */ 1035 int fib_sync_down_addr(struct net *net, __be32 local) 1036 { 1037 int ret = 0; 1038 unsigned int hash = fib_laddr_hashfn(local); 1039 struct hlist_head *head = &fib_info_laddrhash[hash]; 1040 struct hlist_node *node; 1041 struct fib_info *fi; 1042 1043 if (fib_info_laddrhash == NULL || local == 0) 1044 return 0; 1045 1046 hlist_for_each_entry(fi, node, head, fib_lhash) { 1047 if (!net_eq(fi->fib_net, net)) 1048 continue; 1049 if (fi->fib_prefsrc == local) { 1050 fi->fib_flags |= RTNH_F_DEAD; 1051 ret++; 1052 } 1053 } 1054 return ret; 1055 } 1056 1057 int fib_sync_down_dev(struct net_device *dev, int force) 1058 { 1059 int ret = 0; 1060 int scope = RT_SCOPE_NOWHERE; 1061 struct fib_info *prev_fi = NULL; 1062 unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1063 struct hlist_head *head = &fib_info_devhash[hash]; 1064 struct hlist_node *node; 1065 struct fib_nh *nh; 1066 1067 if (force) 1068 scope = -1; 1069 1070 hlist_for_each_entry(nh, node, head, nh_hash) { 1071 struct fib_info *fi = nh->nh_parent; 1072 int dead; 1073 1074 BUG_ON(!fi->fib_nhs); 1075 if (nh->nh_dev != dev || fi == prev_fi) 1076 continue; 1077 prev_fi = fi; 1078 dead = 0; 1079 change_nexthops(fi) { 1080 if (nexthop_nh->nh_flags&RTNH_F_DEAD) 1081 dead++; 1082 else if (nexthop_nh->nh_dev == dev && 1083 nexthop_nh->nh_scope != scope) { 1084 nexthop_nh->nh_flags |= RTNH_F_DEAD; 1085 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1086 spin_lock_bh(&fib_multipath_lock); 1087 fi->fib_power -= nexthop_nh->nh_power; 1088 nexthop_nh->nh_power = 0; 1089 spin_unlock_bh(&fib_multipath_lock); 1090 #endif 1091 dead++; 1092 } 1093 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1094 if (force > 1 && nexthop_nh->nh_dev == dev) { 1095 dead = fi->fib_nhs; 1096 break; 1097 } 1098 #endif 1099 } endfor_nexthops(fi) 1100 if (dead == fi->fib_nhs) { 1101 fi->fib_flags |= RTNH_F_DEAD; 1102 ret++; 1103 } 1104 } 1105 1106 return ret; 1107 } 1108 1109 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1110 1111 /* 1112 Dead device goes up. We wake up dead nexthops. 1113 It takes sense only on multipath routes. 1114 */ 1115 1116 int fib_sync_up(struct net_device *dev) 1117 { 1118 struct fib_info *prev_fi; 1119 unsigned int hash; 1120 struct hlist_head *head; 1121 struct hlist_node *node; 1122 struct fib_nh *nh; 1123 int ret; 1124 1125 if (!(dev->flags&IFF_UP)) 1126 return 0; 1127 1128 prev_fi = NULL; 1129 hash = fib_devindex_hashfn(dev->ifindex); 1130 head = &fib_info_devhash[hash]; 1131 ret = 0; 1132 1133 hlist_for_each_entry(nh, node, head, nh_hash) { 1134 struct fib_info *fi = nh->nh_parent; 1135 int alive; 1136 1137 BUG_ON(!fi->fib_nhs); 1138 if (nh->nh_dev != dev || fi == prev_fi) 1139 continue; 1140 1141 prev_fi = fi; 1142 alive = 0; 1143 change_nexthops(fi) { 1144 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { 1145 alive++; 1146 continue; 1147 } 1148 if (nexthop_nh->nh_dev == NULL || 1149 !(nexthop_nh->nh_dev->flags&IFF_UP)) 1150 continue; 1151 if (nexthop_nh->nh_dev != dev || 1152 !__in_dev_get_rtnl(dev)) 1153 continue; 1154 alive++; 1155 spin_lock_bh(&fib_multipath_lock); 1156 nexthop_nh->nh_power = 0; 1157 nexthop_nh->nh_flags &= ~RTNH_F_DEAD; 1158 spin_unlock_bh(&fib_multipath_lock); 1159 } endfor_nexthops(fi) 1160 1161 if (alive > 0) { 1162 fi->fib_flags &= ~RTNH_F_DEAD; 1163 ret++; 1164 } 1165 } 1166 1167 return ret; 1168 } 1169 1170 /* 1171 The algorithm is suboptimal, but it provides really 1172 fair weighted route distribution. 1173 */ 1174 1175 void fib_select_multipath(const struct flowi *flp, struct fib_result *res) 1176 { 1177 struct fib_info *fi = res->fi; 1178 int w; 1179 1180 spin_lock_bh(&fib_multipath_lock); 1181 if (fi->fib_power <= 0) { 1182 int power = 0; 1183 change_nexthops(fi) { 1184 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { 1185 power += nexthop_nh->nh_weight; 1186 nexthop_nh->nh_power = nexthop_nh->nh_weight; 1187 } 1188 } endfor_nexthops(fi); 1189 fi->fib_power = power; 1190 if (power <= 0) { 1191 spin_unlock_bh(&fib_multipath_lock); 1192 /* Race condition: route has just become dead. */ 1193 res->nh_sel = 0; 1194 return; 1195 } 1196 } 1197 1198 1199 /* w should be random number [0..fi->fib_power-1], 1200 it is pretty bad approximation. 1201 */ 1202 1203 w = jiffies % fi->fib_power; 1204 1205 change_nexthops(fi) { 1206 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) && 1207 nexthop_nh->nh_power) { 1208 if ((w -= nexthop_nh->nh_power) <= 0) { 1209 nexthop_nh->nh_power--; 1210 fi->fib_power--; 1211 res->nh_sel = nhsel; 1212 spin_unlock_bh(&fib_multipath_lock); 1213 return; 1214 } 1215 } 1216 } endfor_nexthops(fi); 1217 1218 /* Race condition: route has just become dead. */ 1219 res->nh_sel = 0; 1220 spin_unlock_bh(&fib_multipath_lock); 1221 } 1222 #endif 1223