11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * IPv4 Forwarding Information Base: semantics. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 111da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 121da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 131da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds #include <asm/uaccess.h> 171da177e4SLinus Torvalds #include <asm/system.h> 181da177e4SLinus Torvalds #include <linux/bitops.h> 191da177e4SLinus Torvalds #include <linux/types.h> 201da177e4SLinus Torvalds #include <linux/kernel.h> 211da177e4SLinus Torvalds #include <linux/jiffies.h> 221da177e4SLinus Torvalds #include <linux/mm.h> 231da177e4SLinus Torvalds #include <linux/string.h> 241da177e4SLinus Torvalds #include <linux/socket.h> 251da177e4SLinus Torvalds #include <linux/sockios.h> 261da177e4SLinus Torvalds #include <linux/errno.h> 271da177e4SLinus Torvalds #include <linux/in.h> 281da177e4SLinus Torvalds #include <linux/inet.h> 2914c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h> 301da177e4SLinus Torvalds #include <linux/netdevice.h> 311da177e4SLinus Torvalds #include <linux/if_arp.h> 321da177e4SLinus Torvalds #include <linux/proc_fs.h> 331da177e4SLinus Torvalds #include <linux/skbuff.h> 341da177e4SLinus Torvalds #include <linux/init.h> 355a0e3ad6STejun Heo #include <linux/slab.h> 361da177e4SLinus Torvalds 
3714c85021SArnaldo Carvalho de Melo #include <net/arp.h> 381da177e4SLinus Torvalds #include <net/ip.h> 391da177e4SLinus Torvalds #include <net/protocol.h> 401da177e4SLinus Torvalds #include <net/route.h> 411da177e4SLinus Torvalds #include <net/tcp.h> 421da177e4SLinus Torvalds #include <net/sock.h> 431da177e4SLinus Torvalds #include <net/ip_fib.h> 44f21c7bc5SThomas Graf #include <net/netlink.h> 454e902c57SThomas Graf #include <net/nexthop.h> 461da177e4SLinus Torvalds 471da177e4SLinus Torvalds #include "fib_lookup.h" 481da177e4SLinus Torvalds 49832b4c5eSStephen Hemminger static DEFINE_SPINLOCK(fib_info_lock); 501da177e4SLinus Torvalds static struct hlist_head *fib_info_hash; 511da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash; 52*123b9731SDavid S. Miller static unsigned int fib_info_hash_size; 531da177e4SLinus Torvalds static unsigned int fib_info_cnt; 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8 561da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 571da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 581da177e4SLinus Torvalds 591da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 601da177e4SLinus Torvalds 611da177e4SLinus Torvalds static DEFINE_SPINLOCK(fib_multipath_lock); 621da177e4SLinus Torvalds 636a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 646a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh; \ 656a31d2a9SEric Dumazet for (nhsel = 0, nh = (fi)->fib_nh; \ 666a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 676a31d2a9SEric Dumazet nh++, nhsel++) 681da177e4SLinus Torvalds 696a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 706a31d2a9SEric Dumazet int nhsel; struct fib_nh *nexthop_nh; \ 716a31d2a9SEric Dumazet for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 726a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 736a31d2a9SEric Dumazet nexthop_nh++, nhsel++) 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds #else /* 
CONFIG_IP_ROUTE_MULTIPATH */ 761da177e4SLinus Torvalds 771da177e4SLinus Torvalds /* Hope, that gcc will optimize it to get rid of dummy loop */ 781da177e4SLinus Torvalds 796a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 806a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \ 811da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 821da177e4SLinus Torvalds 836a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 846a31d2a9SEric Dumazet int nhsel; \ 856a31d2a9SEric Dumazet struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 861da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 871da177e4SLinus Torvalds 881da177e4SLinus Torvalds #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 891da177e4SLinus Torvalds 901da177e4SLinus Torvalds #define endfor_nexthops(fi) } 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds 939b5b5cffSArjan van de Ven static const struct 941da177e4SLinus Torvalds { 951da177e4SLinus Torvalds int error; 961da177e4SLinus Torvalds u8 scope; 97a0ee18b9SThomas Graf } fib_props[RTN_MAX + 1] = { 986a31d2a9SEric Dumazet [RTN_UNSPEC] = { 991da177e4SLinus Torvalds .error = 0, 1001da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1016a31d2a9SEric Dumazet }, 1026a31d2a9SEric Dumazet [RTN_UNICAST] = { 1031da177e4SLinus Torvalds .error = 0, 1041da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1056a31d2a9SEric Dumazet }, 1066a31d2a9SEric Dumazet [RTN_LOCAL] = { 1071da177e4SLinus Torvalds .error = 0, 1081da177e4SLinus Torvalds .scope = RT_SCOPE_HOST, 1096a31d2a9SEric Dumazet }, 1106a31d2a9SEric Dumazet [RTN_BROADCAST] = { 1111da177e4SLinus Torvalds .error = 0, 1121da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1136a31d2a9SEric Dumazet }, 1146a31d2a9SEric Dumazet [RTN_ANYCAST] = { 1151da177e4SLinus Torvalds .error = 0, 1161da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1176a31d2a9SEric Dumazet }, 1186a31d2a9SEric Dumazet [RTN_MULTICAST] = { 1191da177e4SLinus Torvalds .error = 0, 1201da177e4SLinus Torvalds .scope = 
RT_SCOPE_UNIVERSE, 1216a31d2a9SEric Dumazet }, 1226a31d2a9SEric Dumazet [RTN_BLACKHOLE] = { 1231da177e4SLinus Torvalds .error = -EINVAL, 1241da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1256a31d2a9SEric Dumazet }, 1266a31d2a9SEric Dumazet [RTN_UNREACHABLE] = { 1271da177e4SLinus Torvalds .error = -EHOSTUNREACH, 1281da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1296a31d2a9SEric Dumazet }, 1306a31d2a9SEric Dumazet [RTN_PROHIBIT] = { 1311da177e4SLinus Torvalds .error = -EACCES, 1321da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1336a31d2a9SEric Dumazet }, 1346a31d2a9SEric Dumazet [RTN_THROW] = { 1351da177e4SLinus Torvalds .error = -EAGAIN, 1361da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1376a31d2a9SEric Dumazet }, 1386a31d2a9SEric Dumazet [RTN_NAT] = { 1391da177e4SLinus Torvalds .error = -EINVAL, 1401da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1416a31d2a9SEric Dumazet }, 1426a31d2a9SEric Dumazet [RTN_XRESOLVE] = { 1431da177e4SLinus Torvalds .error = -EINVAL, 1441da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1456a31d2a9SEric Dumazet }, 1461da177e4SLinus Torvalds }; 1471da177e4SLinus Torvalds 1481da177e4SLinus Torvalds 1491da177e4SLinus Torvalds /* Release a nexthop info record */ 1501da177e4SLinus Torvalds 151ebc0ffaeSEric Dumazet static void free_fib_info_rcu(struct rcu_head *head) 152ebc0ffaeSEric Dumazet { 153ebc0ffaeSEric Dumazet struct fib_info *fi = container_of(head, struct fib_info, rcu); 154ebc0ffaeSEric Dumazet 155725d1e1bSDavid S. Miller if (fi->fib_metrics != (u32 *) dst_default_metrics) 1569c150e82SDavid S. 
Miller kfree(fi->fib_metrics); 157ebc0ffaeSEric Dumazet kfree(fi); 158ebc0ffaeSEric Dumazet } 159ebc0ffaeSEric Dumazet 1601da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi) 1611da177e4SLinus Torvalds { 1621da177e4SLinus Torvalds if (fi->fib_dead == 0) { 1636a31d2a9SEric Dumazet pr_warning("Freeing alive fib_info %p\n", fi); 1641da177e4SLinus Torvalds return; 1651da177e4SLinus Torvalds } 1661da177e4SLinus Torvalds change_nexthops(fi) { 16771fceff0SDavid S. Miller if (nexthop_nh->nh_dev) 16871fceff0SDavid S. Miller dev_put(nexthop_nh->nh_dev); 16971fceff0SDavid S. Miller nexthop_nh->nh_dev = NULL; 1701da177e4SLinus Torvalds } endfor_nexthops(fi); 1711da177e4SLinus Torvalds fib_info_cnt--; 17257d7a600SDenis V. Lunev release_net(fi->fib_net); 173ebc0ffaeSEric Dumazet call_rcu(&fi->rcu, free_fib_info_rcu); 1741da177e4SLinus Torvalds } 1751da177e4SLinus Torvalds 1761da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi) 1771da177e4SLinus Torvalds { 178832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 1791da177e4SLinus Torvalds if (fi && --fi->fib_treeref == 0) { 1801da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 1811da177e4SLinus Torvalds if (fi->fib_prefsrc) 1821da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 1831da177e4SLinus Torvalds change_nexthops(fi) { 18471fceff0SDavid S. Miller if (!nexthop_nh->nh_dev) 1851da177e4SLinus Torvalds continue; 18671fceff0SDavid S. 
Miller hlist_del(&nexthop_nh->nh_hash); 1871da177e4SLinus Torvalds } endfor_nexthops(fi) 1881da177e4SLinus Torvalds fi->fib_dead = 1; 1891da177e4SLinus Torvalds fib_info_put(fi); 1901da177e4SLinus Torvalds } 191832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 1921da177e4SLinus Torvalds } 1931da177e4SLinus Torvalds 1946a31d2a9SEric Dumazet static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 1951da177e4SLinus Torvalds { 1961da177e4SLinus Torvalds const struct fib_nh *onh = ofi->fib_nh; 1971da177e4SLinus Torvalds 1981da177e4SLinus Torvalds for_nexthops(fi) { 1991da177e4SLinus Torvalds if (nh->nh_oif != onh->nh_oif || 2001da177e4SLinus Torvalds nh->nh_gw != onh->nh_gw || 2011da177e4SLinus Torvalds nh->nh_scope != onh->nh_scope || 2021da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 2031da177e4SLinus Torvalds nh->nh_weight != onh->nh_weight || 2041da177e4SLinus Torvalds #endif 205c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 2061da177e4SLinus Torvalds nh->nh_tclassid != onh->nh_tclassid || 2071da177e4SLinus Torvalds #endif 2081da177e4SLinus Torvalds ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) 2091da177e4SLinus Torvalds return -1; 2101da177e4SLinus Torvalds onh++; 2111da177e4SLinus Torvalds } endfor_nexthops(fi); 2121da177e4SLinus Torvalds return 0; 2131da177e4SLinus Torvalds } 2141da177e4SLinus Torvalds 21588ebc72fSDavid S. Miller static inline unsigned int fib_devindex_hashfn(unsigned int val) 21688ebc72fSDavid S. Miller { 21788ebc72fSDavid S. Miller unsigned int mask = DEVINDEX_HASHSIZE - 1; 21888ebc72fSDavid S. Miller 21988ebc72fSDavid S. Miller return (val ^ 22088ebc72fSDavid S. Miller (val >> DEVINDEX_HASHBITS) ^ 22188ebc72fSDavid S. Miller (val >> (DEVINDEX_HASHBITS * 2))) & mask; 22288ebc72fSDavid S. Miller } 22388ebc72fSDavid S. Miller 2241da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 2251da177e4SLinus Torvalds { 226*123b9731SDavid S. 
Miller unsigned int mask = (fib_info_hash_size - 1); 2271da177e4SLinus Torvalds unsigned int val = fi->fib_nhs; 2281da177e4SLinus Torvalds 2291da177e4SLinus Torvalds val ^= fi->fib_protocol; 23081f7bf6cSAl Viro val ^= (__force u32)fi->fib_prefsrc; 2311da177e4SLinus Torvalds val ^= fi->fib_priority; 23288ebc72fSDavid S. Miller for_nexthops(fi) { 23388ebc72fSDavid S. Miller val ^= fib_devindex_hashfn(nh->nh_oif); 23488ebc72fSDavid S. Miller } endfor_nexthops(fi) 2351da177e4SLinus Torvalds 2361da177e4SLinus Torvalds return (val ^ (val >> 7) ^ (val >> 12)) & mask; 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds 2391da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi) 2401da177e4SLinus Torvalds { 2411da177e4SLinus Torvalds struct hlist_head *head; 2421da177e4SLinus Torvalds struct hlist_node *node; 2431da177e4SLinus Torvalds struct fib_info *fi; 2441da177e4SLinus Torvalds unsigned int hash; 2451da177e4SLinus Torvalds 2461da177e4SLinus Torvalds hash = fib_info_hashfn(nfi); 2471da177e4SLinus Torvalds head = &fib_info_hash[hash]; 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds hlist_for_each_entry(fi, node, head, fib_hash) { 25009ad9bc7SOctavian Purdila if (!net_eq(fi->fib_net, nfi->fib_net)) 2514814bdbdSDenis V. 
Lunev continue; 2521da177e4SLinus Torvalds if (fi->fib_nhs != nfi->fib_nhs) 2531da177e4SLinus Torvalds continue; 2541da177e4SLinus Torvalds if (nfi->fib_protocol == fi->fib_protocol && 2551da177e4SLinus Torvalds nfi->fib_prefsrc == fi->fib_prefsrc && 2561da177e4SLinus Torvalds nfi->fib_priority == fi->fib_priority && 2571da177e4SLinus Torvalds memcmp(nfi->fib_metrics, fi->fib_metrics, 2581da177e4SLinus Torvalds sizeof(fi->fib_metrics)) == 0 && 2591da177e4SLinus Torvalds ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && 2601da177e4SLinus Torvalds (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 2611da177e4SLinus Torvalds return fi; 2621da177e4SLinus Torvalds } 2631da177e4SLinus Torvalds 2641da177e4SLinus Torvalds return NULL; 2651da177e4SLinus Torvalds } 2661da177e4SLinus Torvalds 2671da177e4SLinus Torvalds /* Check, that the gateway is already configured. 2686a31d2a9SEric Dumazet * Used only by redirect accept routine. 2691da177e4SLinus Torvalds */ 270d878e72eSAl Viro int ip_fib_check_default(__be32 gw, struct net_device *dev) 2711da177e4SLinus Torvalds { 2721da177e4SLinus Torvalds struct hlist_head *head; 2731da177e4SLinus Torvalds struct hlist_node *node; 2741da177e4SLinus Torvalds struct fib_nh *nh; 2751da177e4SLinus Torvalds unsigned int hash; 2761da177e4SLinus Torvalds 277832b4c5eSStephen Hemminger spin_lock(&fib_info_lock); 2781da177e4SLinus Torvalds 2791da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 2801da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 2811da177e4SLinus Torvalds hlist_for_each_entry(nh, node, head, nh_hash) { 2821da177e4SLinus Torvalds if (nh->nh_dev == dev && 2831da177e4SLinus Torvalds nh->nh_gw == gw && 2841da177e4SLinus Torvalds !(nh->nh_flags & RTNH_F_DEAD)) { 285832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 2861da177e4SLinus Torvalds return 0; 2871da177e4SLinus Torvalds } 2881da177e4SLinus Torvalds } 2891da177e4SLinus Torvalds 290832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 
2911da177e4SLinus Torvalds 2921da177e4SLinus Torvalds return -1; 2931da177e4SLinus Torvalds } 2941da177e4SLinus Torvalds 295339bf98fSThomas Graf static inline size_t fib_nlmsg_size(struct fib_info *fi) 296339bf98fSThomas Graf { 297339bf98fSThomas Graf size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 298339bf98fSThomas Graf + nla_total_size(4) /* RTA_TABLE */ 299339bf98fSThomas Graf + nla_total_size(4) /* RTA_DST */ 300339bf98fSThomas Graf + nla_total_size(4) /* RTA_PRIORITY */ 301339bf98fSThomas Graf + nla_total_size(4); /* RTA_PREFSRC */ 302339bf98fSThomas Graf 303339bf98fSThomas Graf /* space for nested metrics */ 304339bf98fSThomas Graf payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 305339bf98fSThomas Graf 306339bf98fSThomas Graf if (fi->fib_nhs) { 307339bf98fSThomas Graf /* Also handles the special case fib_nhs == 1 */ 308339bf98fSThomas Graf 309339bf98fSThomas Graf /* each nexthop is packed in an attribute */ 310339bf98fSThomas Graf size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 311339bf98fSThomas Graf 312339bf98fSThomas Graf /* may contain flow and gateway attribute */ 313339bf98fSThomas Graf nhsize += 2 * nla_total_size(4); 314339bf98fSThomas Graf 315339bf98fSThomas Graf /* all nexthops are packed in a nested attribute */ 316339bf98fSThomas Graf payload += nla_total_size(fi->fib_nhs * nhsize); 317339bf98fSThomas Graf } 318339bf98fSThomas Graf 319339bf98fSThomas Graf return payload; 320339bf98fSThomas Graf } 321339bf98fSThomas Graf 32281f7bf6cSAl Viro void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 323b8f55831SMilan Kocian int dst_len, u32 tb_id, struct nl_info *info, 324b8f55831SMilan Kocian unsigned int nlm_flags) 3251da177e4SLinus Torvalds { 3261da177e4SLinus Torvalds struct sk_buff *skb; 3274e902c57SThomas Graf u32 seq = info->nlh ? 
info->nlh->nlmsg_seq : 0; 328f21c7bc5SThomas Graf int err = -ENOBUFS; 3291da177e4SLinus Torvalds 330339bf98fSThomas Graf skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 331f21c7bc5SThomas Graf if (skb == NULL) 332f21c7bc5SThomas Graf goto errout; 3331da177e4SLinus Torvalds 3344e902c57SThomas Graf err = fib_dump_info(skb, info->pid, seq, event, tb_id, 335be403ea1SThomas Graf fa->fa_type, fa->fa_scope, key, dst_len, 336b8f55831SMilan Kocian fa->fa_tos, fa->fa_info, nlm_flags); 33726932566SPatrick McHardy if (err < 0) { 33826932566SPatrick McHardy /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 33926932566SPatrick McHardy WARN_ON(err == -EMSGSIZE); 34026932566SPatrick McHardy kfree_skb(skb); 34126932566SPatrick McHardy goto errout; 34226932566SPatrick McHardy } 3431ce85fe4SPablo Neira Ayuso rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, 3444e902c57SThomas Graf info->nlh, GFP_KERNEL); 3451ce85fe4SPablo Neira Ayuso return; 346f21c7bc5SThomas Graf errout: 347f21c7bc5SThomas Graf if (err < 0) 3484d1169c1SDenis V. Lunev rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 3491da177e4SLinus Torvalds } 3501da177e4SLinus Torvalds 3511da177e4SLinus Torvalds /* Return the first fib alias matching TOS with 3521da177e4SLinus Torvalds * priority less than or equal to PRIO. 
3531da177e4SLinus Torvalds */ 3541da177e4SLinus Torvalds struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 3551da177e4SLinus Torvalds { 3561da177e4SLinus Torvalds if (fah) { 3571da177e4SLinus Torvalds struct fib_alias *fa; 3581da177e4SLinus Torvalds list_for_each_entry(fa, fah, fa_list) { 3591da177e4SLinus Torvalds if (fa->fa_tos > tos) 3601da177e4SLinus Torvalds continue; 3611da177e4SLinus Torvalds if (fa->fa_info->fib_priority >= prio || 3621da177e4SLinus Torvalds fa->fa_tos < tos) 3631da177e4SLinus Torvalds return fa; 3641da177e4SLinus Torvalds } 3651da177e4SLinus Torvalds } 3661da177e4SLinus Torvalds return NULL; 3671da177e4SLinus Torvalds } 3681da177e4SLinus Torvalds 3691da177e4SLinus Torvalds int fib_detect_death(struct fib_info *fi, int order, 370c17860a0SDenis V. Lunev struct fib_info **last_resort, int *last_idx, int dflt) 3711da177e4SLinus Torvalds { 3721da177e4SLinus Torvalds struct neighbour *n; 3731da177e4SLinus Torvalds int state = NUD_NONE; 3741da177e4SLinus Torvalds 3751da177e4SLinus Torvalds n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 3761da177e4SLinus Torvalds if (n) { 3771da177e4SLinus Torvalds state = n->nud_state; 3781da177e4SLinus Torvalds neigh_release(n); 3791da177e4SLinus Torvalds } 3801da177e4SLinus Torvalds if (state == NUD_REACHABLE) 3811da177e4SLinus Torvalds return 0; 382c17860a0SDenis V. Lunev if ((state & NUD_VALID) && order != dflt) 3831da177e4SLinus Torvalds return 0; 3841da177e4SLinus Torvalds if ((state & NUD_VALID) || 385c17860a0SDenis V. 
Lunev (*last_idx < 0 && order > dflt)) { 3861da177e4SLinus Torvalds *last_resort = fi; 3871da177e4SLinus Torvalds *last_idx = order; 3881da177e4SLinus Torvalds } 3891da177e4SLinus Torvalds return 1; 3901da177e4SLinus Torvalds } 3911da177e4SLinus Torvalds 3921da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 3931da177e4SLinus Torvalds 3944e902c57SThomas Graf static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) 3951da177e4SLinus Torvalds { 3961da177e4SLinus Torvalds int nhs = 0; 3971da177e4SLinus Torvalds 3984e902c57SThomas Graf while (rtnh_ok(rtnh, remaining)) { 3991da177e4SLinus Torvalds nhs++; 4004e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 4011da177e4SLinus Torvalds } 4021da177e4SLinus Torvalds 4034e902c57SThomas Graf /* leftover implies invalid nexthop configuration, discard it */ 4044e902c57SThomas Graf return remaining > 0 ? 0 : nhs; 4054e902c57SThomas Graf } 4061da177e4SLinus Torvalds 4074e902c57SThomas Graf static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 4084e902c57SThomas Graf int remaining, struct fib_config *cfg) 4094e902c57SThomas Graf { 4101da177e4SLinus Torvalds change_nexthops(fi) { 4114e902c57SThomas Graf int attrlen; 4124e902c57SThomas Graf 4134e902c57SThomas Graf if (!rtnh_ok(rtnh, remaining)) 4141da177e4SLinus Torvalds return -EINVAL; 4154e902c57SThomas Graf 41671fceff0SDavid S. Miller nexthop_nh->nh_flags = 41771fceff0SDavid S. Miller (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 41871fceff0SDavid S. Miller nexthop_nh->nh_oif = rtnh->rtnh_ifindex; 41971fceff0SDavid S. Miller nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; 4204e902c57SThomas Graf 4214e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 4224e902c57SThomas Graf if (attrlen > 0) { 4234e902c57SThomas Graf struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 4244e902c57SThomas Graf 4254e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 42671fceff0SDavid S. Miller nexthop_nh->nh_gw = nla ? 
nla_get_be32(nla) : 0; 427c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 4284e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 42971fceff0SDavid S. Miller nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 4301da177e4SLinus Torvalds #endif 4311da177e4SLinus Torvalds } 4324e902c57SThomas Graf 4334e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 4341da177e4SLinus Torvalds } endfor_nexthops(fi); 4354e902c57SThomas Graf 4361da177e4SLinus Torvalds return 0; 4371da177e4SLinus Torvalds } 4381da177e4SLinus Torvalds 4391da177e4SLinus Torvalds #endif 4401da177e4SLinus Torvalds 4414e902c57SThomas Graf int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) 4421da177e4SLinus Torvalds { 4431da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4444e902c57SThomas Graf struct rtnexthop *rtnh; 4454e902c57SThomas Graf int remaining; 4461da177e4SLinus Torvalds #endif 4471da177e4SLinus Torvalds 4484e902c57SThomas Graf if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 4491da177e4SLinus Torvalds return 1; 4501da177e4SLinus Torvalds 4514e902c57SThomas Graf if (cfg->fc_oif || cfg->fc_gw) { 4524e902c57SThomas Graf if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 4534e902c57SThomas Graf (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 4541da177e4SLinus Torvalds return 0; 4551da177e4SLinus Torvalds return 1; 4561da177e4SLinus Torvalds } 4571da177e4SLinus Torvalds 4581da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4594e902c57SThomas Graf if (cfg->fc_mp == NULL) 4601da177e4SLinus Torvalds return 0; 4614e902c57SThomas Graf 4624e902c57SThomas Graf rtnh = cfg->fc_mp; 4634e902c57SThomas Graf remaining = cfg->fc_mp_len; 4641da177e4SLinus Torvalds 4651da177e4SLinus Torvalds for_nexthops(fi) { 4664e902c57SThomas Graf int attrlen; 4671da177e4SLinus Torvalds 4684e902c57SThomas Graf if (!rtnh_ok(rtnh, remaining)) 4691da177e4SLinus Torvalds return -EINVAL; 4704e902c57SThomas Graf 4714e902c57SThomas Graf if (rtnh->rtnh_ifindex && 
rtnh->rtnh_ifindex != nh->nh_oif) 4721da177e4SLinus Torvalds return 1; 4734e902c57SThomas Graf 4744e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 4754e902c57SThomas Graf if (attrlen < 0) { 4764e902c57SThomas Graf struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 4774e902c57SThomas Graf 4784e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 47917fb2c64SAl Viro if (nla && nla_get_be32(nla) != nh->nh_gw) 4801da177e4SLinus Torvalds return 1; 481c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 4824e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 4834e902c57SThomas Graf if (nla && nla_get_u32(nla) != nh->nh_tclassid) 4841da177e4SLinus Torvalds return 1; 4851da177e4SLinus Torvalds #endif 4861da177e4SLinus Torvalds } 4874e902c57SThomas Graf 4884e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 4891da177e4SLinus Torvalds } endfor_nexthops(fi); 4901da177e4SLinus Torvalds #endif 4911da177e4SLinus Torvalds return 0; 4921da177e4SLinus Torvalds } 4931da177e4SLinus Torvalds 4941da177e4SLinus Torvalds 4951da177e4SLinus Torvalds /* 4966a31d2a9SEric Dumazet * Picture 4976a31d2a9SEric Dumazet * ------- 4986a31d2a9SEric Dumazet * 4996a31d2a9SEric Dumazet * Semantics of nexthop is very messy by historical reasons. 5006a31d2a9SEric Dumazet * We have to take into account, that: 5016a31d2a9SEric Dumazet * a) gateway can be actually local interface address, 5026a31d2a9SEric Dumazet * so that gatewayed route is direct. 5036a31d2a9SEric Dumazet * b) gateway must be on-link address, possibly 5046a31d2a9SEric Dumazet * described not by an ifaddr, but also by a direct route. 5056a31d2a9SEric Dumazet * c) If both gateway and interface are specified, they should not 5066a31d2a9SEric Dumazet * contradict. 5076a31d2a9SEric Dumazet * d) If we use tunnel routes, gateway could be not on-link. 
5086a31d2a9SEric Dumazet * 5096a31d2a9SEric Dumazet * Attempt to reconcile all of these (alas, self-contradictory) conditions 5106a31d2a9SEric Dumazet * results in pretty ugly and hairy code with obscure logic. 5116a31d2a9SEric Dumazet * 5126a31d2a9SEric Dumazet * I chose to generalized it instead, so that the size 5136a31d2a9SEric Dumazet * of code does not increase practically, but it becomes 5146a31d2a9SEric Dumazet * much more general. 5156a31d2a9SEric Dumazet * Every prefix is assigned a "scope" value: "host" is local address, 5166a31d2a9SEric Dumazet * "link" is direct route, 5176a31d2a9SEric Dumazet * [ ... "site" ... "interior" ... ] 5186a31d2a9SEric Dumazet * and "universe" is true gateway route with global meaning. 5196a31d2a9SEric Dumazet * 5206a31d2a9SEric Dumazet * Every prefix refers to a set of "nexthop"s (gw, oif), 5216a31d2a9SEric Dumazet * where gw must have narrower scope. This recursion stops 5226a31d2a9SEric Dumazet * when gw has LOCAL scope or if "nexthop" is declared ONLINK, 5236a31d2a9SEric Dumazet * which means that gw is forced to be on link. 5246a31d2a9SEric Dumazet * 5256a31d2a9SEric Dumazet * Code is still hairy, but now it is apparently logically 5266a31d2a9SEric Dumazet * consistent and very flexible. F.e. as by-product it allows 5276a31d2a9SEric Dumazet * to co-exists in peace independent exterior and interior 5286a31d2a9SEric Dumazet * routing processes. 5296a31d2a9SEric Dumazet * 5306a31d2a9SEric Dumazet * Normally it looks as following. 
5316a31d2a9SEric Dumazet * 5326a31d2a9SEric Dumazet * {universe prefix} -> (gw, oif) [scope link] 5336a31d2a9SEric Dumazet * | 5346a31d2a9SEric Dumazet * |-> {link prefix} -> (gw, oif) [scope local] 5356a31d2a9SEric Dumazet * | 5366a31d2a9SEric Dumazet * |-> {local prefix} (terminal node) 5371da177e4SLinus Torvalds */ 5384e902c57SThomas Graf static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 5394e902c57SThomas Graf struct fib_nh *nh) 5401da177e4SLinus Torvalds { 5411da177e4SLinus Torvalds int err; 54286167a37SDenis V. Lunev struct net *net; 5436a31d2a9SEric Dumazet struct net_device *dev; 5441da177e4SLinus Torvalds 54586167a37SDenis V. Lunev net = cfg->fc_nlinfo.nl_net; 5461da177e4SLinus Torvalds if (nh->nh_gw) { 5471da177e4SLinus Torvalds struct fib_result res; 5481da177e4SLinus Torvalds 5491da177e4SLinus Torvalds if (nh->nh_flags & RTNH_F_ONLINK) { 5501da177e4SLinus Torvalds 5514e902c57SThomas Graf if (cfg->fc_scope >= RT_SCOPE_LINK) 5521da177e4SLinus Torvalds return -EINVAL; 55386167a37SDenis V. 
Lunev if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) 5541da177e4SLinus Torvalds return -EINVAL; 5556a31d2a9SEric Dumazet dev = __dev_get_by_index(net, nh->nh_oif); 5566a31d2a9SEric Dumazet if (!dev) 5571da177e4SLinus Torvalds return -ENODEV; 5581da177e4SLinus Torvalds if (!(dev->flags & IFF_UP)) 5591da177e4SLinus Torvalds return -ENETDOWN; 5601da177e4SLinus Torvalds nh->nh_dev = dev; 5611da177e4SLinus Torvalds dev_hold(dev); 5621da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_LINK; 5631da177e4SLinus Torvalds return 0; 5641da177e4SLinus Torvalds } 565ebc0ffaeSEric Dumazet rcu_read_lock(); 5661da177e4SLinus Torvalds { 5674e902c57SThomas Graf struct flowi fl = { 5685811662bSChangli Gao .fl4_dst = nh->nh_gw, 5695811662bSChangli Gao .fl4_scope = cfg->fc_scope + 1, 5704e902c57SThomas Graf .oif = nh->nh_oif, 5714e902c57SThomas Graf }; 5721da177e4SLinus Torvalds 5731da177e4SLinus Torvalds /* It is not necessary, but requires a bit of thinking */ 5741da177e4SLinus Torvalds if (fl.fl4_scope < RT_SCOPE_LINK) 5751da177e4SLinus Torvalds fl.fl4_scope = RT_SCOPE_LINK; 5766a31d2a9SEric Dumazet err = fib_lookup(net, &fl, &res); 577ebc0ffaeSEric Dumazet if (err) { 578ebc0ffaeSEric Dumazet rcu_read_unlock(); 5791da177e4SLinus Torvalds return err; 5801da177e4SLinus Torvalds } 581ebc0ffaeSEric Dumazet } 5821da177e4SLinus Torvalds err = -EINVAL; 5831da177e4SLinus Torvalds if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 5841da177e4SLinus Torvalds goto out; 5851da177e4SLinus Torvalds nh->nh_scope = res.scope; 5861da177e4SLinus Torvalds nh->nh_oif = FIB_RES_OIF(res); 5876a31d2a9SEric Dumazet nh->nh_dev = dev = FIB_RES_DEV(res); 5886a31d2a9SEric Dumazet if (!dev) 5891da177e4SLinus Torvalds goto out; 5906a31d2a9SEric Dumazet dev_hold(dev); 5918723e1b4SEric Dumazet err = (dev->flags & IFF_UP) ? 
0 : -ENETDOWN; 5921da177e4SLinus Torvalds } else { 5931da177e4SLinus Torvalds struct in_device *in_dev; 5941da177e4SLinus Torvalds 5951da177e4SLinus Torvalds if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) 5961da177e4SLinus Torvalds return -EINVAL; 5971da177e4SLinus Torvalds 5988723e1b4SEric Dumazet rcu_read_lock(); 5998723e1b4SEric Dumazet err = -ENODEV; 60086167a37SDenis V. Lunev in_dev = inetdev_by_index(net, nh->nh_oif); 6011da177e4SLinus Torvalds if (in_dev == NULL) 6028723e1b4SEric Dumazet goto out; 6038723e1b4SEric Dumazet err = -ENETDOWN; 6048723e1b4SEric Dumazet if (!(in_dev->dev->flags & IFF_UP)) 6058723e1b4SEric Dumazet goto out; 6061da177e4SLinus Torvalds nh->nh_dev = in_dev->dev; 6071da177e4SLinus Torvalds dev_hold(nh->nh_dev); 6081da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_HOST; 6098723e1b4SEric Dumazet err = 0; 6101da177e4SLinus Torvalds } 6118723e1b4SEric Dumazet out: 6128723e1b4SEric Dumazet rcu_read_unlock(); 6138723e1b4SEric Dumazet return err; 6141da177e4SLinus Torvalds } 6151da177e4SLinus Torvalds 61681f7bf6cSAl Viro static inline unsigned int fib_laddr_hashfn(__be32 val) 6171da177e4SLinus Torvalds { 618*123b9731SDavid S. Miller unsigned int mask = (fib_info_hash_size - 1); 6191da177e4SLinus Torvalds 6206a31d2a9SEric Dumazet return ((__force u32)val ^ 6216a31d2a9SEric Dumazet ((__force u32)val >> 7) ^ 6226a31d2a9SEric Dumazet ((__force u32)val >> 14)) & mask; 6231da177e4SLinus Torvalds } 6241da177e4SLinus Torvalds 625*123b9731SDavid S. Miller static struct hlist_head *fib_info_hash_alloc(int bytes) 6261da177e4SLinus Torvalds { 6271da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 62888f83491SJoonwoo Park return kzalloc(bytes, GFP_KERNEL); 6291da177e4SLinus Torvalds else 6301da177e4SLinus Torvalds return (struct hlist_head *) 6316a31d2a9SEric Dumazet __get_free_pages(GFP_KERNEL | __GFP_ZERO, 6326a31d2a9SEric Dumazet get_order(bytes)); 6331da177e4SLinus Torvalds } 6341da177e4SLinus Torvalds 635*123b9731SDavid S. 
Miller static void fib_info_hash_free(struct hlist_head *hash, int bytes) 6361da177e4SLinus Torvalds { 6371da177e4SLinus Torvalds if (!hash) 6381da177e4SLinus Torvalds return; 6391da177e4SLinus Torvalds 6401da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 6411da177e4SLinus Torvalds kfree(hash); 6421da177e4SLinus Torvalds else 6431da177e4SLinus Torvalds free_pages((unsigned long) hash, get_order(bytes)); 6441da177e4SLinus Torvalds } 6451da177e4SLinus Torvalds 646*123b9731SDavid S. Miller static void fib_info_hash_move(struct hlist_head *new_info_hash, 6471da177e4SLinus Torvalds struct hlist_head *new_laddrhash, 6481da177e4SLinus Torvalds unsigned int new_size) 6491da177e4SLinus Torvalds { 650b7656e7fSDavid S. Miller struct hlist_head *old_info_hash, *old_laddrhash; 651*123b9731SDavid S. Miller unsigned int old_size = fib_info_hash_size; 652b7656e7fSDavid S. Miller unsigned int i, bytes; 6531da177e4SLinus Torvalds 654832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 655b7656e7fSDavid S. Miller old_info_hash = fib_info_hash; 656b7656e7fSDavid S. Miller old_laddrhash = fib_info_laddrhash; 657*123b9731SDavid S. 
Miller fib_info_hash_size = new_size; 6581da177e4SLinus Torvalds 6591da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 6601da177e4SLinus Torvalds struct hlist_head *head = &fib_info_hash[i]; 6611da177e4SLinus Torvalds struct hlist_node *node, *n; 6621da177e4SLinus Torvalds struct fib_info *fi; 6631da177e4SLinus Torvalds 6641da177e4SLinus Torvalds hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { 6651da177e4SLinus Torvalds struct hlist_head *dest; 6661da177e4SLinus Torvalds unsigned int new_hash; 6671da177e4SLinus Torvalds 6681da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 6691da177e4SLinus Torvalds 6701da177e4SLinus Torvalds new_hash = fib_info_hashfn(fi); 6711da177e4SLinus Torvalds dest = &new_info_hash[new_hash]; 6721da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, dest); 6731da177e4SLinus Torvalds } 6741da177e4SLinus Torvalds } 6751da177e4SLinus Torvalds fib_info_hash = new_info_hash; 6761da177e4SLinus Torvalds 6771da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 6781da177e4SLinus Torvalds struct hlist_head *lhead = &fib_info_laddrhash[i]; 6791da177e4SLinus Torvalds struct hlist_node *node, *n; 6801da177e4SLinus Torvalds struct fib_info *fi; 6811da177e4SLinus Torvalds 6821da177e4SLinus Torvalds hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { 6831da177e4SLinus Torvalds struct hlist_head *ldest; 6841da177e4SLinus Torvalds unsigned int new_hash; 6851da177e4SLinus Torvalds 6861da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 6871da177e4SLinus Torvalds 6881da177e4SLinus Torvalds new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 6891da177e4SLinus Torvalds ldest = &new_laddrhash[new_hash]; 6901da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, ldest); 6911da177e4SLinus Torvalds } 6921da177e4SLinus Torvalds } 6931da177e4SLinus Torvalds fib_info_laddrhash = new_laddrhash; 6941da177e4SLinus Torvalds 695832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 696b7656e7fSDavid S. Miller 697b7656e7fSDavid S. 
Miller bytes = old_size * sizeof(struct hlist_head *); 698*123b9731SDavid S. Miller fib_info_hash_free(old_info_hash, bytes); 699*123b9731SDavid S. Miller fib_info_hash_free(old_laddrhash, bytes); 7001da177e4SLinus Torvalds } 7011da177e4SLinus Torvalds 7024e902c57SThomas Graf struct fib_info *fib_create_info(struct fib_config *cfg) 7031da177e4SLinus Torvalds { 7041da177e4SLinus Torvalds int err; 7051da177e4SLinus Torvalds struct fib_info *fi = NULL; 7061da177e4SLinus Torvalds struct fib_info *ofi; 7071da177e4SLinus Torvalds int nhs = 1; 7087462bd74SDenis V. Lunev struct net *net = cfg->fc_nlinfo.nl_net; 7091da177e4SLinus Torvalds 7101da177e4SLinus Torvalds /* Fast check to catch the most weird cases */ 7114e902c57SThomas Graf if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 7121da177e4SLinus Torvalds goto err_inval; 7131da177e4SLinus Torvalds 7141da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 7154e902c57SThomas Graf if (cfg->fc_mp) { 7164e902c57SThomas Graf nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); 7171da177e4SLinus Torvalds if (nhs == 0) 7181da177e4SLinus Torvalds goto err_inval; 7191da177e4SLinus Torvalds } 7201da177e4SLinus Torvalds #endif 7211da177e4SLinus Torvalds 7221da177e4SLinus Torvalds err = -ENOBUFS; 723*123b9731SDavid S. Miller if (fib_info_cnt >= fib_info_hash_size) { 724*123b9731SDavid S. Miller unsigned int new_size = fib_info_hash_size << 1; 7251da177e4SLinus Torvalds struct hlist_head *new_info_hash; 7261da177e4SLinus Torvalds struct hlist_head *new_laddrhash; 7271da177e4SLinus Torvalds unsigned int bytes; 7281da177e4SLinus Torvalds 7291da177e4SLinus Torvalds if (!new_size) 7301da177e4SLinus Torvalds new_size = 1; 7311da177e4SLinus Torvalds bytes = new_size * sizeof(struct hlist_head *); 732*123b9731SDavid S. Miller new_info_hash = fib_info_hash_alloc(bytes); 733*123b9731SDavid S. Miller new_laddrhash = fib_info_hash_alloc(bytes); 7341da177e4SLinus Torvalds if (!new_info_hash || !new_laddrhash) { 735*123b9731SDavid S. 
Miller fib_info_hash_free(new_info_hash, bytes); 736*123b9731SDavid S. Miller fib_info_hash_free(new_laddrhash, bytes); 73788f83491SJoonwoo Park } else 738*123b9731SDavid S. Miller fib_info_hash_move(new_info_hash, new_laddrhash, new_size); 7391da177e4SLinus Torvalds 740*123b9731SDavid S. Miller if (!fib_info_hash_size) 7411da177e4SLinus Torvalds goto failure; 7421da177e4SLinus Torvalds } 7431da177e4SLinus Torvalds 7440da974f4SPanagiotis Issaris fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 7451da177e4SLinus Torvalds if (fi == NULL) 7461da177e4SLinus Torvalds goto failure; 747725d1e1bSDavid S. Miller if (cfg->fc_mx) { 7489c150e82SDavid S. Miller fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 7499c150e82SDavid S. Miller if (!fi->fib_metrics) 7509c150e82SDavid S. Miller goto failure; 751725d1e1bSDavid S. Miller } else 752725d1e1bSDavid S. Miller fi->fib_metrics = (u32 *) dst_default_metrics; 7531da177e4SLinus Torvalds fib_info_cnt++; 7541da177e4SLinus Torvalds 75557d7a600SDenis V. Lunev fi->fib_net = hold_net(net); 7564e902c57SThomas Graf fi->fib_protocol = cfg->fc_protocol; 7574e902c57SThomas Graf fi->fib_flags = cfg->fc_flags; 7584e902c57SThomas Graf fi->fib_priority = cfg->fc_priority; 7594e902c57SThomas Graf fi->fib_prefsrc = cfg->fc_prefsrc; 7601da177e4SLinus Torvalds 7611da177e4SLinus Torvalds fi->fib_nhs = nhs; 7621da177e4SLinus Torvalds change_nexthops(fi) { 76371fceff0SDavid S. 
Miller nexthop_nh->nh_parent = fi; 7641da177e4SLinus Torvalds } endfor_nexthops(fi) 7651da177e4SLinus Torvalds 7664e902c57SThomas Graf if (cfg->fc_mx) { 7674e902c57SThomas Graf struct nlattr *nla; 7684e902c57SThomas Graf int remaining; 7691da177e4SLinus Torvalds 7704e902c57SThomas Graf nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 7718f4c1f9bSThomas Graf int type = nla_type(nla); 7724e902c57SThomas Graf 7734e902c57SThomas Graf if (type) { 7744e902c57SThomas Graf if (type > RTAX_MAX) 7751da177e4SLinus Torvalds goto err_inval; 7764e902c57SThomas Graf fi->fib_metrics[type - 1] = nla_get_u32(nla); 7771da177e4SLinus Torvalds } 7781da177e4SLinus Torvalds } 7794e902c57SThomas Graf } 7801da177e4SLinus Torvalds 7814e902c57SThomas Graf if (cfg->fc_mp) { 7821da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 7834e902c57SThomas Graf err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); 7844e902c57SThomas Graf if (err != 0) 7851da177e4SLinus Torvalds goto failure; 7864e902c57SThomas Graf if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) 7871da177e4SLinus Torvalds goto err_inval; 7884e902c57SThomas Graf if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 7891da177e4SLinus Torvalds goto err_inval; 790c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 7914e902c57SThomas Graf if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 7921da177e4SLinus Torvalds goto err_inval; 7931da177e4SLinus Torvalds #endif 7941da177e4SLinus Torvalds #else 7951da177e4SLinus Torvalds goto err_inval; 7961da177e4SLinus Torvalds #endif 7971da177e4SLinus Torvalds } else { 7981da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 7994e902c57SThomas Graf 8004e902c57SThomas Graf nh->nh_oif = cfg->fc_oif; 8014e902c57SThomas Graf nh->nh_gw = cfg->fc_gw; 8024e902c57SThomas Graf nh->nh_flags = cfg->fc_flags; 803c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 8044e902c57SThomas Graf nh->nh_tclassid = cfg->fc_flow; 8051da177e4SLinus Torvalds #endif 8061da177e4SLinus 
Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 8071da177e4SLinus Torvalds nh->nh_weight = 1; 8081da177e4SLinus Torvalds #endif 8091da177e4SLinus Torvalds } 8101da177e4SLinus Torvalds 8114e902c57SThomas Graf if (fib_props[cfg->fc_type].error) { 8124e902c57SThomas Graf if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 8131da177e4SLinus Torvalds goto err_inval; 8141da177e4SLinus Torvalds goto link_it; 8151da177e4SLinus Torvalds } 8161da177e4SLinus Torvalds 8174e902c57SThomas Graf if (cfg->fc_scope > RT_SCOPE_HOST) 8181da177e4SLinus Torvalds goto err_inval; 8191da177e4SLinus Torvalds 8204e902c57SThomas Graf if (cfg->fc_scope == RT_SCOPE_HOST) { 8211da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 8221da177e4SLinus Torvalds 8231da177e4SLinus Torvalds /* Local address is added. */ 8241da177e4SLinus Torvalds if (nhs != 1 || nh->nh_gw) 8251da177e4SLinus Torvalds goto err_inval; 8261da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_NOWHERE; 8277462bd74SDenis V. Lunev nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); 8281da177e4SLinus Torvalds err = -ENODEV; 8291da177e4SLinus Torvalds if (nh->nh_dev == NULL) 8301da177e4SLinus Torvalds goto failure; 8311da177e4SLinus Torvalds } else { 8321da177e4SLinus Torvalds change_nexthops(fi) { 8336a31d2a9SEric Dumazet err = fib_check_nh(cfg, fi, nexthop_nh); 8346a31d2a9SEric Dumazet if (err != 0) 8351da177e4SLinus Torvalds goto failure; 8361da177e4SLinus Torvalds } endfor_nexthops(fi) 8371da177e4SLinus Torvalds } 8381da177e4SLinus Torvalds 8391da177e4SLinus Torvalds if (fi->fib_prefsrc) { 8404e902c57SThomas Graf if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 8414e902c57SThomas Graf fi->fib_prefsrc != cfg->fc_dst) 8427462bd74SDenis V. 
Lunev if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) 8431da177e4SLinus Torvalds goto err_inval; 8441da177e4SLinus Torvalds } 8451da177e4SLinus Torvalds 8461da177e4SLinus Torvalds link_it: 8476a31d2a9SEric Dumazet ofi = fib_find_info(fi); 8486a31d2a9SEric Dumazet if (ofi) { 8491da177e4SLinus Torvalds fi->fib_dead = 1; 8501da177e4SLinus Torvalds free_fib_info(fi); 8511da177e4SLinus Torvalds ofi->fib_treeref++; 8521da177e4SLinus Torvalds return ofi; 8531da177e4SLinus Torvalds } 8541da177e4SLinus Torvalds 8551da177e4SLinus Torvalds fi->fib_treeref++; 8561da177e4SLinus Torvalds atomic_inc(&fi->fib_clntref); 857832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 8581da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, 8591da177e4SLinus Torvalds &fib_info_hash[fib_info_hashfn(fi)]); 8601da177e4SLinus Torvalds if (fi->fib_prefsrc) { 8611da177e4SLinus Torvalds struct hlist_head *head; 8621da177e4SLinus Torvalds 8631da177e4SLinus Torvalds head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 8641da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, head); 8651da177e4SLinus Torvalds } 8661da177e4SLinus Torvalds change_nexthops(fi) { 8671da177e4SLinus Torvalds struct hlist_head *head; 8681da177e4SLinus Torvalds unsigned int hash; 8691da177e4SLinus Torvalds 87071fceff0SDavid S. Miller if (!nexthop_nh->nh_dev) 8711da177e4SLinus Torvalds continue; 87271fceff0SDavid S. Miller hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); 8731da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 87471fceff0SDavid S. 
Miller hlist_add_head(&nexthop_nh->nh_hash, head); 8751da177e4SLinus Torvalds } endfor_nexthops(fi) 876832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 8771da177e4SLinus Torvalds return fi; 8781da177e4SLinus Torvalds 8791da177e4SLinus Torvalds err_inval: 8801da177e4SLinus Torvalds err = -EINVAL; 8811da177e4SLinus Torvalds 8821da177e4SLinus Torvalds failure: 8831da177e4SLinus Torvalds if (fi) { 8841da177e4SLinus Torvalds fi->fib_dead = 1; 8851da177e4SLinus Torvalds free_fib_info(fi); 8861da177e4SLinus Torvalds } 8874e902c57SThomas Graf 8884e902c57SThomas Graf return ERR_PTR(err); 8891da177e4SLinus Torvalds } 8901da177e4SLinus Torvalds 891e5b43760SRobert Olsson /* Note! fib_semantic_match intentionally uses RCU list functions. */ 8925b470441SDavid S. Miller int fib_semantic_match(struct fib_table *tb, struct list_head *head, 8935b470441SDavid S. Miller const struct flowi *flp, struct fib_result *res, 8945b470441SDavid S. Miller int prefixlen, int fib_flags) 8951da177e4SLinus Torvalds { 8961da177e4SLinus Torvalds struct fib_alias *fa; 8971da177e4SLinus Torvalds int nh_sel = 0; 8981da177e4SLinus Torvalds 899e5b43760SRobert Olsson list_for_each_entry_rcu(fa, head, fa_list) { 9001da177e4SLinus Torvalds int err; 9011da177e4SLinus Torvalds 9021da177e4SLinus Torvalds if (fa->fa_tos && 9031da177e4SLinus Torvalds fa->fa_tos != flp->fl4_tos) 9041da177e4SLinus Torvalds continue; 9051da177e4SLinus Torvalds 9061da177e4SLinus Torvalds if (fa->fa_scope < flp->fl4_scope) 9071da177e4SLinus Torvalds continue; 9081da177e4SLinus Torvalds 9099b0c290eSEric Dumazet fib_alias_accessed(fa); 9101da177e4SLinus Torvalds 9111da177e4SLinus Torvalds err = fib_props[fa->fa_type].error; 9121da177e4SLinus Torvalds if (err == 0) { 9131da177e4SLinus Torvalds struct fib_info *fi = fa->fa_info; 9141da177e4SLinus Torvalds 9151da177e4SLinus Torvalds if (fi->fib_flags & RTNH_F_DEAD) 9161da177e4SLinus Torvalds continue; 9171da177e4SLinus Torvalds 9181da177e4SLinus Torvalds switch (fa->fa_type) { 
9191da177e4SLinus Torvalds case RTN_UNICAST: 9201da177e4SLinus Torvalds case RTN_LOCAL: 9211da177e4SLinus Torvalds case RTN_BROADCAST: 9221da177e4SLinus Torvalds case RTN_ANYCAST: 9231da177e4SLinus Torvalds case RTN_MULTICAST: 9241da177e4SLinus Torvalds for_nexthops(fi) { 9251da177e4SLinus Torvalds if (nh->nh_flags & RTNH_F_DEAD) 9261da177e4SLinus Torvalds continue; 9271da177e4SLinus Torvalds if (!flp->oif || flp->oif == nh->nh_oif) 9281da177e4SLinus Torvalds break; 9291da177e4SLinus Torvalds } 9301da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 9311da177e4SLinus Torvalds if (nhsel < fi->fib_nhs) { 9321da177e4SLinus Torvalds nh_sel = nhsel; 9331da177e4SLinus Torvalds goto out_fill_res; 9341da177e4SLinus Torvalds } 9351da177e4SLinus Torvalds #else 9366a31d2a9SEric Dumazet if (nhsel < 1) 9371da177e4SLinus Torvalds goto out_fill_res; 9381da177e4SLinus Torvalds #endif 9391da177e4SLinus Torvalds endfor_nexthops(fi); 9401da177e4SLinus Torvalds continue; 9411da177e4SLinus Torvalds 9421da177e4SLinus Torvalds default: 9436a31d2a9SEric Dumazet pr_warning("fib_semantic_match bad type %#x\n", 944a6db9010SStephen Hemminger fa->fa_type); 9451da177e4SLinus Torvalds return -EINVAL; 9463ff50b79SStephen Hemminger } 9471da177e4SLinus Torvalds } 9481da177e4SLinus Torvalds return err; 9491da177e4SLinus Torvalds } 9501da177e4SLinus Torvalds return 1; 9511da177e4SLinus Torvalds 9521da177e4SLinus Torvalds out_fill_res: 9531da177e4SLinus Torvalds res->prefixlen = prefixlen; 9541da177e4SLinus Torvalds res->nh_sel = nh_sel; 9551da177e4SLinus Torvalds res->type = fa->fa_type; 9561da177e4SLinus Torvalds res->scope = fa->fa_scope; 9571da177e4SLinus Torvalds res->fi = fa->fa_info; 9585b470441SDavid S. Miller res->table = tb; 9595b470441SDavid S. 
Miller res->fa_head = head; 960ebc0ffaeSEric Dumazet if (!(fib_flags & FIB_LOOKUP_NOREF)) 9611da177e4SLinus Torvalds atomic_inc(&res->fi->fib_clntref); 9621da177e4SLinus Torvalds return 0; 9631da177e4SLinus Torvalds } 9641da177e4SLinus Torvalds 9651da177e4SLinus Torvalds /* Find appropriate source address to this destination */ 9661da177e4SLinus Torvalds 967b83738aeSAl Viro __be32 __fib_res_prefsrc(struct fib_result *res) 9681da177e4SLinus Torvalds { 9691da177e4SLinus Torvalds return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); 9701da177e4SLinus Torvalds } 9711da177e4SLinus Torvalds 972be403ea1SThomas Graf int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 97381f7bf6cSAl Viro u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, 974b6544c0bSJamal Hadi Salim struct fib_info *fi, unsigned int flags) 9751da177e4SLinus Torvalds { 9761da177e4SLinus Torvalds struct nlmsghdr *nlh; 977be403ea1SThomas Graf struct rtmsg *rtm; 9781da177e4SLinus Torvalds 979be403ea1SThomas Graf nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); 980be403ea1SThomas Graf if (nlh == NULL) 98126932566SPatrick McHardy return -EMSGSIZE; 982be403ea1SThomas Graf 983be403ea1SThomas Graf rtm = nlmsg_data(nlh); 9841da177e4SLinus Torvalds rtm->rtm_family = AF_INET; 9851da177e4SLinus Torvalds rtm->rtm_dst_len = dst_len; 9861da177e4SLinus Torvalds rtm->rtm_src_len = 0; 9871da177e4SLinus Torvalds rtm->rtm_tos = tos; 988709772e6SKrzysztof Piotr Oledzki if (tb_id < 256) 9891da177e4SLinus Torvalds rtm->rtm_table = tb_id; 990709772e6SKrzysztof Piotr Oledzki else 991709772e6SKrzysztof Piotr Oledzki rtm->rtm_table = RT_TABLE_COMPAT; 992be403ea1SThomas Graf NLA_PUT_U32(skb, RTA_TABLE, tb_id); 9931da177e4SLinus Torvalds rtm->rtm_type = type; 9941da177e4SLinus Torvalds rtm->rtm_flags = fi->fib_flags; 9951da177e4SLinus Torvalds rtm->rtm_scope = scope; 9961da177e4SLinus Torvalds rtm->rtm_protocol = fi->fib_protocol; 997be403ea1SThomas Graf 998be403ea1SThomas Graf 
if (rtm->rtm_dst_len) 99917fb2c64SAl Viro NLA_PUT_BE32(skb, RTA_DST, dst); 1000be403ea1SThomas Graf 10011da177e4SLinus Torvalds if (fi->fib_priority) 1002be403ea1SThomas Graf NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); 1003be403ea1SThomas Graf 10041da177e4SLinus Torvalds if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 1005be403ea1SThomas Graf goto nla_put_failure; 1006be403ea1SThomas Graf 10071da177e4SLinus Torvalds if (fi->fib_prefsrc) 100817fb2c64SAl Viro NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); 1009be403ea1SThomas Graf 10101da177e4SLinus Torvalds if (fi->fib_nhs == 1) { 10111da177e4SLinus Torvalds if (fi->fib_nh->nh_gw) 101217fb2c64SAl Viro NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); 1013be403ea1SThomas Graf 10141da177e4SLinus Torvalds if (fi->fib_nh->nh_oif) 1015be403ea1SThomas Graf NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 1016c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 10178265abc0SPatrick McHardy if (fi->fib_nh[0].nh_tclassid) 1018be403ea1SThomas Graf NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 10198265abc0SPatrick McHardy #endif 10201da177e4SLinus Torvalds } 10211da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 10221da177e4SLinus Torvalds if (fi->fib_nhs > 1) { 1023be403ea1SThomas Graf struct rtnexthop *rtnh; 1024be403ea1SThomas Graf struct nlattr *mp; 1025be403ea1SThomas Graf 1026be403ea1SThomas Graf mp = nla_nest_start(skb, RTA_MULTIPATH); 1027be403ea1SThomas Graf if (mp == NULL) 1028be403ea1SThomas Graf goto nla_put_failure; 10291da177e4SLinus Torvalds 10301da177e4SLinus Torvalds for_nexthops(fi) { 1031be403ea1SThomas Graf rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 1032be403ea1SThomas Graf if (rtnh == NULL) 1033be403ea1SThomas Graf goto nla_put_failure; 1034be403ea1SThomas Graf 1035be403ea1SThomas Graf rtnh->rtnh_flags = nh->nh_flags & 0xFF; 1036be403ea1SThomas Graf rtnh->rtnh_hops = nh->nh_weight - 1; 1037be403ea1SThomas Graf rtnh->rtnh_ifindex = nh->nh_oif; 1038be403ea1SThomas Graf 
10391da177e4SLinus Torvalds if (nh->nh_gw) 104017fb2c64SAl Viro NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 1041c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 10428265abc0SPatrick McHardy if (nh->nh_tclassid) 1043be403ea1SThomas Graf NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 10448265abc0SPatrick McHardy #endif 1045be403ea1SThomas Graf /* length of rtnetlink header + attributes */ 1046be403ea1SThomas Graf rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; 10471da177e4SLinus Torvalds } endfor_nexthops(fi); 1048be403ea1SThomas Graf 1049be403ea1SThomas Graf nla_nest_end(skb, mp); 10501da177e4SLinus Torvalds } 10511da177e4SLinus Torvalds #endif 1052be403ea1SThomas Graf return nlmsg_end(skb, nlh); 10531da177e4SLinus Torvalds 1054be403ea1SThomas Graf nla_put_failure: 105526932566SPatrick McHardy nlmsg_cancel(skb, nlh); 105626932566SPatrick McHardy return -EMSGSIZE; 10571da177e4SLinus Torvalds } 10581da177e4SLinus Torvalds 10591da177e4SLinus Torvalds /* 10606a31d2a9SEric Dumazet * Update FIB if: 10616a31d2a9SEric Dumazet * - local address disappeared -> we must delete all the entries 10626a31d2a9SEric Dumazet * referring to it. 10636a31d2a9SEric Dumazet * - device went down -> we must shutdown all nexthops going via it. 10641da177e4SLinus Torvalds */ 10654814bdbdSDenis V. Lunev int fib_sync_down_addr(struct net *net, __be32 local) 10661da177e4SLinus Torvalds { 10671da177e4SLinus Torvalds int ret = 0; 10681da177e4SLinus Torvalds unsigned int hash = fib_laddr_hashfn(local); 10691da177e4SLinus Torvalds struct hlist_head *head = &fib_info_laddrhash[hash]; 10701da177e4SLinus Torvalds struct hlist_node *node; 10711da177e4SLinus Torvalds struct fib_info *fi; 10721da177e4SLinus Torvalds 107385326fa5SDenis V. Lunev if (fib_info_laddrhash == NULL || local == 0) 107485326fa5SDenis V. Lunev return 0; 107585326fa5SDenis V. 
Lunev 10761da177e4SLinus Torvalds hlist_for_each_entry(fi, node, head, fib_lhash) { 107709ad9bc7SOctavian Purdila if (!net_eq(fi->fib_net, net)) 10784814bdbdSDenis V. Lunev continue; 10791da177e4SLinus Torvalds if (fi->fib_prefsrc == local) { 10801da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 10811da177e4SLinus Torvalds ret++; 10821da177e4SLinus Torvalds } 10831da177e4SLinus Torvalds } 108485326fa5SDenis V. Lunev return ret; 10851da177e4SLinus Torvalds } 10861da177e4SLinus Torvalds 108785326fa5SDenis V. Lunev int fib_sync_down_dev(struct net_device *dev, int force) 108885326fa5SDenis V. Lunev { 108985326fa5SDenis V. Lunev int ret = 0; 109085326fa5SDenis V. Lunev int scope = RT_SCOPE_NOWHERE; 10911da177e4SLinus Torvalds struct fib_info *prev_fi = NULL; 10921da177e4SLinus Torvalds unsigned int hash = fib_devindex_hashfn(dev->ifindex); 10931da177e4SLinus Torvalds struct hlist_head *head = &fib_info_devhash[hash]; 10941da177e4SLinus Torvalds struct hlist_node *node; 10951da177e4SLinus Torvalds struct fib_nh *nh; 10961da177e4SLinus Torvalds 109785326fa5SDenis V. Lunev if (force) 109885326fa5SDenis V. Lunev scope = -1; 109985326fa5SDenis V. Lunev 11001da177e4SLinus Torvalds hlist_for_each_entry(nh, node, head, nh_hash) { 11011da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 11021da177e4SLinus Torvalds int dead; 11031da177e4SLinus Torvalds 11041da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 11051da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 11061da177e4SLinus Torvalds continue; 11071da177e4SLinus Torvalds prev_fi = fi; 11081da177e4SLinus Torvalds dead = 0; 11091da177e4SLinus Torvalds change_nexthops(fi) { 111071fceff0SDavid S. Miller if (nexthop_nh->nh_flags & RTNH_F_DEAD) 11111da177e4SLinus Torvalds dead++; 111271fceff0SDavid S. Miller else if (nexthop_nh->nh_dev == dev && 111371fceff0SDavid S. Miller nexthop_nh->nh_scope != scope) { 111471fceff0SDavid S. 
Miller nexthop_nh->nh_flags |= RTNH_F_DEAD; 11151da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 11161da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 111771fceff0SDavid S. Miller fi->fib_power -= nexthop_nh->nh_power; 111871fceff0SDavid S. Miller nexthop_nh->nh_power = 0; 11191da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 11201da177e4SLinus Torvalds #endif 11211da177e4SLinus Torvalds dead++; 11221da177e4SLinus Torvalds } 11231da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 112471fceff0SDavid S. Miller if (force > 1 && nexthop_nh->nh_dev == dev) { 11251da177e4SLinus Torvalds dead = fi->fib_nhs; 11261da177e4SLinus Torvalds break; 11271da177e4SLinus Torvalds } 11281da177e4SLinus Torvalds #endif 11291da177e4SLinus Torvalds } endfor_nexthops(fi) 11301da177e4SLinus Torvalds if (dead == fi->fib_nhs) { 11311da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 11321da177e4SLinus Torvalds ret++; 11331da177e4SLinus Torvalds } 11341da177e4SLinus Torvalds } 11351da177e4SLinus Torvalds 11361da177e4SLinus Torvalds return ret; 11371da177e4SLinus Torvalds } 11381da177e4SLinus Torvalds 11390c838ff1SDavid S. Miller /* Must be invoked inside of an RCU protected region. */ 11400c838ff1SDavid S. Miller void fib_select_default(struct fib_result *res) 11410c838ff1SDavid S. Miller { 11420c838ff1SDavid S. Miller struct fib_info *fi = NULL, *last_resort = NULL; 11430c838ff1SDavid S. Miller struct list_head *fa_head = res->fa_head; 11440c838ff1SDavid S. Miller struct fib_table *tb = res->table; 11450c838ff1SDavid S. Miller int order = -1, last_idx = -1; 11460c838ff1SDavid S. Miller struct fib_alias *fa; 11470c838ff1SDavid S. Miller 11480c838ff1SDavid S. Miller list_for_each_entry_rcu(fa, fa_head, fa_list) { 11490c838ff1SDavid S. Miller struct fib_info *next_fi = fa->fa_info; 11500c838ff1SDavid S. Miller 11510c838ff1SDavid S. Miller if (fa->fa_scope != res->scope || 11520c838ff1SDavid S. Miller fa->fa_type != RTN_UNICAST) 11530c838ff1SDavid S. 
Miller continue; 11540c838ff1SDavid S. Miller 11550c838ff1SDavid S. Miller if (next_fi->fib_priority > res->fi->fib_priority) 11560c838ff1SDavid S. Miller break; 11570c838ff1SDavid S. Miller if (!next_fi->fib_nh[0].nh_gw || 11580c838ff1SDavid S. Miller next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 11590c838ff1SDavid S. Miller continue; 11600c838ff1SDavid S. Miller 11610c838ff1SDavid S. Miller fib_alias_accessed(fa); 11620c838ff1SDavid S. Miller 11630c838ff1SDavid S. Miller if (fi == NULL) { 11640c838ff1SDavid S. Miller if (next_fi != res->fi) 11650c838ff1SDavid S. Miller break; 11660c838ff1SDavid S. Miller } else if (!fib_detect_death(fi, order, &last_resort, 11670c838ff1SDavid S. Miller &last_idx, tb->tb_default)) { 11680c838ff1SDavid S. Miller fib_result_assign(res, fi); 11690c838ff1SDavid S. Miller tb->tb_default = order; 11700c838ff1SDavid S. Miller goto out; 11710c838ff1SDavid S. Miller } 11720c838ff1SDavid S. Miller fi = next_fi; 11730c838ff1SDavid S. Miller order++; 11740c838ff1SDavid S. Miller } 11750c838ff1SDavid S. Miller 11760c838ff1SDavid S. Miller if (order <= 0 || fi == NULL) { 11770c838ff1SDavid S. Miller tb->tb_default = -1; 11780c838ff1SDavid S. Miller goto out; 11790c838ff1SDavid S. Miller } 11800c838ff1SDavid S. Miller 11810c838ff1SDavid S. Miller if (!fib_detect_death(fi, order, &last_resort, &last_idx, 11820c838ff1SDavid S. Miller tb->tb_default)) { 11830c838ff1SDavid S. Miller fib_result_assign(res, fi); 11840c838ff1SDavid S. Miller tb->tb_default = order; 11850c838ff1SDavid S. Miller goto out; 11860c838ff1SDavid S. Miller } 11870c838ff1SDavid S. Miller 11880c838ff1SDavid S. Miller if (last_idx >= 0) 11890c838ff1SDavid S. Miller fib_result_assign(res, last_resort); 11900c838ff1SDavid S. Miller tb->tb_default = last_idx; 11910c838ff1SDavid S. Miller out: 11920c838ff1SDavid S. Miller rcu_read_unlock(); 11930c838ff1SDavid S. Miller } 11940c838ff1SDavid S. 
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
 * A device has come (back) up: revive the dead nexthops that use it.
 * This only makes sense for multipath routes.
 *
 * Returns the number of fib_info entries on @dev's hash chain that end
 * up with at least one live nexthop; 0 if @dev is not IFF_UP.
 */
int fib_sync_up(struct net_device *dev)
{
	struct fib_info *last_fi = NULL;
	struct hlist_head *bucket;
	struct hlist_node *pos;
	struct fib_nh *nh;
	int revived = 0;

	if (!(dev->flags & IFF_UP))
		return 0;

	bucket = &fib_info_devhash[fib_devindex_hashfn(dev->ifindex)];

	hlist_for_each_entry(nh, pos, bucket, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive = 0;

		BUG_ON(!fi->fib_nhs);
		/* Skip other devices and a fib_info handled on the previous step. */
		if (nh->nh_dev != dev || fi == last_fi)
			continue;
		last_fi = fi;

		change_nexthops(fi) {
			/* Nexthops that are already alive only get counted. */
			if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
				alive++;
				continue;
			}
			/* A dead nexthop stays dead unless its device is up ... */
			if (nexthop_nh->nh_dev == NULL ||
			    !(nexthop_nh->nh_dev->flags & IFF_UP))
				continue;
			/* ... and is @dev, with an in_device attached. */
			if (nexthop_nh->nh_dev != dev ||
			    !__in_dev_get_rtnl(dev))
				continue;

			alive++;
			spin_lock_bh(&fib_multipath_lock);
			nexthop_nh->nh_power = 0;
			nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
			spin_unlock_bh(&fib_multipath_lock);
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~RTNH_F_DEAD;
			revived++;
		}
	}

	return revived;
}

/*
 * Pick the nexthop of a multipath route and store its index in
 * res->nh_sel.  Each live nexthop holds nh_power credits, refilled from
 * nh_weight once the route's fib_power is exhausted; a selection spends
 * one credit.
 *
 * The algorithm is suboptimal, but it provides really fair weighted
 * route distribution.
 */
void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
	struct fib_info *fi = res->fi;
	int w;

	spin_lock_bh(&fib_multipath_lock);
	if (fi->fib_power <= 0) {
		int total = 0;

		/* Credits exhausted: refill every live nexthop from its weight. */
		change_nexthops(fi) {
			if (nexthop_nh->nh_flags & RTNH_F_DEAD)
				continue;
			total += nexthop_nh->nh_weight;
			nexthop_nh->nh_power = nexthop_nh->nh_weight;
		} endfor_nexthops(fi);
		fi->fib_power = total;
		if (total <= 0) {
			spin_unlock_bh(&fib_multipath_lock);
			/* Race condition: route has just become dead. */
			res->nh_sel = 0;
			return;
		}
	}

	/*
	 * w should be a random number in [0..fi->fib_power-1];
	 * jiffies is a pretty bad approximation of one.
	 */
	w = jiffies % fi->fib_power;

	change_nexthops(fi) {
		if ((nexthop_nh->nh_flags & RTNH_F_DEAD) ||
		    !nexthop_nh->nh_power)
			continue;

		w -= nexthop_nh->nh_power;
		if (w > 0)
			continue;

		/* This nexthop covers w: spend one credit and select it. */
		nexthop_nh->nh_power--;
		fi->fib_power--;
		res->nh_sel = nhsel;
		spin_unlock_bh(&fib_multipath_lock);
		return;
	} endfor_nexthops(fi);

	/* Race condition: route has just become dead. */
	res->nh_sel = 0;
	spin_unlock_bh(&fib_multipath_lock);
}
#endif