11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * IPv4 Forwarding Information Base: semantics. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 111da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 121da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 131da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds #include <asm/uaccess.h> 171da177e4SLinus Torvalds #include <linux/bitops.h> 181da177e4SLinus Torvalds #include <linux/types.h> 191da177e4SLinus Torvalds #include <linux/kernel.h> 201da177e4SLinus Torvalds #include <linux/jiffies.h> 211da177e4SLinus Torvalds #include <linux/mm.h> 221da177e4SLinus Torvalds #include <linux/string.h> 231da177e4SLinus Torvalds #include <linux/socket.h> 241da177e4SLinus Torvalds #include <linux/sockios.h> 251da177e4SLinus Torvalds #include <linux/errno.h> 261da177e4SLinus Torvalds #include <linux/in.h> 271da177e4SLinus Torvalds #include <linux/inet.h> 2814c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h> 291da177e4SLinus Torvalds #include <linux/netdevice.h> 301da177e4SLinus Torvalds #include <linux/if_arp.h> 311da177e4SLinus Torvalds #include <linux/proc_fs.h> 321da177e4SLinus Torvalds #include <linux/skbuff.h> 331da177e4SLinus Torvalds #include <linux/init.h> 345a0e3ad6STejun Heo #include <linux/slab.h> 351da177e4SLinus Torvalds 3614c85021SArnaldo Carvalho de Melo #include <net/arp.h> 371da177e4SLinus Torvalds #include <net/ip.h> 381da177e4SLinus Torvalds #include <net/protocol.h> 391da177e4SLinus Torvalds #include <net/route.h> 401da177e4SLinus Torvalds #include <net/tcp.h> 411da177e4SLinus Torvalds #include <net/sock.h> 421da177e4SLinus Torvalds #include <net/ip_fib.h> 43f21c7bc5SThomas Graf #include <net/netlink.h> 444e902c57SThomas Graf #include <net/nexthop.h> 451da177e4SLinus Torvalds 461da177e4SLinus Torvalds #include "fib_lookup.h" 471da177e4SLinus Torvalds 48832b4c5eSStephen Hemminger static DEFINE_SPINLOCK(fib_info_lock); 491da177e4SLinus Torvalds static struct hlist_head *fib_info_hash; 501da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash; 51123b9731SDavid S. Miller static unsigned int fib_info_hash_size; 521da177e4SLinus Torvalds static unsigned int fib_info_cnt; 531da177e4SLinus Torvalds 541da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8 551da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 561da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 571da177e4SLinus Torvalds 581da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 591da177e4SLinus Torvalds 601da177e4SLinus Torvalds static DEFINE_SPINLOCK(fib_multipath_lock); 611da177e4SLinus Torvalds 626a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 636a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh; \ 646a31d2a9SEric Dumazet for (nhsel = 0, nh = (fi)->fib_nh; \ 656a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 666a31d2a9SEric Dumazet nh++, nhsel++) 671da177e4SLinus Torvalds 686a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 696a31d2a9SEric Dumazet int nhsel; struct fib_nh *nexthop_nh; \ 706a31d2a9SEric Dumazet for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 716a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 726a31d2a9SEric Dumazet nexthop_nh++, nhsel++) 731da177e4SLinus Torvalds 741da177e4SLinus Torvalds #else /* CONFIG_IP_ROUTE_MULTIPATH */ 751da177e4SLinus Torvalds 761da177e4SLinus Torvalds /* Hope, that gcc will optimize it to get rid of dummy loop */ 771da177e4SLinus Torvalds 786a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 796a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \ 801da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 811da177e4SLinus Torvalds 826a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 836a31d2a9SEric Dumazet int nhsel; \ 846a31d2a9SEric Dumazet struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 851da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 861da177e4SLinus Torvalds 871da177e4SLinus Torvalds #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 881da177e4SLinus Torvalds 891da177e4SLinus Torvalds #define endfor_nexthops(fi) } 901da177e4SLinus Torvalds 911da177e4SLinus Torvalds 923be0686bSDavid S. Miller const struct fib_prop fib_props[RTN_MAX + 1] = { 936a31d2a9SEric Dumazet [RTN_UNSPEC] = { 941da177e4SLinus Torvalds .error = 0, 951da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 966a31d2a9SEric Dumazet }, 976a31d2a9SEric Dumazet [RTN_UNICAST] = { 981da177e4SLinus Torvalds .error = 0, 991da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1006a31d2a9SEric Dumazet }, 1016a31d2a9SEric Dumazet [RTN_LOCAL] = { 1021da177e4SLinus Torvalds .error = 0, 1031da177e4SLinus Torvalds .scope = RT_SCOPE_HOST, 1046a31d2a9SEric Dumazet }, 1056a31d2a9SEric Dumazet [RTN_BROADCAST] = { 1061da177e4SLinus Torvalds .error = 0, 1071da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1086a31d2a9SEric Dumazet }, 1096a31d2a9SEric Dumazet [RTN_ANYCAST] = { 1101da177e4SLinus Torvalds .error = 0, 1111da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1126a31d2a9SEric Dumazet }, 1136a31d2a9SEric Dumazet [RTN_MULTICAST] = { 1141da177e4SLinus Torvalds .error = 0, 1151da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1166a31d2a9SEric Dumazet }, 1176a31d2a9SEric Dumazet [RTN_BLACKHOLE] = { 1181da177e4SLinus Torvalds .error = -EINVAL, 1191da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1206a31d2a9SEric Dumazet }, 1216a31d2a9SEric Dumazet [RTN_UNREACHABLE] = { 1221da177e4SLinus Torvalds .error = -EHOSTUNREACH, 1231da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1246a31d2a9SEric Dumazet }, 1256a31d2a9SEric Dumazet [RTN_PROHIBIT] = { 1261da177e4SLinus Torvalds .error = -EACCES, 1271da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1286a31d2a9SEric Dumazet }, 1296a31d2a9SEric Dumazet [RTN_THROW] = { 1301da177e4SLinus Torvalds .error = -EAGAIN, 1311da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1326a31d2a9SEric Dumazet }, 1336a31d2a9SEric Dumazet [RTN_NAT] = { 1341da177e4SLinus Torvalds .error = -EINVAL, 1351da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1366a31d2a9SEric Dumazet }, 1376a31d2a9SEric Dumazet [RTN_XRESOLVE] = { 1381da177e4SLinus Torvalds .error = -EINVAL, 1391da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1406a31d2a9SEric Dumazet }, 1411da177e4SLinus Torvalds }; 1421da177e4SLinus Torvalds 143c5038a83SDavid S. Miller static void rt_fibinfo_free(struct rtable __rcu **rtp) 14454764bb6SEric Dumazet { 14554764bb6SEric Dumazet struct rtable *rt = rcu_dereference_protected(*rtp, 1); 14654764bb6SEric Dumazet 14754764bb6SEric Dumazet if (!rt) 14854764bb6SEric Dumazet return; 14954764bb6SEric Dumazet 15054764bb6SEric Dumazet /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); 15154764bb6SEric Dumazet * because we waited an RCU grace period before calling 15254764bb6SEric Dumazet * free_fib_info_rcu() 15354764bb6SEric Dumazet */ 15454764bb6SEric Dumazet 15554764bb6SEric Dumazet dst_free(&rt->dst); 15654764bb6SEric Dumazet } 15754764bb6SEric Dumazet 158c5038a83SDavid S. Miller static void free_nh_exceptions(struct fib_nh *nh) 159c5038a83SDavid S. Miller { 160caa41527SEric Dumazet struct fnhe_hash_bucket *hash; 161c5038a83SDavid S. Miller int i; 162c5038a83SDavid S. Miller 163caa41527SEric Dumazet hash = rcu_dereference_protected(nh->nh_exceptions, 1); 164caa41527SEric Dumazet if (!hash) 165caa41527SEric Dumazet return; 166c5038a83SDavid S. Miller for (i = 0; i < FNHE_HASH_SIZE; i++) { 167c5038a83SDavid S. Miller struct fib_nh_exception *fnhe; 168c5038a83SDavid S. Miller 169c5038a83SDavid S. Miller fnhe = rcu_dereference_protected(hash[i].chain, 1); 170c5038a83SDavid S. Miller while (fnhe) { 171c5038a83SDavid S. Miller struct fib_nh_exception *next; 172c5038a83SDavid S. Miller 173c5038a83SDavid S. Miller next = rcu_dereference_protected(fnhe->fnhe_next, 1); 174c5038a83SDavid S. Miller 1752ffae99dSTimo Teräs rt_fibinfo_free(&fnhe->fnhe_rth_input); 1762ffae99dSTimo Teräs rt_fibinfo_free(&fnhe->fnhe_rth_output); 177c5038a83SDavid S. Miller 178c5038a83SDavid S. Miller kfree(fnhe); 179c5038a83SDavid S. Miller 180c5038a83SDavid S. Miller fnhe = next; 181c5038a83SDavid S. Miller } 182c5038a83SDavid S. Miller } 183c5038a83SDavid S. Miller kfree(hash); 184c5038a83SDavid S. Miller } 185c5038a83SDavid S. Miller 186c5038a83SDavid S. Miller static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) 187d26b3a7cSEric Dumazet { 188d26b3a7cSEric Dumazet int cpu; 189d26b3a7cSEric Dumazet 190d26b3a7cSEric Dumazet if (!rtp) 191d26b3a7cSEric Dumazet return; 192d26b3a7cSEric Dumazet 193d26b3a7cSEric Dumazet for_each_possible_cpu(cpu) { 194d26b3a7cSEric Dumazet struct rtable *rt; 195d26b3a7cSEric Dumazet 196d26b3a7cSEric Dumazet rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); 197d26b3a7cSEric Dumazet if (rt) 198d26b3a7cSEric Dumazet dst_free(&rt->dst); 199d26b3a7cSEric Dumazet } 200d26b3a7cSEric Dumazet free_percpu(rtp); 201d26b3a7cSEric Dumazet } 202d26b3a7cSEric Dumazet 2031da177e4SLinus Torvalds /* Release a nexthop info record */ 20419c1ea14SYan, Zheng static void free_fib_info_rcu(struct rcu_head *head) 20519c1ea14SYan, Zheng { 20619c1ea14SYan, Zheng struct fib_info *fi = container_of(head, struct fib_info, rcu); 20719c1ea14SYan, Zheng 208e49cc0daSYanmin Zhang change_nexthops(fi) { 209e49cc0daSYanmin Zhang if (nexthop_nh->nh_dev) 210e49cc0daSYanmin Zhang dev_put(nexthop_nh->nh_dev); 2114895c771SDavid S. Miller free_nh_exceptions(nexthop_nh); 212c5038a83SDavid S. Miller rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); 213c5038a83SDavid S. Miller rt_fibinfo_free(&nexthop_nh->nh_rth_input); 214e49cc0daSYanmin Zhang } endfor_nexthops(fi); 215e49cc0daSYanmin Zhang 21619c1ea14SYan, Zheng if (fi->fib_metrics != (u32 *) dst_default_metrics) 21719c1ea14SYan, Zheng kfree(fi->fib_metrics); 21819c1ea14SYan, Zheng kfree(fi); 21919c1ea14SYan, Zheng } 2201da177e4SLinus Torvalds 2211da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi) 2221da177e4SLinus Torvalds { 2231da177e4SLinus Torvalds if (fi->fib_dead == 0) { 224058bd4d2SJoe Perches pr_warn("Freeing alive fib_info %p\n", fi); 2251da177e4SLinus Torvalds return; 2261da177e4SLinus Torvalds } 2271da177e4SLinus Torvalds fib_info_cnt--; 2287a9bc9b8SDavid S. Miller #ifdef CONFIG_IP_ROUTE_CLASSID 2297a9bc9b8SDavid S. Miller change_nexthops(fi) { 2307a9bc9b8SDavid S. Miller if (nexthop_nh->nh_tclassid) 231f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users--; 2327a9bc9b8SDavid S. Miller } endfor_nexthops(fi); 2337a9bc9b8SDavid S. Miller #endif 23419c1ea14SYan, Zheng call_rcu(&fi->rcu, free_fib_info_rcu); 2351da177e4SLinus Torvalds } 2361da177e4SLinus Torvalds 2371da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi) 2381da177e4SLinus Torvalds { 239832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 2401da177e4SLinus Torvalds if (fi && --fi->fib_treeref == 0) { 2411da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 2421da177e4SLinus Torvalds if (fi->fib_prefsrc) 2431da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 2441da177e4SLinus Torvalds change_nexthops(fi) { 24571fceff0SDavid S. Miller if (!nexthop_nh->nh_dev) 2461da177e4SLinus Torvalds continue; 24771fceff0SDavid S. Miller hlist_del(&nexthop_nh->nh_hash); 2481da177e4SLinus Torvalds } endfor_nexthops(fi) 2491da177e4SLinus Torvalds fi->fib_dead = 1; 2501da177e4SLinus Torvalds fib_info_put(fi); 2511da177e4SLinus Torvalds } 252832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 2531da177e4SLinus Torvalds } 2541da177e4SLinus Torvalds 2556a31d2a9SEric Dumazet static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 2561da177e4SLinus Torvalds { 2571da177e4SLinus Torvalds const struct fib_nh *onh = ofi->fib_nh; 2581da177e4SLinus Torvalds 2591da177e4SLinus Torvalds for_nexthops(fi) { 2601da177e4SLinus Torvalds if (nh->nh_oif != onh->nh_oif || 2611da177e4SLinus Torvalds nh->nh_gw != onh->nh_gw || 2621da177e4SLinus Torvalds nh->nh_scope != onh->nh_scope || 2631da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 2641da177e4SLinus Torvalds nh->nh_weight != onh->nh_weight || 2651da177e4SLinus Torvalds #endif 266c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 2671da177e4SLinus Torvalds nh->nh_tclassid != onh->nh_tclassid || 2681da177e4SLinus Torvalds #endif 2698a3d0316SAndy Gospodarek ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK)) 2701da177e4SLinus Torvalds return -1; 2711da177e4SLinus Torvalds onh++; 2721da177e4SLinus Torvalds } endfor_nexthops(fi); 2731da177e4SLinus Torvalds return 0; 2741da177e4SLinus Torvalds } 2751da177e4SLinus Torvalds 27688ebc72fSDavid S. Miller static inline unsigned int fib_devindex_hashfn(unsigned int val) 27788ebc72fSDavid S. Miller { 27888ebc72fSDavid S. Miller unsigned int mask = DEVINDEX_HASHSIZE - 1; 27988ebc72fSDavid S. Miller 28088ebc72fSDavid S. Miller return (val ^ 28188ebc72fSDavid S. Miller (val >> DEVINDEX_HASHBITS) ^ 28288ebc72fSDavid S. Miller (val >> (DEVINDEX_HASHBITS * 2))) & mask; 28388ebc72fSDavid S. Miller } 28488ebc72fSDavid S. Miller 2851da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 2861da177e4SLinus Torvalds { 287123b9731SDavid S. Miller unsigned int mask = (fib_info_hash_size - 1); 2881da177e4SLinus Torvalds unsigned int val = fi->fib_nhs; 2891da177e4SLinus Torvalds 29037e826c5SDavid S. Miller val ^= (fi->fib_protocol << 8) | fi->fib_scope; 29181f7bf6cSAl Viro val ^= (__force u32)fi->fib_prefsrc; 2921da177e4SLinus Torvalds val ^= fi->fib_priority; 29388ebc72fSDavid S. Miller for_nexthops(fi) { 29488ebc72fSDavid S. Miller val ^= fib_devindex_hashfn(nh->nh_oif); 29588ebc72fSDavid S. Miller } endfor_nexthops(fi) 2961da177e4SLinus Torvalds 2971da177e4SLinus Torvalds return (val ^ (val >> 7) ^ (val >> 12)) & mask; 2981da177e4SLinus Torvalds } 2991da177e4SLinus Torvalds 3001da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi) 3011da177e4SLinus Torvalds { 3021da177e4SLinus Torvalds struct hlist_head *head; 3031da177e4SLinus Torvalds struct fib_info *fi; 3041da177e4SLinus Torvalds unsigned int hash; 3051da177e4SLinus Torvalds 3061da177e4SLinus Torvalds hash = fib_info_hashfn(nfi); 3071da177e4SLinus Torvalds head = &fib_info_hash[hash]; 3081da177e4SLinus Torvalds 309b67bfe0dSSasha Levin hlist_for_each_entry(fi, head, fib_hash) { 31009ad9bc7SOctavian Purdila if (!net_eq(fi->fib_net, nfi->fib_net)) 3114814bdbdSDenis V. Lunev continue; 3121da177e4SLinus Torvalds if (fi->fib_nhs != nfi->fib_nhs) 3131da177e4SLinus Torvalds continue; 3141da177e4SLinus Torvalds if (nfi->fib_protocol == fi->fib_protocol && 31537e826c5SDavid S. Miller nfi->fib_scope == fi->fib_scope && 3161da177e4SLinus Torvalds nfi->fib_prefsrc == fi->fib_prefsrc && 3171da177e4SLinus Torvalds nfi->fib_priority == fi->fib_priority && 318f4ef85bbSEric Dumazet nfi->fib_type == fi->fib_type && 3191da177e4SLinus Torvalds memcmp(nfi->fib_metrics, fi->fib_metrics, 320fcd13f42SEric Dumazet sizeof(u32) * RTAX_MAX) == 0 && 3218a3d0316SAndy Gospodarek !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && 3221da177e4SLinus Torvalds (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 3231da177e4SLinus Torvalds return fi; 3241da177e4SLinus Torvalds } 3251da177e4SLinus Torvalds 3261da177e4SLinus Torvalds return NULL; 3271da177e4SLinus Torvalds } 3281da177e4SLinus Torvalds 3291da177e4SLinus Torvalds /* Check, that the gateway is already configured. 3306a31d2a9SEric Dumazet * Used only by redirect accept routine. 3311da177e4SLinus Torvalds */ 332d878e72eSAl Viro int ip_fib_check_default(__be32 gw, struct net_device *dev) 3331da177e4SLinus Torvalds { 3341da177e4SLinus Torvalds struct hlist_head *head; 3351da177e4SLinus Torvalds struct fib_nh *nh; 3361da177e4SLinus Torvalds unsigned int hash; 3371da177e4SLinus Torvalds 338832b4c5eSStephen Hemminger spin_lock(&fib_info_lock); 3391da177e4SLinus Torvalds 3401da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 3411da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 342b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 3431da177e4SLinus Torvalds if (nh->nh_dev == dev && 3441da177e4SLinus Torvalds nh->nh_gw == gw && 3451da177e4SLinus Torvalds !(nh->nh_flags & RTNH_F_DEAD)) { 346832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 3471da177e4SLinus Torvalds return 0; 3481da177e4SLinus Torvalds } 3491da177e4SLinus Torvalds } 3501da177e4SLinus Torvalds 351832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 3521da177e4SLinus Torvalds 3531da177e4SLinus Torvalds return -1; 3541da177e4SLinus Torvalds } 3551da177e4SLinus Torvalds 356339bf98fSThomas Graf static inline size_t fib_nlmsg_size(struct fib_info *fi) 357339bf98fSThomas Graf { 358339bf98fSThomas Graf size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 359339bf98fSThomas Graf + nla_total_size(4) /* RTA_TABLE */ 360339bf98fSThomas Graf + nla_total_size(4) /* RTA_DST */ 361339bf98fSThomas Graf + nla_total_size(4) /* RTA_PRIORITY */ 362ea697639SDaniel Borkmann + nla_total_size(4) /* RTA_PREFSRC */ 363ea697639SDaniel Borkmann + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ 364339bf98fSThomas Graf 365339bf98fSThomas Graf /* space for nested metrics */ 366339bf98fSThomas Graf payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 367339bf98fSThomas Graf 368339bf98fSThomas Graf if (fi->fib_nhs) { 369339bf98fSThomas Graf /* Also handles the special case fib_nhs == 1 */ 370339bf98fSThomas Graf 371339bf98fSThomas Graf /* each nexthop is packed in an attribute */ 372339bf98fSThomas Graf size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 373339bf98fSThomas Graf 374339bf98fSThomas Graf /* may contain flow and gateway attribute */ 375339bf98fSThomas Graf nhsize += 2 * nla_total_size(4); 376339bf98fSThomas Graf 377339bf98fSThomas Graf /* all nexthops are packed in a nested attribute */ 378339bf98fSThomas Graf payload += nla_total_size(fi->fib_nhs * nhsize); 379339bf98fSThomas Graf } 380339bf98fSThomas Graf 381339bf98fSThomas Graf return payload; 382339bf98fSThomas Graf } 383339bf98fSThomas Graf 38481f7bf6cSAl Viro void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 3859877b253SJoe Perches int dst_len, u32 tb_id, const struct nl_info *info, 386b8f55831SMilan Kocian unsigned int nlm_flags) 3871da177e4SLinus Torvalds { 3881da177e4SLinus Torvalds struct sk_buff *skb; 3894e902c57SThomas Graf u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 390f21c7bc5SThomas Graf int err = -ENOBUFS; 3911da177e4SLinus Torvalds 392339bf98fSThomas Graf skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 39351456b29SIan Morris if (!skb) 394f21c7bc5SThomas Graf goto errout; 3951da177e4SLinus Torvalds 39615e47304SEric W. Biederman err = fib_dump_info(skb, info->portid, seq, event, tb_id, 39737e826c5SDavid S. Miller fa->fa_type, key, dst_len, 398b8f55831SMilan Kocian fa->fa_tos, fa->fa_info, nlm_flags); 39926932566SPatrick McHardy if (err < 0) { 40026932566SPatrick McHardy /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 40126932566SPatrick McHardy WARN_ON(err == -EMSGSIZE); 40226932566SPatrick McHardy kfree_skb(skb); 40326932566SPatrick McHardy goto errout; 40426932566SPatrick McHardy } 40515e47304SEric W. Biederman rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, 4064e902c57SThomas Graf info->nlh, GFP_KERNEL); 4071ce85fe4SPablo Neira Ayuso return; 408f21c7bc5SThomas Graf errout: 409f21c7bc5SThomas Graf if (err < 0) 4104d1169c1SDenis V. Lunev rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 4111da177e4SLinus Torvalds } 4121da177e4SLinus Torvalds 413c9cb6b6eSStephen Hemminger static int fib_detect_death(struct fib_info *fi, int order, 414c9cb6b6eSStephen Hemminger struct fib_info **last_resort, int *last_idx, 415c9cb6b6eSStephen Hemminger int dflt) 4161da177e4SLinus Torvalds { 4171da177e4SLinus Torvalds struct neighbour *n; 4181da177e4SLinus Torvalds int state = NUD_NONE; 4191da177e4SLinus Torvalds 4201da177e4SLinus Torvalds n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 4211da177e4SLinus Torvalds if (n) { 4221da177e4SLinus Torvalds state = n->nud_state; 4231da177e4SLinus Torvalds neigh_release(n); 4241da177e4SLinus Torvalds } 4251da177e4SLinus Torvalds if (state == NUD_REACHABLE) 4261da177e4SLinus Torvalds return 0; 427c17860a0SDenis V. Lunev if ((state & NUD_VALID) && order != dflt) 4281da177e4SLinus Torvalds return 0; 4291da177e4SLinus Torvalds if ((state & NUD_VALID) || 430c17860a0SDenis V. Lunev (*last_idx < 0 && order > dflt)) { 4311da177e4SLinus Torvalds *last_resort = fi; 4321da177e4SLinus Torvalds *last_idx = order; 4331da177e4SLinus Torvalds } 4341da177e4SLinus Torvalds return 1; 4351da177e4SLinus Torvalds } 4361da177e4SLinus Torvalds 4371da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4381da177e4SLinus Torvalds 4394e902c57SThomas Graf static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) 4401da177e4SLinus Torvalds { 4411da177e4SLinus Torvalds int nhs = 0; 4421da177e4SLinus Torvalds 4434e902c57SThomas Graf while (rtnh_ok(rtnh, remaining)) { 4441da177e4SLinus Torvalds nhs++; 4454e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 4461da177e4SLinus Torvalds } 4471da177e4SLinus Torvalds 4484e902c57SThomas Graf /* leftover implies invalid nexthop configuration, discard it */ 4494e902c57SThomas Graf return remaining > 0 ? 0 : nhs; 4504e902c57SThomas Graf } 4511da177e4SLinus Torvalds 4524e902c57SThomas Graf static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 4534e902c57SThomas Graf int remaining, struct fib_config *cfg) 4544e902c57SThomas Graf { 4551da177e4SLinus Torvalds change_nexthops(fi) { 4564e902c57SThomas Graf int attrlen; 4574e902c57SThomas Graf 4584e902c57SThomas Graf if (!rtnh_ok(rtnh, remaining)) 4591da177e4SLinus Torvalds return -EINVAL; 4604e902c57SThomas Graf 46171fceff0SDavid S. Miller nexthop_nh->nh_flags = 46271fceff0SDavid S. Miller (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 46371fceff0SDavid S. Miller nexthop_nh->nh_oif = rtnh->rtnh_ifindex; 46471fceff0SDavid S. Miller nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; 4654e902c57SThomas Graf 4664e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 4674e902c57SThomas Graf if (attrlen > 0) { 4684e902c57SThomas Graf struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 4694e902c57SThomas Graf 4704e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 47167b61f6cSJiri Benc nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0; 472c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 4734e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 47471fceff0SDavid S. Miller nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 4757a9bc9b8SDavid S. Miller if (nexthop_nh->nh_tclassid) 476f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users++; 4771da177e4SLinus Torvalds #endif 4781da177e4SLinus Torvalds } 4794e902c57SThomas Graf 4804e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 4811da177e4SLinus Torvalds } endfor_nexthops(fi); 4824e902c57SThomas Graf 4831da177e4SLinus Torvalds return 0; 4841da177e4SLinus Torvalds } 4851da177e4SLinus Torvalds 4861da177e4SLinus Torvalds #endif 4871da177e4SLinus Torvalds 4884e902c57SThomas Graf int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) 4891da177e4SLinus Torvalds { 4901da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4914e902c57SThomas Graf struct rtnexthop *rtnh; 4924e902c57SThomas Graf int remaining; 4931da177e4SLinus Torvalds #endif 4941da177e4SLinus Torvalds 4954e902c57SThomas Graf if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 4961da177e4SLinus Torvalds return 1; 4971da177e4SLinus Torvalds 4984e902c57SThomas Graf if (cfg->fc_oif || cfg->fc_gw) { 4994e902c57SThomas Graf if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 5004e902c57SThomas Graf (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 5011da177e4SLinus Torvalds return 0; 5021da177e4SLinus Torvalds return 1; 5031da177e4SLinus Torvalds } 5041da177e4SLinus Torvalds 5051da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 50651456b29SIan Morris if (!cfg->fc_mp) 5071da177e4SLinus Torvalds return 0; 5084e902c57SThomas Graf 5094e902c57SThomas Graf rtnh = cfg->fc_mp; 5104e902c57SThomas Graf remaining = cfg->fc_mp_len; 5111da177e4SLinus Torvalds 5121da177e4SLinus Torvalds for_nexthops(fi) { 5134e902c57SThomas Graf int attrlen; 5141da177e4SLinus Torvalds 5154e902c57SThomas Graf if (!rtnh_ok(rtnh, remaining)) 5161da177e4SLinus Torvalds return -EINVAL; 5174e902c57SThomas Graf 5184e902c57SThomas Graf if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) 5191da177e4SLinus Torvalds return 1; 5204e902c57SThomas Graf 5214e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 522f76936d0SJiri Pirko if (attrlen > 0) { 5234e902c57SThomas Graf struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 5244e902c57SThomas Graf 5254e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 52667b61f6cSJiri Benc if (nla && nla_get_in_addr(nla) != nh->nh_gw) 5271da177e4SLinus Torvalds return 1; 528c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 5294e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 5304e902c57SThomas Graf if (nla && nla_get_u32(nla) != nh->nh_tclassid) 5311da177e4SLinus Torvalds return 1; 5321da177e4SLinus Torvalds #endif 5331da177e4SLinus Torvalds } 5344e902c57SThomas Graf 5354e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 5361da177e4SLinus Torvalds } endfor_nexthops(fi); 5371da177e4SLinus Torvalds #endif 5381da177e4SLinus Torvalds return 0; 5391da177e4SLinus Torvalds } 5401da177e4SLinus Torvalds 5411da177e4SLinus Torvalds 5421da177e4SLinus Torvalds /* 5436a31d2a9SEric Dumazet * Picture 5446a31d2a9SEric Dumazet * ------- 5456a31d2a9SEric Dumazet * 5466a31d2a9SEric Dumazet * Semantics of nexthop is very messy by historical reasons. 5476a31d2a9SEric Dumazet * We have to take into account, that: 5486a31d2a9SEric Dumazet * a) gateway can be actually local interface address, 5496a31d2a9SEric Dumazet * so that gatewayed route is direct. 5506a31d2a9SEric Dumazet * b) gateway must be on-link address, possibly 5516a31d2a9SEric Dumazet * described not by an ifaddr, but also by a direct route. 5526a31d2a9SEric Dumazet * c) If both gateway and interface are specified, they should not 5536a31d2a9SEric Dumazet * contradict. 5546a31d2a9SEric Dumazet * d) If we use tunnel routes, gateway could be not on-link. 5556a31d2a9SEric Dumazet * 5566a31d2a9SEric Dumazet * Attempt to reconcile all of these (alas, self-contradictory) conditions 5576a31d2a9SEric Dumazet * results in pretty ugly and hairy code with obscure logic. 5586a31d2a9SEric Dumazet * 5596a31d2a9SEric Dumazet * I chose to generalized it instead, so that the size 5606a31d2a9SEric Dumazet * of code does not increase practically, but it becomes 5616a31d2a9SEric Dumazet * much more general. 5626a31d2a9SEric Dumazet * Every prefix is assigned a "scope" value: "host" is local address, 5636a31d2a9SEric Dumazet * "link" is direct route, 5646a31d2a9SEric Dumazet * [ ... "site" ... "interior" ... ] 5656a31d2a9SEric Dumazet * and "universe" is true gateway route with global meaning. 5666a31d2a9SEric Dumazet * 5676a31d2a9SEric Dumazet * Every prefix refers to a set of "nexthop"s (gw, oif), 5686a31d2a9SEric Dumazet * where gw must have narrower scope. This recursion stops 5696a31d2a9SEric Dumazet * when gw has LOCAL scope or if "nexthop" is declared ONLINK, 5706a31d2a9SEric Dumazet * which means that gw is forced to be on link. 5716a31d2a9SEric Dumazet * 5726a31d2a9SEric Dumazet * Code is still hairy, but now it is apparently logically 5736a31d2a9SEric Dumazet * consistent and very flexible. F.e. as by-product it allows 5746a31d2a9SEric Dumazet * to co-exists in peace independent exterior and interior 5756a31d2a9SEric Dumazet * routing processes. 5766a31d2a9SEric Dumazet * 5776a31d2a9SEric Dumazet * Normally it looks as following. 5786a31d2a9SEric Dumazet * 5796a31d2a9SEric Dumazet * {universe prefix} -> (gw, oif) [scope link] 5806a31d2a9SEric Dumazet * | 5816a31d2a9SEric Dumazet * |-> {link prefix} -> (gw, oif) [scope local] 5826a31d2a9SEric Dumazet * | 5836a31d2a9SEric Dumazet * |-> {local prefix} (terminal node) 5841da177e4SLinus Torvalds */ 5854e902c57SThomas Graf static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 5864e902c57SThomas Graf struct fib_nh *nh) 5871da177e4SLinus Torvalds { 5881da177e4SLinus Torvalds int err; 58986167a37SDenis V. Lunev struct net *net; 5906a31d2a9SEric Dumazet struct net_device *dev; 5911da177e4SLinus Torvalds 59286167a37SDenis V. Lunev net = cfg->fc_nlinfo.nl_net; 5931da177e4SLinus Torvalds if (nh->nh_gw) { 5941da177e4SLinus Torvalds struct fib_result res; 5951da177e4SLinus Torvalds 5961da177e4SLinus Torvalds if (nh->nh_flags & RTNH_F_ONLINK) { 5971da177e4SLinus Torvalds 5984e902c57SThomas Graf if (cfg->fc_scope >= RT_SCOPE_LINK) 5991da177e4SLinus Torvalds return -EINVAL; 60086167a37SDenis V. Lunev if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) 6011da177e4SLinus Torvalds return -EINVAL; 6026a31d2a9SEric Dumazet dev = __dev_get_by_index(net, nh->nh_oif); 6036a31d2a9SEric Dumazet if (!dev) 6041da177e4SLinus Torvalds return -ENODEV; 6051da177e4SLinus Torvalds if (!(dev->flags & IFF_UP)) 6061da177e4SLinus Torvalds return -ENETDOWN; 6078a3d0316SAndy Gospodarek if (!netif_carrier_ok(dev)) 6088a3d0316SAndy Gospodarek nh->nh_flags |= RTNH_F_LINKDOWN; 6091da177e4SLinus Torvalds nh->nh_dev = dev; 6101da177e4SLinus Torvalds dev_hold(dev); 6111da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_LINK; 6121da177e4SLinus Torvalds return 0; 6131da177e4SLinus Torvalds } 614ebc0ffaeSEric Dumazet rcu_read_lock(); 6151da177e4SLinus Torvalds { 6169ade2286SDavid S. Miller struct flowi4 fl4 = { 6179ade2286SDavid S. Miller .daddr = nh->nh_gw, 6189ade2286SDavid S. Miller .flowi4_scope = cfg->fc_scope + 1, 6199ade2286SDavid S. Miller .flowi4_oif = nh->nh_oif, 6206a662719SCong Wang .flowi4_iif = LOOPBACK_IFINDEX, 6214e902c57SThomas Graf }; 6221da177e4SLinus Torvalds 6231da177e4SLinus Torvalds /* It is not necessary, but requires a bit of thinking */ 6249ade2286SDavid S. Miller if (fl4.flowi4_scope < RT_SCOPE_LINK) 6259ade2286SDavid S. Miller fl4.flowi4_scope = RT_SCOPE_LINK; 626*0eeb075fSAndy Gospodarek err = fib_lookup(net, &fl4, &res, 627*0eeb075fSAndy Gospodarek FIB_LOOKUP_IGNORE_LINKSTATE); 628ebc0ffaeSEric Dumazet if (err) { 629ebc0ffaeSEric Dumazet rcu_read_unlock(); 6301da177e4SLinus Torvalds return err; 6311da177e4SLinus Torvalds } 632ebc0ffaeSEric Dumazet } 6331da177e4SLinus Torvalds err = -EINVAL; 6341da177e4SLinus Torvalds if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 6351da177e4SLinus Torvalds goto out; 6361da177e4SLinus Torvalds nh->nh_scope = res.scope; 6371da177e4SLinus Torvalds nh->nh_oif = FIB_RES_OIF(res); 6386a31d2a9SEric Dumazet nh->nh_dev = dev = FIB_RES_DEV(res); 6396a31d2a9SEric Dumazet if (!dev) 6401da177e4SLinus Torvalds goto out; 6416a31d2a9SEric Dumazet dev_hold(dev); 6428a3d0316SAndy Gospodarek if (!netif_carrier_ok(dev)) 6438a3d0316SAndy Gospodarek nh->nh_flags |= RTNH_F_LINKDOWN; 6448723e1b4SEric Dumazet err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; 6451da177e4SLinus Torvalds } else { 6461da177e4SLinus Torvalds struct in_device *in_dev; 6471da177e4SLinus Torvalds 6481da177e4SLinus Torvalds if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) 6491da177e4SLinus Torvalds return -EINVAL; 6501da177e4SLinus Torvalds 6518723e1b4SEric Dumazet rcu_read_lock(); 6528723e1b4SEric Dumazet err = -ENODEV; 65386167a37SDenis V. Lunev in_dev = inetdev_by_index(net, nh->nh_oif); 65451456b29SIan Morris if (!in_dev) 6558723e1b4SEric Dumazet goto out; 6568723e1b4SEric Dumazet err = -ENETDOWN; 6578723e1b4SEric Dumazet if (!(in_dev->dev->flags & IFF_UP)) 6588723e1b4SEric Dumazet goto out; 6591da177e4SLinus Torvalds nh->nh_dev = in_dev->dev; 6601da177e4SLinus Torvalds dev_hold(nh->nh_dev); 6611da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_HOST; 6628a3d0316SAndy Gospodarek if (!netif_carrier_ok(nh->nh_dev)) 6638a3d0316SAndy Gospodarek nh->nh_flags |= RTNH_F_LINKDOWN; 6648723e1b4SEric Dumazet err = 0; 6651da177e4SLinus Torvalds } 6668723e1b4SEric Dumazet out: 6678723e1b4SEric Dumazet rcu_read_unlock(); 6688723e1b4SEric Dumazet return err; 6691da177e4SLinus Torvalds } 6701da177e4SLinus Torvalds 67181f7bf6cSAl Viro static inline unsigned int fib_laddr_hashfn(__be32 val) 6721da177e4SLinus Torvalds { 673123b9731SDavid S. Miller unsigned int mask = (fib_info_hash_size - 1); 6741da177e4SLinus Torvalds 6756a31d2a9SEric Dumazet return ((__force u32)val ^ 6766a31d2a9SEric Dumazet ((__force u32)val >> 7) ^ 6776a31d2a9SEric Dumazet ((__force u32)val >> 14)) & mask; 6781da177e4SLinus Torvalds } 6791da177e4SLinus Torvalds 680123b9731SDavid S. Miller static struct hlist_head *fib_info_hash_alloc(int bytes) 6811da177e4SLinus Torvalds { 6821da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 68388f83491SJoonwoo Park return kzalloc(bytes, GFP_KERNEL); 6841da177e4SLinus Torvalds else 6851da177e4SLinus Torvalds return (struct hlist_head *) 6866a31d2a9SEric Dumazet __get_free_pages(GFP_KERNEL | __GFP_ZERO, 6876a31d2a9SEric Dumazet get_order(bytes)); 6881da177e4SLinus Torvalds } 6891da177e4SLinus Torvalds 690123b9731SDavid S. Miller static void fib_info_hash_free(struct hlist_head *hash, int bytes) 6911da177e4SLinus Torvalds { 6921da177e4SLinus Torvalds if (!hash) 6931da177e4SLinus Torvalds return; 6941da177e4SLinus Torvalds 6951da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 6961da177e4SLinus Torvalds kfree(hash); 6971da177e4SLinus Torvalds else 6981da177e4SLinus Torvalds free_pages((unsigned long) hash, get_order(bytes)); 6991da177e4SLinus Torvalds } 7001da177e4SLinus Torvalds 701123b9731SDavid S. Miller static void fib_info_hash_move(struct hlist_head *new_info_hash, 7021da177e4SLinus Torvalds struct hlist_head *new_laddrhash, 7031da177e4SLinus Torvalds unsigned int new_size) 7041da177e4SLinus Torvalds { 705b7656e7fSDavid S. Miller struct hlist_head *old_info_hash, *old_laddrhash; 706123b9731SDavid S. Miller unsigned int old_size = fib_info_hash_size; 707b7656e7fSDavid S. Miller unsigned int i, bytes; 7081da177e4SLinus Torvalds 709832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 710b7656e7fSDavid S. Miller old_info_hash = fib_info_hash; 711b7656e7fSDavid S. Miller old_laddrhash = fib_info_laddrhash; 712123b9731SDavid S. Miller fib_info_hash_size = new_size; 7131da177e4SLinus Torvalds 7141da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 7151da177e4SLinus Torvalds struct hlist_head *head = &fib_info_hash[i]; 716b67bfe0dSSasha Levin struct hlist_node *n; 7171da177e4SLinus Torvalds struct fib_info *fi; 7181da177e4SLinus Torvalds 719b67bfe0dSSasha Levin hlist_for_each_entry_safe(fi, n, head, fib_hash) { 7201da177e4SLinus Torvalds struct hlist_head *dest; 7211da177e4SLinus Torvalds unsigned int new_hash; 7221da177e4SLinus Torvalds 7231da177e4SLinus Torvalds new_hash = fib_info_hashfn(fi); 7241da177e4SLinus Torvalds dest = &new_info_hash[new_hash]; 7251da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, dest); 7261da177e4SLinus Torvalds } 7271da177e4SLinus Torvalds } 7281da177e4SLinus Torvalds fib_info_hash = new_info_hash; 7291da177e4SLinus Torvalds 7301da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 7311da177e4SLinus Torvalds struct hlist_head *lhead = &fib_info_laddrhash[i]; 732b67bfe0dSSasha Levin struct hlist_node *n; 7331da177e4SLinus Torvalds struct fib_info *fi; 7341da177e4SLinus Torvalds 735b67bfe0dSSasha Levin hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { 7361da177e4SLinus Torvalds struct hlist_head *ldest; 7371da177e4SLinus Torvalds unsigned int new_hash; 7381da177e4SLinus Torvalds 7391da177e4SLinus Torvalds new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 7401da177e4SLinus Torvalds ldest = &new_laddrhash[new_hash]; 7411da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, ldest); 7421da177e4SLinus Torvalds } 7431da177e4SLinus Torvalds } 7441da177e4SLinus Torvalds fib_info_laddrhash = new_laddrhash; 7451da177e4SLinus Torvalds 746832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 747b7656e7fSDavid S. Miller 748b7656e7fSDavid S. Miller bytes = old_size * sizeof(struct hlist_head *); 749123b9731SDavid S. Miller fib_info_hash_free(old_info_hash, bytes); 750123b9731SDavid S. Miller fib_info_hash_free(old_laddrhash, bytes); 7511da177e4SLinus Torvalds } 7521da177e4SLinus Torvalds 753436c3b66SDavid S. Miller __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) 754436c3b66SDavid S. Miller { 755436c3b66SDavid S. Miller nh->nh_saddr = inet_select_addr(nh->nh_dev, 756436c3b66SDavid S. Miller nh->nh_gw, 75737e826c5SDavid S. Miller nh->nh_parent->fib_scope); 758436c3b66SDavid S. Miller nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); 759436c3b66SDavid S. Miller 760436c3b66SDavid S. Miller return nh->nh_saddr; 761436c3b66SDavid S. Miller } 762436c3b66SDavid S. Miller 7634e902c57SThomas Graf struct fib_info *fib_create_info(struct fib_config *cfg) 7641da177e4SLinus Torvalds { 7651da177e4SLinus Torvalds int err; 7661da177e4SLinus Torvalds struct fib_info *fi = NULL; 7671da177e4SLinus Torvalds struct fib_info *ofi; 7681da177e4SLinus Torvalds int nhs = 1; 7697462bd74SDenis V. Lunev struct net *net = cfg->fc_nlinfo.nl_net; 7701da177e4SLinus Torvalds 7714c8237cdSDavid S. Miller if (cfg->fc_type > RTN_MAX) 7724c8237cdSDavid S. Miller goto err_inval; 7734c8237cdSDavid S. Miller 7741da177e4SLinus Torvalds /* Fast check to catch the most weird cases */ 7754e902c57SThomas Graf if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 7761da177e4SLinus Torvalds goto err_inval; 7771da177e4SLinus Torvalds 7781da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 7794e902c57SThomas Graf if (cfg->fc_mp) { 7804e902c57SThomas Graf nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); 7811da177e4SLinus Torvalds if (nhs == 0) 7821da177e4SLinus Torvalds goto err_inval; 7831da177e4SLinus Torvalds } 7841da177e4SLinus Torvalds #endif 7851da177e4SLinus Torvalds 7861da177e4SLinus Torvalds err = -ENOBUFS; 787123b9731SDavid S. Miller if (fib_info_cnt >= fib_info_hash_size) { 788123b9731SDavid S. Miller unsigned int new_size = fib_info_hash_size << 1; 7891da177e4SLinus Torvalds struct hlist_head *new_info_hash; 7901da177e4SLinus Torvalds struct hlist_head *new_laddrhash; 7911da177e4SLinus Torvalds unsigned int bytes; 7921da177e4SLinus Torvalds 7931da177e4SLinus Torvalds if (!new_size) 794d94ce9b2SEric Dumazet new_size = 16; 7951da177e4SLinus Torvalds bytes = new_size * sizeof(struct hlist_head *); 796123b9731SDavid S. Miller new_info_hash = fib_info_hash_alloc(bytes); 797123b9731SDavid S. Miller new_laddrhash = fib_info_hash_alloc(bytes); 7981da177e4SLinus Torvalds if (!new_info_hash || !new_laddrhash) { 799123b9731SDavid S. Miller fib_info_hash_free(new_info_hash, bytes); 800123b9731SDavid S. Miller fib_info_hash_free(new_laddrhash, bytes); 80188f83491SJoonwoo Park } else 802123b9731SDavid S. Miller fib_info_hash_move(new_info_hash, new_laddrhash, new_size); 8031da177e4SLinus Torvalds 804123b9731SDavid S. Miller if (!fib_info_hash_size) 8051da177e4SLinus Torvalds goto failure; 8061da177e4SLinus Torvalds } 8071da177e4SLinus Torvalds 8080da974f4SPanagiotis Issaris fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 80951456b29SIan Morris if (!fi) 8101da177e4SLinus Torvalds goto failure; 811aeefa1ecSSergey Popovich fib_info_cnt++; 812725d1e1bSDavid S. Miller if (cfg->fc_mx) { 8139c150e82SDavid S. Miller fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 8149c150e82SDavid S. Miller if (!fi->fib_metrics) 8159c150e82SDavid S. Miller goto failure; 816725d1e1bSDavid S. Miller } else 817725d1e1bSDavid S. Miller fi->fib_metrics = (u32 *) dst_default_metrics; 8181da177e4SLinus Torvalds 819efd7ef1cSEric W. Biederman fi->fib_net = net; 8204e902c57SThomas Graf fi->fib_protocol = cfg->fc_protocol; 82137e826c5SDavid S. Miller fi->fib_scope = cfg->fc_scope; 8224e902c57SThomas Graf fi->fib_flags = cfg->fc_flags; 8234e902c57SThomas Graf fi->fib_priority = cfg->fc_priority; 8244e902c57SThomas Graf fi->fib_prefsrc = cfg->fc_prefsrc; 825f4ef85bbSEric Dumazet fi->fib_type = cfg->fc_type; 8261da177e4SLinus Torvalds 8271da177e4SLinus Torvalds fi->fib_nhs = nhs; 8281da177e4SLinus Torvalds change_nexthops(fi) { 82971fceff0SDavid S. Miller nexthop_nh->nh_parent = fi; 830d26b3a7cSEric Dumazet nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); 831f8a17175SJulian Anastasov if (!nexthop_nh->nh_pcpu_rth_output) 832f8a17175SJulian Anastasov goto failure; 8331da177e4SLinus Torvalds } endfor_nexthops(fi) 8341da177e4SLinus Torvalds 8354e902c57SThomas Graf if (cfg->fc_mx) { 8364e902c57SThomas Graf struct nlattr *nla; 8374e902c57SThomas Graf int remaining; 8381da177e4SLinus Torvalds 8394e902c57SThomas Graf nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 8408f4c1f9bSThomas Graf int type = nla_type(nla); 8414e902c57SThomas Graf 8424e902c57SThomas Graf if (type) { 8436fac2625SDavid S. Miller u32 val; 8446fac2625SDavid S. Miller 8454e902c57SThomas Graf if (type > RTAX_MAX) 8461da177e4SLinus Torvalds goto err_inval; 847ea697639SDaniel Borkmann if (type == RTAX_CC_ALGO) { 848ea697639SDaniel Borkmann char tmp[TCP_CA_NAME_MAX]; 849ea697639SDaniel Borkmann 850ea697639SDaniel Borkmann nla_strlcpy(tmp, nla, sizeof(tmp)); 851ea697639SDaniel Borkmann val = tcp_ca_get_key_by_name(tmp); 852ea697639SDaniel Borkmann if (val == TCP_CA_UNSPEC) 853ea697639SDaniel Borkmann goto err_inval; 854ea697639SDaniel Borkmann } else { 8556fac2625SDavid S. Miller val = nla_get_u32(nla); 856ea697639SDaniel Borkmann } 8576fac2625SDavid S. Miller if (type == RTAX_ADVMSS && val > 65535 - 40) 8586fac2625SDavid S. Miller val = 65535 - 40; 859710ab6c0SDavid S. Miller if (type == RTAX_MTU && val > 65535 - 15) 860710ab6c0SDavid S. Miller val = 65535 - 15; 8616fac2625SDavid S. Miller fi->fib_metrics[type - 1] = val; 8621da177e4SLinus Torvalds } 8631da177e4SLinus Torvalds } 8644e902c57SThomas Graf } 8651da177e4SLinus Torvalds 8664e902c57SThomas Graf if (cfg->fc_mp) { 8671da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 8684e902c57SThomas Graf err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); 8694e902c57SThomas Graf if (err != 0) 8701da177e4SLinus Torvalds goto failure; 8714e902c57SThomas Graf if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) 8721da177e4SLinus Torvalds goto err_inval; 8734e902c57SThomas Graf if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 8741da177e4SLinus Torvalds goto err_inval; 875c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 8764e902c57SThomas Graf if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 8771da177e4SLinus Torvalds goto err_inval; 8781da177e4SLinus Torvalds #endif 8791da177e4SLinus Torvalds #else 8801da177e4SLinus Torvalds goto err_inval; 8811da177e4SLinus Torvalds #endif 8821da177e4SLinus Torvalds } else { 8831da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 8844e902c57SThomas Graf 8854e902c57SThomas Graf nh->nh_oif = cfg->fc_oif; 8864e902c57SThomas Graf nh->nh_gw = cfg->fc_gw; 8874e902c57SThomas Graf nh->nh_flags = cfg->fc_flags; 888c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 8894e902c57SThomas Graf nh->nh_tclassid = cfg->fc_flow; 8907a9bc9b8SDavid S. Miller if (nh->nh_tclassid) 891f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users++; 8921da177e4SLinus Torvalds #endif 8931da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 8941da177e4SLinus Torvalds nh->nh_weight = 1; 8951da177e4SLinus Torvalds #endif 8961da177e4SLinus Torvalds } 8971da177e4SLinus Torvalds 8984e902c57SThomas Graf if (fib_props[cfg->fc_type].error) { 8994e902c57SThomas Graf if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 9001da177e4SLinus Torvalds goto err_inval; 9011da177e4SLinus Torvalds goto link_it; 9024c8237cdSDavid S. Miller } else { 9034c8237cdSDavid S. Miller switch (cfg->fc_type) { 9044c8237cdSDavid S. Miller case RTN_UNICAST: 9054c8237cdSDavid S. Miller case RTN_LOCAL: 9064c8237cdSDavid S. Miller case RTN_BROADCAST: 9074c8237cdSDavid S. Miller case RTN_ANYCAST: 9084c8237cdSDavid S. Miller case RTN_MULTICAST: 9094c8237cdSDavid S. Miller break; 9104c8237cdSDavid S. Miller default: 9114c8237cdSDavid S. Miller goto err_inval; 9124c8237cdSDavid S. Miller } 9131da177e4SLinus Torvalds } 9141da177e4SLinus Torvalds 9154e902c57SThomas Graf if (cfg->fc_scope > RT_SCOPE_HOST) 9161da177e4SLinus Torvalds goto err_inval; 9171da177e4SLinus Torvalds 9184e902c57SThomas Graf if (cfg->fc_scope == RT_SCOPE_HOST) { 9191da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 9201da177e4SLinus Torvalds 9211da177e4SLinus Torvalds /* Local address is added. */ 9221da177e4SLinus Torvalds if (nhs != 1 || nh->nh_gw) 9231da177e4SLinus Torvalds goto err_inval; 9241da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_NOWHERE; 9257462bd74SDenis V. Lunev nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); 9261da177e4SLinus Torvalds err = -ENODEV; 92751456b29SIan Morris if (!nh->nh_dev) 9281da177e4SLinus Torvalds goto failure; 9291da177e4SLinus Torvalds } else { 9308a3d0316SAndy Gospodarek int linkdown = 0; 9318a3d0316SAndy Gospodarek 9321da177e4SLinus Torvalds change_nexthops(fi) { 9336a31d2a9SEric Dumazet err = fib_check_nh(cfg, fi, nexthop_nh); 9346a31d2a9SEric Dumazet if (err != 0) 9351da177e4SLinus Torvalds goto failure; 9368a3d0316SAndy Gospodarek if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) 9378a3d0316SAndy Gospodarek linkdown++; 9381da177e4SLinus Torvalds } endfor_nexthops(fi) 9398a3d0316SAndy Gospodarek if (linkdown == fi->fib_nhs) 9408a3d0316SAndy Gospodarek fi->fib_flags |= RTNH_F_LINKDOWN; 9411da177e4SLinus Torvalds } 9421da177e4SLinus Torvalds 9431da177e4SLinus Torvalds if (fi->fib_prefsrc) { 9444e902c57SThomas Graf if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 9454e902c57SThomas Graf fi->fib_prefsrc != cfg->fc_dst) 9467462bd74SDenis V. Lunev if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) 9471da177e4SLinus Torvalds goto err_inval; 9481da177e4SLinus Torvalds } 9491da177e4SLinus Torvalds 9501fc050a1SDavid S. Miller change_nexthops(fi) { 951436c3b66SDavid S. Miller fib_info_update_nh_saddr(net, nexthop_nh); 9521fc050a1SDavid S. Miller } endfor_nexthops(fi) 9531fc050a1SDavid S. Miller 9541da177e4SLinus Torvalds link_it: 9556a31d2a9SEric Dumazet ofi = fib_find_info(fi); 9566a31d2a9SEric Dumazet if (ofi) { 9571da177e4SLinus Torvalds fi->fib_dead = 1; 9581da177e4SLinus Torvalds free_fib_info(fi); 9591da177e4SLinus Torvalds ofi->fib_treeref++; 9601da177e4SLinus Torvalds return ofi; 9611da177e4SLinus Torvalds } 9621da177e4SLinus Torvalds 9631da177e4SLinus Torvalds fi->fib_treeref++; 9641da177e4SLinus Torvalds atomic_inc(&fi->fib_clntref); 965832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 9661da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, 9671da177e4SLinus Torvalds &fib_info_hash[fib_info_hashfn(fi)]); 9681da177e4SLinus Torvalds if (fi->fib_prefsrc) { 9691da177e4SLinus Torvalds struct hlist_head *head; 9701da177e4SLinus Torvalds 9711da177e4SLinus Torvalds head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 9721da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, head); 9731da177e4SLinus Torvalds } 9741da177e4SLinus Torvalds change_nexthops(fi) { 9751da177e4SLinus Torvalds struct hlist_head *head; 9761da177e4SLinus Torvalds unsigned int hash; 9771da177e4SLinus Torvalds 97871fceff0SDavid S. Miller if (!nexthop_nh->nh_dev) 9791da177e4SLinus Torvalds continue; 98071fceff0SDavid S. Miller hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); 9811da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 98271fceff0SDavid S. Miller hlist_add_head(&nexthop_nh->nh_hash, head); 9831da177e4SLinus Torvalds } endfor_nexthops(fi) 984832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 9851da177e4SLinus Torvalds return fi; 9861da177e4SLinus Torvalds 9871da177e4SLinus Torvalds err_inval: 9881da177e4SLinus Torvalds err = -EINVAL; 9891da177e4SLinus Torvalds 9901da177e4SLinus Torvalds failure: 9911da177e4SLinus Torvalds if (fi) { 9921da177e4SLinus Torvalds fi->fib_dead = 1; 9931da177e4SLinus Torvalds free_fib_info(fi); 9941da177e4SLinus Torvalds } 9954e902c57SThomas Graf 9964e902c57SThomas Graf return ERR_PTR(err); 9971da177e4SLinus Torvalds } 9981da177e4SLinus Torvalds 99915e47304SEric W. Biederman int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, 100037e826c5SDavid S. Miller u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, 1001b6544c0bSJamal Hadi Salim struct fib_info *fi, unsigned int flags) 10021da177e4SLinus Torvalds { 10031da177e4SLinus Torvalds struct nlmsghdr *nlh; 1004be403ea1SThomas Graf struct rtmsg *rtm; 10051da177e4SLinus Torvalds 100615e47304SEric W. Biederman nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); 100751456b29SIan Morris if (!nlh) 100826932566SPatrick McHardy return -EMSGSIZE; 1009be403ea1SThomas Graf 1010be403ea1SThomas Graf rtm = nlmsg_data(nlh); 10111da177e4SLinus Torvalds rtm->rtm_family = AF_INET; 10121da177e4SLinus Torvalds rtm->rtm_dst_len = dst_len; 10131da177e4SLinus Torvalds rtm->rtm_src_len = 0; 10141da177e4SLinus Torvalds rtm->rtm_tos = tos; 1015709772e6SKrzysztof Piotr Oledzki if (tb_id < 256) 10161da177e4SLinus Torvalds rtm->rtm_table = tb_id; 1017709772e6SKrzysztof Piotr Oledzki else 1018709772e6SKrzysztof Piotr Oledzki rtm->rtm_table = RT_TABLE_COMPAT; 1019f3756b79SDavid S. Miller if (nla_put_u32(skb, RTA_TABLE, tb_id)) 1020f3756b79SDavid S. Miller goto nla_put_failure; 10211da177e4SLinus Torvalds rtm->rtm_type = type; 10221da177e4SLinus Torvalds rtm->rtm_flags = fi->fib_flags; 102337e826c5SDavid S. Miller rtm->rtm_scope = fi->fib_scope; 10241da177e4SLinus Torvalds rtm->rtm_protocol = fi->fib_protocol; 1025be403ea1SThomas Graf 1026f3756b79SDavid S. Miller if (rtm->rtm_dst_len && 1027930345eaSJiri Benc nla_put_in_addr(skb, RTA_DST, dst)) 1028f3756b79SDavid S. Miller goto nla_put_failure; 1029f3756b79SDavid S. Miller if (fi->fib_priority && 1030f3756b79SDavid S. Miller nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) 1031f3756b79SDavid S. Miller goto nla_put_failure; 10321da177e4SLinus Torvalds if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 1033be403ea1SThomas Graf goto nla_put_failure; 1034be403ea1SThomas Graf 1035f3756b79SDavid S. Miller if (fi->fib_prefsrc && 1036930345eaSJiri Benc nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) 1037f3756b79SDavid S. Miller goto nla_put_failure; 10381da177e4SLinus Torvalds if (fi->fib_nhs == 1) { 1039*0eeb075fSAndy Gospodarek struct in_device *in_dev; 1040*0eeb075fSAndy Gospodarek 1041f3756b79SDavid S. Miller if (fi->fib_nh->nh_gw && 1042930345eaSJiri Benc nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) 1043f3756b79SDavid S. Miller goto nla_put_failure; 1044f3756b79SDavid S. Miller if (fi->fib_nh->nh_oif && 1045f3756b79SDavid S. Miller nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) 1046f3756b79SDavid S. Miller goto nla_put_failure; 1047*0eeb075fSAndy Gospodarek if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { 1048*0eeb075fSAndy Gospodarek in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev); 1049*0eeb075fSAndy Gospodarek if (in_dev && 1050*0eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) 1051*0eeb075fSAndy Gospodarek rtm->rtm_flags |= RTNH_F_DEAD; 1052*0eeb075fSAndy Gospodarek } 1053c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1054f3756b79SDavid S. Miller if (fi->fib_nh[0].nh_tclassid && 1055f3756b79SDavid S. Miller nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) 1056f3756b79SDavid S. Miller goto nla_put_failure; 10578265abc0SPatrick McHardy #endif 10581da177e4SLinus Torvalds } 10591da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 10601da177e4SLinus Torvalds if (fi->fib_nhs > 1) { 1061be403ea1SThomas Graf struct rtnexthop *rtnh; 1062be403ea1SThomas Graf struct nlattr *mp; 1063be403ea1SThomas Graf 1064be403ea1SThomas Graf mp = nla_nest_start(skb, RTA_MULTIPATH); 106551456b29SIan Morris if (!mp) 1066be403ea1SThomas Graf goto nla_put_failure; 10671da177e4SLinus Torvalds 10681da177e4SLinus Torvalds for_nexthops(fi) { 1069*0eeb075fSAndy Gospodarek struct in_device *in_dev; 1070*0eeb075fSAndy Gospodarek 1071be403ea1SThomas Graf rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 107251456b29SIan Morris if (!rtnh) 1073be403ea1SThomas Graf goto nla_put_failure; 1074be403ea1SThomas Graf 1075be403ea1SThomas Graf rtnh->rtnh_flags = nh->nh_flags & 0xFF; 1076*0eeb075fSAndy Gospodarek if (nh->nh_flags & RTNH_F_LINKDOWN) { 1077*0eeb075fSAndy Gospodarek in_dev = __in_dev_get_rcu(nh->nh_dev); 1078*0eeb075fSAndy Gospodarek if (in_dev && 1079*0eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) 1080*0eeb075fSAndy Gospodarek rtnh->rtnh_flags |= RTNH_F_DEAD; 1081*0eeb075fSAndy Gospodarek } 1082be403ea1SThomas Graf rtnh->rtnh_hops = nh->nh_weight - 1; 1083be403ea1SThomas Graf rtnh->rtnh_ifindex = nh->nh_oif; 1084be403ea1SThomas Graf 1085f3756b79SDavid S. Miller if (nh->nh_gw && 1086930345eaSJiri Benc nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw)) 1087f3756b79SDavid S. Miller goto nla_put_failure; 1088c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1089f3756b79SDavid S. Miller if (nh->nh_tclassid && 1090f3756b79SDavid S. Miller nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) 1091f3756b79SDavid S. Miller goto nla_put_failure; 10928265abc0SPatrick McHardy #endif 1093be403ea1SThomas Graf /* length of rtnetlink header + attributes */ 1094be403ea1SThomas Graf rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; 10951da177e4SLinus Torvalds } endfor_nexthops(fi); 1096be403ea1SThomas Graf 1097be403ea1SThomas Graf nla_nest_end(skb, mp); 10981da177e4SLinus Torvalds } 10991da177e4SLinus Torvalds #endif 1100053c095aSJohannes Berg nlmsg_end(skb, nlh); 1101053c095aSJohannes Berg return 0; 11021da177e4SLinus Torvalds 1103be403ea1SThomas Graf nla_put_failure: 110426932566SPatrick McHardy nlmsg_cancel(skb, nlh); 110526932566SPatrick McHardy return -EMSGSIZE; 11061da177e4SLinus Torvalds } 11071da177e4SLinus Torvalds 11081da177e4SLinus Torvalds /* 11096a31d2a9SEric Dumazet * Update FIB if: 11106a31d2a9SEric Dumazet * - local address disappeared -> we must delete all the entries 11116a31d2a9SEric Dumazet * referring to it. 11126a31d2a9SEric Dumazet * - device went down -> we must shutdown all nexthops going via it. 11131da177e4SLinus Torvalds */ 11144814bdbdSDenis V. Lunev int fib_sync_down_addr(struct net *net, __be32 local) 11151da177e4SLinus Torvalds { 11161da177e4SLinus Torvalds int ret = 0; 11171da177e4SLinus Torvalds unsigned int hash = fib_laddr_hashfn(local); 11181da177e4SLinus Torvalds struct hlist_head *head = &fib_info_laddrhash[hash]; 11191da177e4SLinus Torvalds struct fib_info *fi; 11201da177e4SLinus Torvalds 112151456b29SIan Morris if (!fib_info_laddrhash || local == 0) 112285326fa5SDenis V. Lunev return 0; 112385326fa5SDenis V. Lunev 1124b67bfe0dSSasha Levin hlist_for_each_entry(fi, head, fib_lhash) { 112509ad9bc7SOctavian Purdila if (!net_eq(fi->fib_net, net)) 11264814bdbdSDenis V. Lunev continue; 11271da177e4SLinus Torvalds if (fi->fib_prefsrc == local) { 11281da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 11291da177e4SLinus Torvalds ret++; 11301da177e4SLinus Torvalds } 11311da177e4SLinus Torvalds } 113285326fa5SDenis V. Lunev return ret; 11331da177e4SLinus Torvalds } 11341da177e4SLinus Torvalds 11358a3d0316SAndy Gospodarek int fib_sync_down_dev(struct net_device *dev, unsigned long event) 113685326fa5SDenis V. Lunev { 113785326fa5SDenis V. Lunev int ret = 0; 113885326fa5SDenis V. Lunev int scope = RT_SCOPE_NOWHERE; 11391da177e4SLinus Torvalds struct fib_info *prev_fi = NULL; 11401da177e4SLinus Torvalds unsigned int hash = fib_devindex_hashfn(dev->ifindex); 11411da177e4SLinus Torvalds struct hlist_head *head = &fib_info_devhash[hash]; 11421da177e4SLinus Torvalds struct fib_nh *nh; 11431da177e4SLinus Torvalds 11448a3d0316SAndy Gospodarek if (event == NETDEV_UNREGISTER || 11458a3d0316SAndy Gospodarek event == NETDEV_DOWN) 114685326fa5SDenis V. Lunev scope = -1; 114785326fa5SDenis V. Lunev 1148b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 11491da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 11501da177e4SLinus Torvalds int dead; 11511da177e4SLinus Torvalds 11521da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 11531da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 11541da177e4SLinus Torvalds continue; 11551da177e4SLinus Torvalds prev_fi = fi; 11561da177e4SLinus Torvalds dead = 0; 11571da177e4SLinus Torvalds change_nexthops(fi) { 115871fceff0SDavid S. Miller if (nexthop_nh->nh_flags & RTNH_F_DEAD) 11591da177e4SLinus Torvalds dead++; 116071fceff0SDavid S. Miller else if (nexthop_nh->nh_dev == dev && 116171fceff0SDavid S. Miller nexthop_nh->nh_scope != scope) { 11628a3d0316SAndy Gospodarek switch (event) { 11638a3d0316SAndy Gospodarek case NETDEV_DOWN: 11648a3d0316SAndy Gospodarek case NETDEV_UNREGISTER: 116571fceff0SDavid S. Miller nexthop_nh->nh_flags |= RTNH_F_DEAD; 11668a3d0316SAndy Gospodarek /* fall through */ 11678a3d0316SAndy Gospodarek case NETDEV_CHANGE: 11688a3d0316SAndy Gospodarek nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; 11698a3d0316SAndy Gospodarek break; 11708a3d0316SAndy Gospodarek } 11711da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 11721da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 117371fceff0SDavid S. Miller fi->fib_power -= nexthop_nh->nh_power; 117471fceff0SDavid S. Miller nexthop_nh->nh_power = 0; 11751da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 11761da177e4SLinus Torvalds #endif 11771da177e4SLinus Torvalds dead++; 11781da177e4SLinus Torvalds } 11791da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 11808a3d0316SAndy Gospodarek if (event == NETDEV_UNREGISTER && 11818a3d0316SAndy Gospodarek nexthop_nh->nh_dev == dev) { 11821da177e4SLinus Torvalds dead = fi->fib_nhs; 11831da177e4SLinus Torvalds break; 11841da177e4SLinus Torvalds } 11851da177e4SLinus Torvalds #endif 11861da177e4SLinus Torvalds } endfor_nexthops(fi) 11871da177e4SLinus Torvalds if (dead == fi->fib_nhs) { 11888a3d0316SAndy Gospodarek switch (event) { 11898a3d0316SAndy Gospodarek case NETDEV_DOWN: 11908a3d0316SAndy Gospodarek case NETDEV_UNREGISTER: 11911da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 11928a3d0316SAndy Gospodarek /* fall through */ 11938a3d0316SAndy Gospodarek case NETDEV_CHANGE: 11948a3d0316SAndy Gospodarek fi->fib_flags |= RTNH_F_LINKDOWN; 11958a3d0316SAndy Gospodarek break; 11968a3d0316SAndy Gospodarek } 11971da177e4SLinus Torvalds ret++; 11981da177e4SLinus Torvalds } 11991da177e4SLinus Torvalds } 12001da177e4SLinus Torvalds 12011da177e4SLinus Torvalds return ret; 12021da177e4SLinus Torvalds } 12031da177e4SLinus Torvalds 12040c838ff1SDavid S. Miller /* Must be invoked inside of an RCU protected region. */ 12050c838ff1SDavid S. Miller void fib_select_default(struct fib_result *res) 12060c838ff1SDavid S. Miller { 12070c838ff1SDavid S. Miller struct fib_info *fi = NULL, *last_resort = NULL; 120856315f9eSAlexander Duyck struct hlist_head *fa_head = res->fa_head; 12090c838ff1SDavid S. Miller struct fib_table *tb = res->table; 12100c838ff1SDavid S. Miller int order = -1, last_idx = -1; 12110c838ff1SDavid S. Miller struct fib_alias *fa; 12120c838ff1SDavid S. Miller 121356315f9eSAlexander Duyck hlist_for_each_entry_rcu(fa, fa_head, fa_list) { 12140c838ff1SDavid S. Miller struct fib_info *next_fi = fa->fa_info; 12150c838ff1SDavid S. Miller 121637e826c5SDavid S. Miller if (next_fi->fib_scope != res->scope || 12170c838ff1SDavid S. Miller fa->fa_type != RTN_UNICAST) 12180c838ff1SDavid S. Miller continue; 12190c838ff1SDavid S. Miller 12200c838ff1SDavid S. Miller if (next_fi->fib_priority > res->fi->fib_priority) 12210c838ff1SDavid S. Miller break; 12220c838ff1SDavid S. Miller if (!next_fi->fib_nh[0].nh_gw || 12230c838ff1SDavid S. Miller next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 12240c838ff1SDavid S. Miller continue; 12250c838ff1SDavid S. Miller 12260c838ff1SDavid S. Miller fib_alias_accessed(fa); 12270c838ff1SDavid S. Miller 122851456b29SIan Morris if (!fi) { 12290c838ff1SDavid S. Miller if (next_fi != res->fi) 12300c838ff1SDavid S. Miller break; 12310c838ff1SDavid S. Miller } else if (!fib_detect_death(fi, order, &last_resort, 12320c838ff1SDavid S. Miller &last_idx, tb->tb_default)) { 12330c838ff1SDavid S. Miller fib_result_assign(res, fi); 12340c838ff1SDavid S. Miller tb->tb_default = order; 12350c838ff1SDavid S. Miller goto out; 12360c838ff1SDavid S. Miller } 12370c838ff1SDavid S. Miller fi = next_fi; 12380c838ff1SDavid S. Miller order++; 12390c838ff1SDavid S. Miller } 12400c838ff1SDavid S. Miller 124151456b29SIan Morris if (order <= 0 || !fi) { 12420c838ff1SDavid S. Miller tb->tb_default = -1; 12430c838ff1SDavid S. Miller goto out; 12440c838ff1SDavid S. Miller } 12450c838ff1SDavid S. Miller 12460c838ff1SDavid S. Miller if (!fib_detect_death(fi, order, &last_resort, &last_idx, 12470c838ff1SDavid S. Miller tb->tb_default)) { 12480c838ff1SDavid S. Miller fib_result_assign(res, fi); 12490c838ff1SDavid S. Miller tb->tb_default = order; 12500c838ff1SDavid S. Miller goto out; 12510c838ff1SDavid S. Miller } 12520c838ff1SDavid S. Miller 12530c838ff1SDavid S. Miller if (last_idx >= 0) 12540c838ff1SDavid S. Miller fib_result_assign(res, last_resort); 12550c838ff1SDavid S. Miller tb->tb_default = last_idx; 12560c838ff1SDavid S. Miller out: 125731d40937SEric Dumazet return; 12580c838ff1SDavid S. Miller } 12590c838ff1SDavid S. Miller 12601da177e4SLinus Torvalds /* 12616a31d2a9SEric Dumazet * Dead device goes up. We wake up dead nexthops. 12626a31d2a9SEric Dumazet * It takes sense only on multipath routes. 12631da177e4SLinus Torvalds */ 12648a3d0316SAndy Gospodarek int fib_sync_up(struct net_device *dev, unsigned int nh_flags) 12651da177e4SLinus Torvalds { 12661da177e4SLinus Torvalds struct fib_info *prev_fi; 12671da177e4SLinus Torvalds unsigned int hash; 12681da177e4SLinus Torvalds struct hlist_head *head; 12691da177e4SLinus Torvalds struct fib_nh *nh; 12701da177e4SLinus Torvalds int ret; 12711da177e4SLinus Torvalds 12721da177e4SLinus Torvalds if (!(dev->flags & IFF_UP)) 12731da177e4SLinus Torvalds return 0; 12741da177e4SLinus Torvalds 12751da177e4SLinus Torvalds prev_fi = NULL; 12761da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 12771da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 12781da177e4SLinus Torvalds ret = 0; 12791da177e4SLinus Torvalds 1280b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 12811da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 12821da177e4SLinus Torvalds int alive; 12831da177e4SLinus Torvalds 12841da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 12851da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 12861da177e4SLinus Torvalds continue; 12871da177e4SLinus Torvalds 12881da177e4SLinus Torvalds prev_fi = fi; 12891da177e4SLinus Torvalds alive = 0; 12901da177e4SLinus Torvalds change_nexthops(fi) { 12918a3d0316SAndy Gospodarek if (!(nexthop_nh->nh_flags & nh_flags)) { 12921da177e4SLinus Torvalds alive++; 12931da177e4SLinus Torvalds continue; 12941da177e4SLinus Torvalds } 129551456b29SIan Morris if (!nexthop_nh->nh_dev || 129671fceff0SDavid S. Miller !(nexthop_nh->nh_dev->flags & IFF_UP)) 12971da177e4SLinus Torvalds continue; 129871fceff0SDavid S. Miller if (nexthop_nh->nh_dev != dev || 129971fceff0SDavid S. Miller !__in_dev_get_rtnl(dev)) 13001da177e4SLinus Torvalds continue; 13011da177e4SLinus Torvalds alive++; 13028a3d0316SAndy Gospodarek #ifdef CONFIG_IP_ROUTE_MULTIPATH 13031da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 130471fceff0SDavid S. Miller nexthop_nh->nh_power = 0; 13058a3d0316SAndy Gospodarek nexthop_nh->nh_flags &= ~nh_flags; 13061da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 13078a3d0316SAndy Gospodarek #else 13088a3d0316SAndy Gospodarek nexthop_nh->nh_flags &= ~nh_flags; 13098a3d0316SAndy Gospodarek #endif 13101da177e4SLinus Torvalds } endfor_nexthops(fi) 13111da177e4SLinus Torvalds 13121da177e4SLinus Torvalds if (alive > 0) { 13138a3d0316SAndy Gospodarek fi->fib_flags &= ~nh_flags; 13141da177e4SLinus Torvalds ret++; 13151da177e4SLinus Torvalds } 13161da177e4SLinus Torvalds } 13171da177e4SLinus Torvalds 13181da177e4SLinus Torvalds return ret; 13191da177e4SLinus Torvalds } 13201da177e4SLinus Torvalds 13218a3d0316SAndy Gospodarek #ifdef CONFIG_IP_ROUTE_MULTIPATH 13228a3d0316SAndy Gospodarek 13231da177e4SLinus Torvalds /* 13246a31d2a9SEric Dumazet * The algorithm is suboptimal, but it provides really 13256a31d2a9SEric Dumazet * fair weighted route distribution. 13261da177e4SLinus Torvalds */ 13271b7fe593SDavid S. Miller void fib_select_multipath(struct fib_result *res) 13281da177e4SLinus Torvalds { 13291da177e4SLinus Torvalds struct fib_info *fi = res->fi; 1330*0eeb075fSAndy Gospodarek struct in_device *in_dev; 13311da177e4SLinus Torvalds int w; 13321da177e4SLinus Torvalds 13331da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 13341da177e4SLinus Torvalds if (fi->fib_power <= 0) { 13351da177e4SLinus Torvalds int power = 0; 13361da177e4SLinus Torvalds change_nexthops(fi) { 1337*0eeb075fSAndy Gospodarek in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev); 1338*0eeb075fSAndy Gospodarek if (nexthop_nh->nh_flags & RTNH_F_DEAD) 1339*0eeb075fSAndy Gospodarek continue; 1340*0eeb075fSAndy Gospodarek if (in_dev && 1341*0eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 1342*0eeb075fSAndy Gospodarek nexthop_nh->nh_flags & RTNH_F_LINKDOWN) 1343*0eeb075fSAndy Gospodarek continue; 134471fceff0SDavid S. Miller power += nexthop_nh->nh_weight; 134571fceff0SDavid S. Miller nexthop_nh->nh_power = nexthop_nh->nh_weight; 13461da177e4SLinus Torvalds } endfor_nexthops(fi); 13471da177e4SLinus Torvalds fi->fib_power = power; 13481da177e4SLinus Torvalds if (power <= 0) { 13491da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 13501da177e4SLinus Torvalds /* Race condition: route has just become dead. */ 13511da177e4SLinus Torvalds res->nh_sel = 0; 13521da177e4SLinus Torvalds return; 13531da177e4SLinus Torvalds } 13541da177e4SLinus Torvalds } 13551da177e4SLinus Torvalds 13561da177e4SLinus Torvalds 13571da177e4SLinus Torvalds /* w should be random number [0..fi->fib_power-1], 13586a31d2a9SEric Dumazet * it is pretty bad approximation. 13591da177e4SLinus Torvalds */ 13601da177e4SLinus Torvalds 13611da177e4SLinus Torvalds w = jiffies % fi->fib_power; 13621da177e4SLinus Torvalds 13631da177e4SLinus Torvalds change_nexthops(fi) { 136471fceff0SDavid S. Miller if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) && 136571fceff0SDavid S. Miller nexthop_nh->nh_power) { 13666a31d2a9SEric Dumazet w -= nexthop_nh->nh_power; 13676a31d2a9SEric Dumazet if (w <= 0) { 136871fceff0SDavid S. Miller nexthop_nh->nh_power--; 13691da177e4SLinus Torvalds fi->fib_power--; 13701da177e4SLinus Torvalds res->nh_sel = nhsel; 13711da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 13721da177e4SLinus Torvalds return; 13731da177e4SLinus Torvalds } 13741da177e4SLinus Torvalds } 13751da177e4SLinus Torvalds } endfor_nexthops(fi); 13761da177e4SLinus Torvalds 13771da177e4SLinus Torvalds /* Race condition: route has just become dead. */ 13781da177e4SLinus Torvalds res->nh_sel = 0; 13791da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 13801da177e4SLinus Torvalds } 13811da177e4SLinus Torvalds #endif 1382