11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * IPv4 Forwarding Information Base: semantics. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 111da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 121da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 131da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds #include <asm/uaccess.h> 171da177e4SLinus Torvalds #include <linux/bitops.h> 181da177e4SLinus Torvalds #include <linux/types.h> 191da177e4SLinus Torvalds #include <linux/kernel.h> 201da177e4SLinus Torvalds #include <linux/jiffies.h> 211da177e4SLinus Torvalds #include <linux/mm.h> 221da177e4SLinus Torvalds #include <linux/string.h> 231da177e4SLinus Torvalds #include <linux/socket.h> 241da177e4SLinus Torvalds #include <linux/sockios.h> 251da177e4SLinus Torvalds #include <linux/errno.h> 261da177e4SLinus Torvalds #include <linux/in.h> 271da177e4SLinus Torvalds #include <linux/inet.h> 2814c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h> 291da177e4SLinus Torvalds #include <linux/netdevice.h> 301da177e4SLinus Torvalds #include <linux/if_arp.h> 311da177e4SLinus Torvalds #include <linux/proc_fs.h> 321da177e4SLinus Torvalds #include <linux/skbuff.h> 331da177e4SLinus Torvalds #include <linux/init.h> 345a0e3ad6STejun Heo #include <linux/slab.h> 351da177e4SLinus Torvalds 3614c85021SArnaldo Carvalho de Melo #include 
<net/arp.h> 371da177e4SLinus Torvalds #include <net/ip.h> 381da177e4SLinus Torvalds #include <net/protocol.h> 391da177e4SLinus Torvalds #include <net/route.h> 401da177e4SLinus Torvalds #include <net/tcp.h> 411da177e4SLinus Torvalds #include <net/sock.h> 421da177e4SLinus Torvalds #include <net/ip_fib.h> 43f21c7bc5SThomas Graf #include <net/netlink.h> 444e902c57SThomas Graf #include <net/nexthop.h> 45571e7226SRoopa Prabhu #include <net/lwtunnel.h> 461da177e4SLinus Torvalds 471da177e4SLinus Torvalds #include "fib_lookup.h" 481da177e4SLinus Torvalds 49832b4c5eSStephen Hemminger static DEFINE_SPINLOCK(fib_info_lock); 501da177e4SLinus Torvalds static struct hlist_head *fib_info_hash; 511da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash; 52123b9731SDavid S. Miller static unsigned int fib_info_hash_size; 531da177e4SLinus Torvalds static unsigned int fib_info_cnt; 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8 561da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 571da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 581da177e4SLinus Torvalds 591da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 601da177e4SLinus Torvalds 611da177e4SLinus Torvalds static DEFINE_SPINLOCK(fib_multipath_lock); 621da177e4SLinus Torvalds 636a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 646a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh; \ 656a31d2a9SEric Dumazet for (nhsel = 0, nh = (fi)->fib_nh; \ 666a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 676a31d2a9SEric Dumazet nh++, nhsel++) 681da177e4SLinus Torvalds 696a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 706a31d2a9SEric Dumazet int nhsel; struct fib_nh *nexthop_nh; \ 716a31d2a9SEric Dumazet for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 726a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 736a31d2a9SEric Dumazet nexthop_nh++, nhsel++) 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds #else /* 
CONFIG_IP_ROUTE_MULTIPATH */ 761da177e4SLinus Torvalds 771da177e4SLinus Torvalds /* Hope, that gcc will optimize it to get rid of dummy loop */ 781da177e4SLinus Torvalds 796a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 806a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \ 811da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 821da177e4SLinus Torvalds 836a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 846a31d2a9SEric Dumazet int nhsel; \ 856a31d2a9SEric Dumazet struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 861da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 871da177e4SLinus Torvalds 881da177e4SLinus Torvalds #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 891da177e4SLinus Torvalds 901da177e4SLinus Torvalds #define endfor_nexthops(fi) } 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds 933be0686bSDavid S. Miller const struct fib_prop fib_props[RTN_MAX + 1] = { 946a31d2a9SEric Dumazet [RTN_UNSPEC] = { 951da177e4SLinus Torvalds .error = 0, 961da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 976a31d2a9SEric Dumazet }, 986a31d2a9SEric Dumazet [RTN_UNICAST] = { 991da177e4SLinus Torvalds .error = 0, 1001da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1016a31d2a9SEric Dumazet }, 1026a31d2a9SEric Dumazet [RTN_LOCAL] = { 1031da177e4SLinus Torvalds .error = 0, 1041da177e4SLinus Torvalds .scope = RT_SCOPE_HOST, 1056a31d2a9SEric Dumazet }, 1066a31d2a9SEric Dumazet [RTN_BROADCAST] = { 1071da177e4SLinus Torvalds .error = 0, 1081da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1096a31d2a9SEric Dumazet }, 1106a31d2a9SEric Dumazet [RTN_ANYCAST] = { 1111da177e4SLinus Torvalds .error = 0, 1121da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1136a31d2a9SEric Dumazet }, 1146a31d2a9SEric Dumazet [RTN_MULTICAST] = { 1151da177e4SLinus Torvalds .error = 0, 1161da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1176a31d2a9SEric Dumazet }, 1186a31d2a9SEric Dumazet [RTN_BLACKHOLE] = { 1191da177e4SLinus Torvalds .error = -EINVAL, 
1201da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1216a31d2a9SEric Dumazet }, 1226a31d2a9SEric Dumazet [RTN_UNREACHABLE] = { 1231da177e4SLinus Torvalds .error = -EHOSTUNREACH, 1241da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1256a31d2a9SEric Dumazet }, 1266a31d2a9SEric Dumazet [RTN_PROHIBIT] = { 1271da177e4SLinus Torvalds .error = -EACCES, 1281da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1296a31d2a9SEric Dumazet }, 1306a31d2a9SEric Dumazet [RTN_THROW] = { 1311da177e4SLinus Torvalds .error = -EAGAIN, 1321da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1336a31d2a9SEric Dumazet }, 1346a31d2a9SEric Dumazet [RTN_NAT] = { 1351da177e4SLinus Torvalds .error = -EINVAL, 1361da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1376a31d2a9SEric Dumazet }, 1386a31d2a9SEric Dumazet [RTN_XRESOLVE] = { 1391da177e4SLinus Torvalds .error = -EINVAL, 1401da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1416a31d2a9SEric Dumazet }, 1421da177e4SLinus Torvalds }; 1431da177e4SLinus Torvalds 144c5038a83SDavid S. Miller static void rt_fibinfo_free(struct rtable __rcu **rtp) 14554764bb6SEric Dumazet { 14654764bb6SEric Dumazet struct rtable *rt = rcu_dereference_protected(*rtp, 1); 14754764bb6SEric Dumazet 14854764bb6SEric Dumazet if (!rt) 14954764bb6SEric Dumazet return; 15054764bb6SEric Dumazet 15154764bb6SEric Dumazet /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); 15254764bb6SEric Dumazet * because we waited an RCU grace period before calling 15354764bb6SEric Dumazet * free_fib_info_rcu() 15454764bb6SEric Dumazet */ 15554764bb6SEric Dumazet 15654764bb6SEric Dumazet dst_free(&rt->dst); 15754764bb6SEric Dumazet } 15854764bb6SEric Dumazet 159c5038a83SDavid S. Miller static void free_nh_exceptions(struct fib_nh *nh) 160c5038a83SDavid S. Miller { 161caa41527SEric Dumazet struct fnhe_hash_bucket *hash; 162c5038a83SDavid S. Miller int i; 163c5038a83SDavid S. 
Miller 164caa41527SEric Dumazet hash = rcu_dereference_protected(nh->nh_exceptions, 1); 165caa41527SEric Dumazet if (!hash) 166caa41527SEric Dumazet return; 167c5038a83SDavid S. Miller for (i = 0; i < FNHE_HASH_SIZE; i++) { 168c5038a83SDavid S. Miller struct fib_nh_exception *fnhe; 169c5038a83SDavid S. Miller 170c5038a83SDavid S. Miller fnhe = rcu_dereference_protected(hash[i].chain, 1); 171c5038a83SDavid S. Miller while (fnhe) { 172c5038a83SDavid S. Miller struct fib_nh_exception *next; 173c5038a83SDavid S. Miller 174c5038a83SDavid S. Miller next = rcu_dereference_protected(fnhe->fnhe_next, 1); 175c5038a83SDavid S. Miller 1762ffae99dSTimo Teräs rt_fibinfo_free(&fnhe->fnhe_rth_input); 1772ffae99dSTimo Teräs rt_fibinfo_free(&fnhe->fnhe_rth_output); 178c5038a83SDavid S. Miller 179c5038a83SDavid S. Miller kfree(fnhe); 180c5038a83SDavid S. Miller 181c5038a83SDavid S. Miller fnhe = next; 182c5038a83SDavid S. Miller } 183c5038a83SDavid S. Miller } 184c5038a83SDavid S. Miller kfree(hash); 185c5038a83SDavid S. Miller } 186c5038a83SDavid S. Miller 187c5038a83SDavid S. 
Miller static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) 188d26b3a7cSEric Dumazet { 189d26b3a7cSEric Dumazet int cpu; 190d26b3a7cSEric Dumazet 191d26b3a7cSEric Dumazet if (!rtp) 192d26b3a7cSEric Dumazet return; 193d26b3a7cSEric Dumazet 194d26b3a7cSEric Dumazet for_each_possible_cpu(cpu) { 195d26b3a7cSEric Dumazet struct rtable *rt; 196d26b3a7cSEric Dumazet 197d26b3a7cSEric Dumazet rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); 198d26b3a7cSEric Dumazet if (rt) 199d26b3a7cSEric Dumazet dst_free(&rt->dst); 200d26b3a7cSEric Dumazet } 201d26b3a7cSEric Dumazet free_percpu(rtp); 202d26b3a7cSEric Dumazet } 203d26b3a7cSEric Dumazet 2041da177e4SLinus Torvalds /* Release a nexthop info record */ 20519c1ea14SYan, Zheng static void free_fib_info_rcu(struct rcu_head *head) 20619c1ea14SYan, Zheng { 20719c1ea14SYan, Zheng struct fib_info *fi = container_of(head, struct fib_info, rcu); 20819c1ea14SYan, Zheng 209e49cc0daSYanmin Zhang change_nexthops(fi) { 210e49cc0daSYanmin Zhang if (nexthop_nh->nh_dev) 211e49cc0daSYanmin Zhang dev_put(nexthop_nh->nh_dev); 2125a6228a0SNicolas Dichtel lwtstate_put(nexthop_nh->nh_lwtstate); 2134895c771SDavid S. Miller free_nh_exceptions(nexthop_nh); 214c5038a83SDavid S. Miller rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); 215c5038a83SDavid S. Miller rt_fibinfo_free(&nexthop_nh->nh_rth_input); 216e49cc0daSYanmin Zhang } endfor_nexthops(fi); 217e49cc0daSYanmin Zhang 21819c1ea14SYan, Zheng if (fi->fib_metrics != (u32 *) dst_default_metrics) 21919c1ea14SYan, Zheng kfree(fi->fib_metrics); 22019c1ea14SYan, Zheng kfree(fi); 22119c1ea14SYan, Zheng } 2221da177e4SLinus Torvalds 2231da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi) 2241da177e4SLinus Torvalds { 2251da177e4SLinus Torvalds if (fi->fib_dead == 0) { 226058bd4d2SJoe Perches pr_warn("Freeing alive fib_info %p\n", fi); 2271da177e4SLinus Torvalds return; 2281da177e4SLinus Torvalds } 2291da177e4SLinus Torvalds fib_info_cnt--; 2307a9bc9b8SDavid S. 
Miller #ifdef CONFIG_IP_ROUTE_CLASSID 2317a9bc9b8SDavid S. Miller change_nexthops(fi) { 2327a9bc9b8SDavid S. Miller if (nexthop_nh->nh_tclassid) 233f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users--; 2347a9bc9b8SDavid S. Miller } endfor_nexthops(fi); 2357a9bc9b8SDavid S. Miller #endif 23619c1ea14SYan, Zheng call_rcu(&fi->rcu, free_fib_info_rcu); 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds 2391da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi) 2401da177e4SLinus Torvalds { 241832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 2421da177e4SLinus Torvalds if (fi && --fi->fib_treeref == 0) { 2431da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 2441da177e4SLinus Torvalds if (fi->fib_prefsrc) 2451da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 2461da177e4SLinus Torvalds change_nexthops(fi) { 24771fceff0SDavid S. Miller if (!nexthop_nh->nh_dev) 2481da177e4SLinus Torvalds continue; 24971fceff0SDavid S. Miller hlist_del(&nexthop_nh->nh_hash); 2501da177e4SLinus Torvalds } endfor_nexthops(fi) 2511da177e4SLinus Torvalds fi->fib_dead = 1; 2521da177e4SLinus Torvalds fib_info_put(fi); 2531da177e4SLinus Torvalds } 254832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 2551da177e4SLinus Torvalds } 2561da177e4SLinus Torvalds 2576a31d2a9SEric Dumazet static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 2581da177e4SLinus Torvalds { 2591da177e4SLinus Torvalds const struct fib_nh *onh = ofi->fib_nh; 2601da177e4SLinus Torvalds 2611da177e4SLinus Torvalds for_nexthops(fi) { 2621da177e4SLinus Torvalds if (nh->nh_oif != onh->nh_oif || 2631da177e4SLinus Torvalds nh->nh_gw != onh->nh_gw || 2641da177e4SLinus Torvalds nh->nh_scope != onh->nh_scope || 2651da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 2661da177e4SLinus Torvalds nh->nh_weight != onh->nh_weight || 2671da177e4SLinus Torvalds #endif 268c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 2691da177e4SLinus Torvalds nh->nh_tclassid != 
onh->nh_tclassid || 2701da177e4SLinus Torvalds #endif 271571e7226SRoopa Prabhu lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) || 2728a3d0316SAndy Gospodarek ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK)) 2731da177e4SLinus Torvalds return -1; 2741da177e4SLinus Torvalds onh++; 2751da177e4SLinus Torvalds } endfor_nexthops(fi); 2761da177e4SLinus Torvalds return 0; 2771da177e4SLinus Torvalds } 2781da177e4SLinus Torvalds 27988ebc72fSDavid S. Miller static inline unsigned int fib_devindex_hashfn(unsigned int val) 28088ebc72fSDavid S. Miller { 28188ebc72fSDavid S. Miller unsigned int mask = DEVINDEX_HASHSIZE - 1; 28288ebc72fSDavid S. Miller 28388ebc72fSDavid S. Miller return (val ^ 28488ebc72fSDavid S. Miller (val >> DEVINDEX_HASHBITS) ^ 28588ebc72fSDavid S. Miller (val >> (DEVINDEX_HASHBITS * 2))) & mask; 28688ebc72fSDavid S. Miller } 28788ebc72fSDavid S. Miller 2881da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 2891da177e4SLinus Torvalds { 290123b9731SDavid S. Miller unsigned int mask = (fib_info_hash_size - 1); 2911da177e4SLinus Torvalds unsigned int val = fi->fib_nhs; 2921da177e4SLinus Torvalds 29337e826c5SDavid S. Miller val ^= (fi->fib_protocol << 8) | fi->fib_scope; 29481f7bf6cSAl Viro val ^= (__force u32)fi->fib_prefsrc; 2951da177e4SLinus Torvalds val ^= fi->fib_priority; 29688ebc72fSDavid S. Miller for_nexthops(fi) { 29788ebc72fSDavid S. Miller val ^= fib_devindex_hashfn(nh->nh_oif); 29888ebc72fSDavid S. 
Miller } endfor_nexthops(fi) 2991da177e4SLinus Torvalds 3001da177e4SLinus Torvalds return (val ^ (val >> 7) ^ (val >> 12)) & mask; 3011da177e4SLinus Torvalds } 3021da177e4SLinus Torvalds 3031da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi) 3041da177e4SLinus Torvalds { 3051da177e4SLinus Torvalds struct hlist_head *head; 3061da177e4SLinus Torvalds struct fib_info *fi; 3071da177e4SLinus Torvalds unsigned int hash; 3081da177e4SLinus Torvalds 3091da177e4SLinus Torvalds hash = fib_info_hashfn(nfi); 3101da177e4SLinus Torvalds head = &fib_info_hash[hash]; 3111da177e4SLinus Torvalds 312b67bfe0dSSasha Levin hlist_for_each_entry(fi, head, fib_hash) { 31309ad9bc7SOctavian Purdila if (!net_eq(fi->fib_net, nfi->fib_net)) 3144814bdbdSDenis V. Lunev continue; 3151da177e4SLinus Torvalds if (fi->fib_nhs != nfi->fib_nhs) 3161da177e4SLinus Torvalds continue; 3171da177e4SLinus Torvalds if (nfi->fib_protocol == fi->fib_protocol && 31837e826c5SDavid S. Miller nfi->fib_scope == fi->fib_scope && 3191da177e4SLinus Torvalds nfi->fib_prefsrc == fi->fib_prefsrc && 3201da177e4SLinus Torvalds nfi->fib_priority == fi->fib_priority && 321f4ef85bbSEric Dumazet nfi->fib_type == fi->fib_type && 3221da177e4SLinus Torvalds memcmp(nfi->fib_metrics, fi->fib_metrics, 323fcd13f42SEric Dumazet sizeof(u32) * RTAX_MAX) == 0 && 3248a3d0316SAndy Gospodarek !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && 3251da177e4SLinus Torvalds (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 3261da177e4SLinus Torvalds return fi; 3271da177e4SLinus Torvalds } 3281da177e4SLinus Torvalds 3291da177e4SLinus Torvalds return NULL; 3301da177e4SLinus Torvalds } 3311da177e4SLinus Torvalds 3321da177e4SLinus Torvalds /* Check, that the gateway is already configured. 3336a31d2a9SEric Dumazet * Used only by redirect accept routine. 
3341da177e4SLinus Torvalds */ 335d878e72eSAl Viro int ip_fib_check_default(__be32 gw, struct net_device *dev) 3361da177e4SLinus Torvalds { 3371da177e4SLinus Torvalds struct hlist_head *head; 3381da177e4SLinus Torvalds struct fib_nh *nh; 3391da177e4SLinus Torvalds unsigned int hash; 3401da177e4SLinus Torvalds 341832b4c5eSStephen Hemminger spin_lock(&fib_info_lock); 3421da177e4SLinus Torvalds 3431da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 3441da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 345b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 3461da177e4SLinus Torvalds if (nh->nh_dev == dev && 3471da177e4SLinus Torvalds nh->nh_gw == gw && 3481da177e4SLinus Torvalds !(nh->nh_flags & RTNH_F_DEAD)) { 349832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 3501da177e4SLinus Torvalds return 0; 3511da177e4SLinus Torvalds } 3521da177e4SLinus Torvalds } 3531da177e4SLinus Torvalds 354832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 3551da177e4SLinus Torvalds 3561da177e4SLinus Torvalds return -1; 3571da177e4SLinus Torvalds } 3581da177e4SLinus Torvalds 359339bf98fSThomas Graf static inline size_t fib_nlmsg_size(struct fib_info *fi) 360339bf98fSThomas Graf { 361339bf98fSThomas Graf size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 362339bf98fSThomas Graf + nla_total_size(4) /* RTA_TABLE */ 363339bf98fSThomas Graf + nla_total_size(4) /* RTA_DST */ 364339bf98fSThomas Graf + nla_total_size(4) /* RTA_PRIORITY */ 365ea697639SDaniel Borkmann + nla_total_size(4) /* RTA_PREFSRC */ 366ea697639SDaniel Borkmann + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ 367339bf98fSThomas Graf 368339bf98fSThomas Graf /* space for nested metrics */ 369339bf98fSThomas Graf payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 370339bf98fSThomas Graf 371339bf98fSThomas Graf if (fi->fib_nhs) { 372571e7226SRoopa Prabhu size_t nh_encapsize = 0; 373339bf98fSThomas Graf /* Also handles the special case fib_nhs == 1 */ 374339bf98fSThomas Graf 
375339bf98fSThomas Graf /* each nexthop is packed in an attribute */ 376339bf98fSThomas Graf size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 377339bf98fSThomas Graf 378339bf98fSThomas Graf /* may contain flow and gateway attribute */ 379339bf98fSThomas Graf nhsize += 2 * nla_total_size(4); 380339bf98fSThomas Graf 381571e7226SRoopa Prabhu /* grab encap info */ 382571e7226SRoopa Prabhu for_nexthops(fi) { 383571e7226SRoopa Prabhu if (nh->nh_lwtstate) { 384571e7226SRoopa Prabhu /* RTA_ENCAP_TYPE */ 385571e7226SRoopa Prabhu nh_encapsize += lwtunnel_get_encap_size( 386571e7226SRoopa Prabhu nh->nh_lwtstate); 387571e7226SRoopa Prabhu /* RTA_ENCAP */ 388571e7226SRoopa Prabhu nh_encapsize += nla_total_size(2); 389571e7226SRoopa Prabhu } 390571e7226SRoopa Prabhu } endfor_nexthops(fi); 391571e7226SRoopa Prabhu 392339bf98fSThomas Graf /* all nexthops are packed in a nested attribute */ 393571e7226SRoopa Prabhu payload += nla_total_size((fi->fib_nhs * nhsize) + 394571e7226SRoopa Prabhu nh_encapsize); 395571e7226SRoopa Prabhu 396339bf98fSThomas Graf } 397339bf98fSThomas Graf 398339bf98fSThomas Graf return payload; 399339bf98fSThomas Graf } 400339bf98fSThomas Graf 40181f7bf6cSAl Viro void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 4029877b253SJoe Perches int dst_len, u32 tb_id, const struct nl_info *info, 403b8f55831SMilan Kocian unsigned int nlm_flags) 4041da177e4SLinus Torvalds { 4051da177e4SLinus Torvalds struct sk_buff *skb; 4064e902c57SThomas Graf u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 407f21c7bc5SThomas Graf int err = -ENOBUFS; 4081da177e4SLinus Torvalds 409339bf98fSThomas Graf skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 41051456b29SIan Morris if (!skb) 411f21c7bc5SThomas Graf goto errout; 4121da177e4SLinus Torvalds 41315e47304SEric W. Biederman err = fib_dump_info(skb, info->portid, seq, event, tb_id, 41437e826c5SDavid S. 
Miller fa->fa_type, key, dst_len, 415b8f55831SMilan Kocian fa->fa_tos, fa->fa_info, nlm_flags); 41626932566SPatrick McHardy if (err < 0) { 41726932566SPatrick McHardy /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 41826932566SPatrick McHardy WARN_ON(err == -EMSGSIZE); 41926932566SPatrick McHardy kfree_skb(skb); 42026932566SPatrick McHardy goto errout; 42126932566SPatrick McHardy } 42215e47304SEric W. Biederman rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, 4234e902c57SThomas Graf info->nlh, GFP_KERNEL); 4241ce85fe4SPablo Neira Ayuso return; 425f21c7bc5SThomas Graf errout: 426f21c7bc5SThomas Graf if (err < 0) 4274d1169c1SDenis V. Lunev rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 4281da177e4SLinus Torvalds } 4291da177e4SLinus Torvalds 430c9cb6b6eSStephen Hemminger static int fib_detect_death(struct fib_info *fi, int order, 431c9cb6b6eSStephen Hemminger struct fib_info **last_resort, int *last_idx, 432c9cb6b6eSStephen Hemminger int dflt) 4331da177e4SLinus Torvalds { 4341da177e4SLinus Torvalds struct neighbour *n; 4351da177e4SLinus Torvalds int state = NUD_NONE; 4361da177e4SLinus Torvalds 4371da177e4SLinus Torvalds n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 4381da177e4SLinus Torvalds if (n) { 4391da177e4SLinus Torvalds state = n->nud_state; 4401da177e4SLinus Torvalds neigh_release(n); 44188f64320SJulian Anastasov } else { 44288f64320SJulian Anastasov return 0; 4431da177e4SLinus Torvalds } 4441da177e4SLinus Torvalds if (state == NUD_REACHABLE) 4451da177e4SLinus Torvalds return 0; 446c17860a0SDenis V. 
Lunev if ((state & NUD_VALID) && order != dflt) 4471da177e4SLinus Torvalds return 0; 4481da177e4SLinus Torvalds if ((state & NUD_VALID) || 44988f64320SJulian Anastasov (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) { 4501da177e4SLinus Torvalds *last_resort = fi; 4511da177e4SLinus Torvalds *last_idx = order; 4521da177e4SLinus Torvalds } 4531da177e4SLinus Torvalds return 1; 4541da177e4SLinus Torvalds } 4551da177e4SLinus Torvalds 4561da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4571da177e4SLinus Torvalds 4584e902c57SThomas Graf static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) 4591da177e4SLinus Torvalds { 4601da177e4SLinus Torvalds int nhs = 0; 4611da177e4SLinus Torvalds 4624e902c57SThomas Graf while (rtnh_ok(rtnh, remaining)) { 4631da177e4SLinus Torvalds nhs++; 4644e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 4651da177e4SLinus Torvalds } 4661da177e4SLinus Torvalds 4674e902c57SThomas Graf /* leftover implies invalid nexthop configuration, discard it */ 4684e902c57SThomas Graf return remaining > 0 ? 0 : nhs; 4694e902c57SThomas Graf } 4701da177e4SLinus Torvalds 4714e902c57SThomas Graf static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 4724e902c57SThomas Graf int remaining, struct fib_config *cfg) 4734e902c57SThomas Graf { 474571e7226SRoopa Prabhu struct net *net = cfg->fc_nlinfo.nl_net; 475571e7226SRoopa Prabhu int ret; 476571e7226SRoopa Prabhu 4771da177e4SLinus Torvalds change_nexthops(fi) { 4784e902c57SThomas Graf int attrlen; 4794e902c57SThomas Graf 4804e902c57SThomas Graf if (!rtnh_ok(rtnh, remaining)) 4811da177e4SLinus Torvalds return -EINVAL; 4824e902c57SThomas Graf 48371fceff0SDavid S. Miller nexthop_nh->nh_flags = 48471fceff0SDavid S. Miller (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 48571fceff0SDavid S. Miller nexthop_nh->nh_oif = rtnh->rtnh_ifindex; 48671fceff0SDavid S. 
Miller nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; 4874e902c57SThomas Graf 4884e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 4894e902c57SThomas Graf if (attrlen > 0) { 4904e902c57SThomas Graf struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 4914e902c57SThomas Graf 4924e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 49367b61f6cSJiri Benc nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0; 494c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 4954e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 49671fceff0SDavid S. Miller nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 4977a9bc9b8SDavid S. Miller if (nexthop_nh->nh_tclassid) 498f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users++; 4991da177e4SLinus Torvalds #endif 500571e7226SRoopa Prabhu nla = nla_find(attrs, attrlen, RTA_ENCAP); 501571e7226SRoopa Prabhu if (nla) { 502571e7226SRoopa Prabhu struct lwtunnel_state *lwtstate; 503571e7226SRoopa Prabhu struct net_device *dev = NULL; 504571e7226SRoopa Prabhu struct nlattr *nla_entype; 505571e7226SRoopa Prabhu 506571e7226SRoopa Prabhu nla_entype = nla_find(attrs, attrlen, 507571e7226SRoopa Prabhu RTA_ENCAP_TYPE); 508571e7226SRoopa Prabhu if (!nla_entype) 509571e7226SRoopa Prabhu goto err_inval; 510571e7226SRoopa Prabhu if (cfg->fc_oif) 511571e7226SRoopa Prabhu dev = __dev_get_by_index(net, cfg->fc_oif); 512571e7226SRoopa Prabhu ret = lwtunnel_build_state(dev, nla_get_u16( 513571e7226SRoopa Prabhu nla_entype), 514127eb7cdSTom Herbert nla, AF_INET, cfg, 515127eb7cdSTom Herbert &lwtstate); 516571e7226SRoopa Prabhu if (ret) 517571e7226SRoopa Prabhu goto errout; 5185a6228a0SNicolas Dichtel nexthop_nh->nh_lwtstate = 5195a6228a0SNicolas Dichtel lwtstate_get(lwtstate); 520571e7226SRoopa Prabhu } 5211da177e4SLinus Torvalds } 5224e902c57SThomas Graf 5234e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 5241da177e4SLinus Torvalds } endfor_nexthops(fi); 5254e902c57SThomas Graf 5261da177e4SLinus Torvalds return 0; 
527571e7226SRoopa Prabhu 528571e7226SRoopa Prabhu err_inval: 529571e7226SRoopa Prabhu ret = -EINVAL; 530571e7226SRoopa Prabhu 531571e7226SRoopa Prabhu errout: 532571e7226SRoopa Prabhu return ret; 5331da177e4SLinus Torvalds } 5341da177e4SLinus Torvalds 5351da177e4SLinus Torvalds #endif 5361da177e4SLinus Torvalds 537e01286efSYing Xue static int fib_encap_match(struct net *net, u16 encap_type, 538571e7226SRoopa Prabhu struct nlattr *encap, 539127eb7cdSTom Herbert int oif, const struct fib_nh *nh, 540127eb7cdSTom Herbert const struct fib_config *cfg) 541571e7226SRoopa Prabhu { 542571e7226SRoopa Prabhu struct lwtunnel_state *lwtstate; 543571e7226SRoopa Prabhu struct net_device *dev = NULL; 544df383e62SJiri Benc int ret, result = 0; 545571e7226SRoopa Prabhu 546571e7226SRoopa Prabhu if (encap_type == LWTUNNEL_ENCAP_NONE) 547571e7226SRoopa Prabhu return 0; 548571e7226SRoopa Prabhu 549571e7226SRoopa Prabhu if (oif) 550571e7226SRoopa Prabhu dev = __dev_get_by_index(net, oif); 551127eb7cdSTom Herbert ret = lwtunnel_build_state(dev, encap_type, encap, 552127eb7cdSTom Herbert AF_INET, cfg, &lwtstate); 553df383e62SJiri Benc if (!ret) { 554df383e62SJiri Benc result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); 555df383e62SJiri Benc lwtstate_free(lwtstate); 556df383e62SJiri Benc } 557571e7226SRoopa Prabhu 558df383e62SJiri Benc return result; 559571e7226SRoopa Prabhu } 560571e7226SRoopa Prabhu 5614e902c57SThomas Graf int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) 5621da177e4SLinus Torvalds { 563571e7226SRoopa Prabhu struct net *net = cfg->fc_nlinfo.nl_net; 5641da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 5654e902c57SThomas Graf struct rtnexthop *rtnh; 5664e902c57SThomas Graf int remaining; 5671da177e4SLinus Torvalds #endif 5681da177e4SLinus Torvalds 5694e902c57SThomas Graf if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 5701da177e4SLinus Torvalds return 1; 5711da177e4SLinus Torvalds 5724e902c57SThomas Graf if (cfg->fc_oif || cfg->fc_gw) { 
573571e7226SRoopa Prabhu if (cfg->fc_encap) { 574571e7226SRoopa Prabhu if (fib_encap_match(net, cfg->fc_encap_type, 575571e7226SRoopa Prabhu cfg->fc_encap, cfg->fc_oif, 576127eb7cdSTom Herbert fi->fib_nh, cfg)) 577571e7226SRoopa Prabhu return 1; 578571e7226SRoopa Prabhu } 5794e902c57SThomas Graf if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 5804e902c57SThomas Graf (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 5811da177e4SLinus Torvalds return 0; 5821da177e4SLinus Torvalds return 1; 5831da177e4SLinus Torvalds } 5841da177e4SLinus Torvalds 5851da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 58651456b29SIan Morris if (!cfg->fc_mp) 5871da177e4SLinus Torvalds return 0; 5884e902c57SThomas Graf 5894e902c57SThomas Graf rtnh = cfg->fc_mp; 5904e902c57SThomas Graf remaining = cfg->fc_mp_len; 5911da177e4SLinus Torvalds 5921da177e4SLinus Torvalds for_nexthops(fi) { 5934e902c57SThomas Graf int attrlen; 5941da177e4SLinus Torvalds 5954e902c57SThomas Graf if (!rtnh_ok(rtnh, remaining)) 5961da177e4SLinus Torvalds return -EINVAL; 5974e902c57SThomas Graf 5984e902c57SThomas Graf if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) 5991da177e4SLinus Torvalds return 1; 6004e902c57SThomas Graf 6014e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 602f76936d0SJiri Pirko if (attrlen > 0) { 6034e902c57SThomas Graf struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 6044e902c57SThomas Graf 6054e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 60667b61f6cSJiri Benc if (nla && nla_get_in_addr(nla) != nh->nh_gw) 6071da177e4SLinus Torvalds return 1; 608c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 6094e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 6104e902c57SThomas Graf if (nla && nla_get_u32(nla) != nh->nh_tclassid) 6111da177e4SLinus Torvalds return 1; 6121da177e4SLinus Torvalds #endif 6131da177e4SLinus Torvalds } 6144e902c57SThomas Graf 6154e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 6161da177e4SLinus Torvalds } 
endfor_nexthops(fi); 6171da177e4SLinus Torvalds #endif 6181da177e4SLinus Torvalds return 0; 6191da177e4SLinus Torvalds } 6201da177e4SLinus Torvalds 6211da177e4SLinus Torvalds 6221da177e4SLinus Torvalds /* 6236a31d2a9SEric Dumazet * Picture 6246a31d2a9SEric Dumazet * ------- 6256a31d2a9SEric Dumazet * 6266a31d2a9SEric Dumazet * Semantics of nexthop is very messy by historical reasons. 6276a31d2a9SEric Dumazet * We have to take into account, that: 6286a31d2a9SEric Dumazet * a) gateway can be actually local interface address, 6296a31d2a9SEric Dumazet * so that gatewayed route is direct. 6306a31d2a9SEric Dumazet * b) gateway must be on-link address, possibly 6316a31d2a9SEric Dumazet * described not by an ifaddr, but also by a direct route. 6326a31d2a9SEric Dumazet * c) If both gateway and interface are specified, they should not 6336a31d2a9SEric Dumazet * contradict. 6346a31d2a9SEric Dumazet * d) If we use tunnel routes, gateway could be not on-link. 6356a31d2a9SEric Dumazet * 6366a31d2a9SEric Dumazet * Attempt to reconcile all of these (alas, self-contradictory) conditions 6376a31d2a9SEric Dumazet * results in pretty ugly and hairy code with obscure logic. 6386a31d2a9SEric Dumazet * 6396a31d2a9SEric Dumazet * I chose to generalized it instead, so that the size 6406a31d2a9SEric Dumazet * of code does not increase practically, but it becomes 6416a31d2a9SEric Dumazet * much more general. 6426a31d2a9SEric Dumazet * Every prefix is assigned a "scope" value: "host" is local address, 6436a31d2a9SEric Dumazet * "link" is direct route, 6446a31d2a9SEric Dumazet * [ ... "site" ... "interior" ... ] 6456a31d2a9SEric Dumazet * and "universe" is true gateway route with global meaning. 6466a31d2a9SEric Dumazet * 6476a31d2a9SEric Dumazet * Every prefix refers to a set of "nexthop"s (gw, oif), 6486a31d2a9SEric Dumazet * where gw must have narrower scope. 
This recursion stops
 * when gw has LOCAL scope or if "nexthop" is declared ONLINK,
 * which means that gw is forced to be on link.
 *
 * Code is still hairy, but now it is apparently logically
 * consistent and very flexible. E.g. as a by-product it allows
 * independent exterior and interior routing processes to
 * co-exist in peace.
 *
 * Normally it looks as follows.
 *
 * {universe prefix}  -> (gw, oif) [scope link]
 *		  |
 *		  |-> {link prefix} -> (gw, oif) [scope local]
 *					|
 *					|-> {local prefix} (terminal node)
 */

/*
 * fib_check_nh - validate one nexthop and resolve its device and scope.
 * @cfg: route configuration; supplies the namespace, the route scope and
 *       the table id used for the gateway lookup.
 * @fi:  owning fib_info (not referenced by this function).
 * @nh:  nexthop to check; nh_dev, nh_scope, possibly nh_oif and the
 *       RTNH_F_LINKDOWN bit of nh_flags are filled in here.
 *
 * Three cases are handled:
 *  - gateway + RTNH_F_ONLINK: the device comes from nh_oif, must be up and
 *    the gateway must be RTN_UNICAST on it; the nexthop gets scope LINK.
 *  - gateway without ONLINK: the gateway is looked up in the FIB and the
 *    nexthop inherits scope, oif and device from the lookup result.
 *  - no gateway: the device comes from nh_oif, must be up; the nexthop
 *    gets scope HOST.
 *
 * Whenever a device is stored in nh->nh_dev a reference is taken on it
 * with dev_hold().
 *
 * Returns 0 on success or a negative errno (-EINVAL, -ENODEV, -ENETDOWN,
 * or whatever the FIB lookup returned).
 */
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
			struct fib_nh *nh)
{
	int err = 0;
	struct net *net;
	struct net_device *dev;

	net = cfg->fc_nlinfo.nl_net;
	if (nh->nh_gw) {
		struct fib_result res;

		if (nh->nh_flags & RTNH_F_ONLINK) {
			unsigned int addr_type;

			/* An on-link gateway only makes sense for routes
			 * whose scope is wider than "link".
			 */
			if (cfg->fc_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			dev = __dev_get_by_index(net, nh->nh_oif);
			if (!dev)
				return -ENODEV;
			if (!(dev->flags & IFF_UP))
				return -ENETDOWN;
			/* The gateway itself must be a plain unicast address
			 * as seen from this device.
			 */
			addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
			if (addr_type != RTN_UNICAST)
				return -EINVAL;
			if (!netif_carrier_ok(dev))
				nh->nh_flags |= RTNH_F_LINKDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		/* From here on every exit must drop the RCU read lock:
		 * either explicitly before returning, or through "out".
		 */
		rcu_read_lock();
		{
			struct fib_table *tbl = NULL;
			/* Look the gateway up with a scope one step narrower
			 * than the route being added.
			 */
			struct flowi4 fl4 = {
				.daddr = nh->nh_gw,
				.flowi4_scope = cfg->fc_scope + 1,
				.flowi4_oif = nh->nh_oif,
				.flowi4_iif = LOOPBACK_IFINDEX,
			};

			/* It is not necessary, but requires a bit of thinking */
			if (fl4.flowi4_scope < RT_SCOPE_LINK)
				fl4.flowi4_scope = RT_SCOPE_LINK;

			/* Prefer the table the route is being added to. */
			if (cfg->fc_table)
				tbl = fib_get_table(net, cfg->fc_table);

			if (tbl)
				err = fib_table_lookup(tbl, &fl4, &res,
						       FIB_LOOKUP_IGNORE_LINKSTATE |
						       FIB_LOOKUP_NOREF);

			/* on error or if no table given do full lookup. This
			 * is needed for example when nexthops are in the local
			 * table rather than the given table
			 */
			if (!tbl || err) {
				err = fib_lookup(net, &fl4, &res,
						 FIB_LOOKUP_IGNORE_LINKSTATE);
			}

			if (err) {
				rcu_read_unlock();
				return err;
			}
		}
		/* The gateway must resolve to a unicast or local route. */
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		/* Inherit scope, output interface and device from the route
		 * that reaches the gateway.
		 */
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		nh->nh_dev = dev = FIB_RES_DEV(res);
		if (!dev)
			goto out;
		dev_hold(dev);
		if (!netif_carrier_ok(dev))
			nh->nh_flags |= RTNH_F_LINKDOWN;
		/* The reference taken above is kept even when the device is
		 * down; the caller sees -ENETDOWN with nh->nh_dev still set.
		 */
		err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
	} else {
		struct in_device *in_dev;

		/* These flags are only meaningful together with a gateway. */
		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
			return -EINVAL;

		rcu_read_lock();
		err = -ENODEV;
		in_dev = inetdev_by_index(net, nh->nh_oif);
		if (!in_dev)
			goto out;
		err = -ENETDOWN;
		if (!(in_dev->dev->flags & IFF_UP))
			goto out;
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		if (!netif_carrier_ok(nh->nh_dev))
			nh->nh_flags |= RTNH_F_LINKDOWN;
		err = 0;
	}
out:
	/* Reached only with the RCU read lock held (both branches above). */
	rcu_read_unlock();
	return err;
}

/*
 * Map a preferred-source address to a bucket index: the address is XORed
 * with itself shifted right by 7 and by 14 bits, then masked with
 * fib_info_hash_size - 1 (the mask form presumes the size is a power of
 * two).
 */
static inline unsigned int fib_laddr_hashfn(__be32 val)
{
	unsigned int mask = (fib_info_hash_size - 1);

	return ((__force u32)val ^
		((__force u32)val >> 7) ^
		((__force u32)val >> 14)) & mask;
}

Miller static struct hlist_head *fib_info_hash_alloc(int bytes) 7801da177e4SLinus Torvalds { 7811da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 78288f83491SJoonwoo Park return kzalloc(bytes, GFP_KERNEL); 7831da177e4SLinus Torvalds else 7841da177e4SLinus Torvalds return (struct hlist_head *) 7856a31d2a9SEric Dumazet __get_free_pages(GFP_KERNEL | __GFP_ZERO, 7866a31d2a9SEric Dumazet get_order(bytes)); 7871da177e4SLinus Torvalds } 7881da177e4SLinus Torvalds 789123b9731SDavid S. Miller static void fib_info_hash_free(struct hlist_head *hash, int bytes) 7901da177e4SLinus Torvalds { 7911da177e4SLinus Torvalds if (!hash) 7921da177e4SLinus Torvalds return; 7931da177e4SLinus Torvalds 7941da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 7951da177e4SLinus Torvalds kfree(hash); 7961da177e4SLinus Torvalds else 7971da177e4SLinus Torvalds free_pages((unsigned long) hash, get_order(bytes)); 7981da177e4SLinus Torvalds } 7991da177e4SLinus Torvalds 800123b9731SDavid S. Miller static void fib_info_hash_move(struct hlist_head *new_info_hash, 8011da177e4SLinus Torvalds struct hlist_head *new_laddrhash, 8021da177e4SLinus Torvalds unsigned int new_size) 8031da177e4SLinus Torvalds { 804b7656e7fSDavid S. Miller struct hlist_head *old_info_hash, *old_laddrhash; 805123b9731SDavid S. Miller unsigned int old_size = fib_info_hash_size; 806b7656e7fSDavid S. Miller unsigned int i, bytes; 8071da177e4SLinus Torvalds 808832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 809b7656e7fSDavid S. Miller old_info_hash = fib_info_hash; 810b7656e7fSDavid S. Miller old_laddrhash = fib_info_laddrhash; 811123b9731SDavid S. 
Miller fib_info_hash_size = new_size; 8121da177e4SLinus Torvalds 8131da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 8141da177e4SLinus Torvalds struct hlist_head *head = &fib_info_hash[i]; 815b67bfe0dSSasha Levin struct hlist_node *n; 8161da177e4SLinus Torvalds struct fib_info *fi; 8171da177e4SLinus Torvalds 818b67bfe0dSSasha Levin hlist_for_each_entry_safe(fi, n, head, fib_hash) { 8191da177e4SLinus Torvalds struct hlist_head *dest; 8201da177e4SLinus Torvalds unsigned int new_hash; 8211da177e4SLinus Torvalds 8221da177e4SLinus Torvalds new_hash = fib_info_hashfn(fi); 8231da177e4SLinus Torvalds dest = &new_info_hash[new_hash]; 8241da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, dest); 8251da177e4SLinus Torvalds } 8261da177e4SLinus Torvalds } 8271da177e4SLinus Torvalds fib_info_hash = new_info_hash; 8281da177e4SLinus Torvalds 8291da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 8301da177e4SLinus Torvalds struct hlist_head *lhead = &fib_info_laddrhash[i]; 831b67bfe0dSSasha Levin struct hlist_node *n; 8321da177e4SLinus Torvalds struct fib_info *fi; 8331da177e4SLinus Torvalds 834b67bfe0dSSasha Levin hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { 8351da177e4SLinus Torvalds struct hlist_head *ldest; 8361da177e4SLinus Torvalds unsigned int new_hash; 8371da177e4SLinus Torvalds 8381da177e4SLinus Torvalds new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 8391da177e4SLinus Torvalds ldest = &new_laddrhash[new_hash]; 8401da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, ldest); 8411da177e4SLinus Torvalds } 8421da177e4SLinus Torvalds } 8431da177e4SLinus Torvalds fib_info_laddrhash = new_laddrhash; 8441da177e4SLinus Torvalds 845832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 846b7656e7fSDavid S. Miller 847b7656e7fSDavid S. Miller bytes = old_size * sizeof(struct hlist_head *); 848123b9731SDavid S. Miller fib_info_hash_free(old_info_hash, bytes); 849123b9731SDavid S. 
Miller fib_info_hash_free(old_laddrhash, bytes); 8501da177e4SLinus Torvalds } 8511da177e4SLinus Torvalds 852436c3b66SDavid S. Miller __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) 853436c3b66SDavid S. Miller { 854436c3b66SDavid S. Miller nh->nh_saddr = inet_select_addr(nh->nh_dev, 855436c3b66SDavid S. Miller nh->nh_gw, 85637e826c5SDavid S. Miller nh->nh_parent->fib_scope); 857436c3b66SDavid S. Miller nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); 858436c3b66SDavid S. Miller 859436c3b66SDavid S. Miller return nh->nh_saddr; 860436c3b66SDavid S. Miller } 861436c3b66SDavid S. Miller 862021dd3b8SDavid Ahern static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) 863021dd3b8SDavid Ahern { 864021dd3b8SDavid Ahern if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 865021dd3b8SDavid Ahern fib_prefsrc != cfg->fc_dst) { 866021dd3b8SDavid Ahern int tb_id = cfg->fc_table; 867021dd3b8SDavid Ahern 868021dd3b8SDavid Ahern if (tb_id == RT_TABLE_MAIN) 869021dd3b8SDavid Ahern tb_id = RT_TABLE_LOCAL; 870021dd3b8SDavid Ahern 871021dd3b8SDavid Ahern if (inet_addr_type_table(cfg->fc_nlinfo.nl_net, 872021dd3b8SDavid Ahern fib_prefsrc, tb_id) != RTN_LOCAL) { 873021dd3b8SDavid Ahern return false; 874021dd3b8SDavid Ahern } 875021dd3b8SDavid Ahern } 876021dd3b8SDavid Ahern return true; 877021dd3b8SDavid Ahern } 878021dd3b8SDavid Ahern 8796cf9dfd3SFlorian Westphal static int 8806cf9dfd3SFlorian Westphal fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) 8816cf9dfd3SFlorian Westphal { 882*c3a8d947SDaniel Borkmann bool ecn_ca = false; 8836cf9dfd3SFlorian Westphal struct nlattr *nla; 8846cf9dfd3SFlorian Westphal int remaining; 8856cf9dfd3SFlorian Westphal 8866cf9dfd3SFlorian Westphal if (!cfg->fc_mx) 8876cf9dfd3SFlorian Westphal return 0; 8886cf9dfd3SFlorian Westphal 8896cf9dfd3SFlorian Westphal nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 8906cf9dfd3SFlorian Westphal int type = nla_type(nla); 
8916cf9dfd3SFlorian Westphal u32 val; 8926cf9dfd3SFlorian Westphal 8936cf9dfd3SFlorian Westphal if (!type) 8946cf9dfd3SFlorian Westphal continue; 8956cf9dfd3SFlorian Westphal if (type > RTAX_MAX) 8966cf9dfd3SFlorian Westphal return -EINVAL; 8976cf9dfd3SFlorian Westphal 8986cf9dfd3SFlorian Westphal if (type == RTAX_CC_ALGO) { 8996cf9dfd3SFlorian Westphal char tmp[TCP_CA_NAME_MAX]; 9006cf9dfd3SFlorian Westphal 9016cf9dfd3SFlorian Westphal nla_strlcpy(tmp, nla, sizeof(tmp)); 902*c3a8d947SDaniel Borkmann val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 9036cf9dfd3SFlorian Westphal if (val == TCP_CA_UNSPEC) 9046cf9dfd3SFlorian Westphal return -EINVAL; 9056cf9dfd3SFlorian Westphal } else { 9066cf9dfd3SFlorian Westphal val = nla_get_u32(nla); 9076cf9dfd3SFlorian Westphal } 9086cf9dfd3SFlorian Westphal if (type == RTAX_ADVMSS && val > 65535 - 40) 9096cf9dfd3SFlorian Westphal val = 65535 - 40; 9106cf9dfd3SFlorian Westphal if (type == RTAX_MTU && val > 65535 - 15) 9116cf9dfd3SFlorian Westphal val = 65535 - 15; 912b8d3e416SDaniel Borkmann if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) 913b8d3e416SDaniel Borkmann return -EINVAL; 9146cf9dfd3SFlorian Westphal fi->fib_metrics[type - 1] = val; 9156cf9dfd3SFlorian Westphal } 9166cf9dfd3SFlorian Westphal 917*c3a8d947SDaniel Borkmann if (ecn_ca) 918*c3a8d947SDaniel Borkmann fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; 919*c3a8d947SDaniel Borkmann 9206cf9dfd3SFlorian Westphal return 0; 9216cf9dfd3SFlorian Westphal } 9226cf9dfd3SFlorian Westphal 9234e902c57SThomas Graf struct fib_info *fib_create_info(struct fib_config *cfg) 9241da177e4SLinus Torvalds { 9251da177e4SLinus Torvalds int err; 9261da177e4SLinus Torvalds struct fib_info *fi = NULL; 9271da177e4SLinus Torvalds struct fib_info *ofi; 9281da177e4SLinus Torvalds int nhs = 1; 9297462bd74SDenis V. Lunev struct net *net = cfg->fc_nlinfo.nl_net; 9301da177e4SLinus Torvalds 9314c8237cdSDavid S. Miller if (cfg->fc_type > RTN_MAX) 9324c8237cdSDavid S. 
Miller goto err_inval; 9334c8237cdSDavid S. Miller 9341da177e4SLinus Torvalds /* Fast check to catch the most weird cases */ 9354e902c57SThomas Graf if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 9361da177e4SLinus Torvalds goto err_inval; 9371da177e4SLinus Torvalds 9381da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 9394e902c57SThomas Graf if (cfg->fc_mp) { 9404e902c57SThomas Graf nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); 9411da177e4SLinus Torvalds if (nhs == 0) 9421da177e4SLinus Torvalds goto err_inval; 9431da177e4SLinus Torvalds } 9441da177e4SLinus Torvalds #endif 9451da177e4SLinus Torvalds 9461da177e4SLinus Torvalds err = -ENOBUFS; 947123b9731SDavid S. Miller if (fib_info_cnt >= fib_info_hash_size) { 948123b9731SDavid S. Miller unsigned int new_size = fib_info_hash_size << 1; 9491da177e4SLinus Torvalds struct hlist_head *new_info_hash; 9501da177e4SLinus Torvalds struct hlist_head *new_laddrhash; 9511da177e4SLinus Torvalds unsigned int bytes; 9521da177e4SLinus Torvalds 9531da177e4SLinus Torvalds if (!new_size) 954d94ce9b2SEric Dumazet new_size = 16; 9551da177e4SLinus Torvalds bytes = new_size * sizeof(struct hlist_head *); 956123b9731SDavid S. Miller new_info_hash = fib_info_hash_alloc(bytes); 957123b9731SDavid S. Miller new_laddrhash = fib_info_hash_alloc(bytes); 9581da177e4SLinus Torvalds if (!new_info_hash || !new_laddrhash) { 959123b9731SDavid S. Miller fib_info_hash_free(new_info_hash, bytes); 960123b9731SDavid S. Miller fib_info_hash_free(new_laddrhash, bytes); 96188f83491SJoonwoo Park } else 962123b9731SDavid S. Miller fib_info_hash_move(new_info_hash, new_laddrhash, new_size); 9631da177e4SLinus Torvalds 964123b9731SDavid S. 
Miller if (!fib_info_hash_size) 9651da177e4SLinus Torvalds goto failure; 9661da177e4SLinus Torvalds } 9671da177e4SLinus Torvalds 9680da974f4SPanagiotis Issaris fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 96951456b29SIan Morris if (!fi) 9701da177e4SLinus Torvalds goto failure; 971aeefa1ecSSergey Popovich fib_info_cnt++; 972725d1e1bSDavid S. Miller if (cfg->fc_mx) { 9739c150e82SDavid S. Miller fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 9749c150e82SDavid S. Miller if (!fi->fib_metrics) 9759c150e82SDavid S. Miller goto failure; 976725d1e1bSDavid S. Miller } else 977725d1e1bSDavid S. Miller fi->fib_metrics = (u32 *) dst_default_metrics; 9781da177e4SLinus Torvalds 979efd7ef1cSEric W. Biederman fi->fib_net = net; 9804e902c57SThomas Graf fi->fib_protocol = cfg->fc_protocol; 98137e826c5SDavid S. Miller fi->fib_scope = cfg->fc_scope; 9824e902c57SThomas Graf fi->fib_flags = cfg->fc_flags; 9834e902c57SThomas Graf fi->fib_priority = cfg->fc_priority; 9844e902c57SThomas Graf fi->fib_prefsrc = cfg->fc_prefsrc; 985f4ef85bbSEric Dumazet fi->fib_type = cfg->fc_type; 9861da177e4SLinus Torvalds 9871da177e4SLinus Torvalds fi->fib_nhs = nhs; 9881da177e4SLinus Torvalds change_nexthops(fi) { 98971fceff0SDavid S. 
Miller nexthop_nh->nh_parent = fi; 990d26b3a7cSEric Dumazet nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); 991f8a17175SJulian Anastasov if (!nexthop_nh->nh_pcpu_rth_output) 992f8a17175SJulian Anastasov goto failure; 9931da177e4SLinus Torvalds } endfor_nexthops(fi) 9941da177e4SLinus Torvalds 9956cf9dfd3SFlorian Westphal err = fib_convert_metrics(fi, cfg); 9966cf9dfd3SFlorian Westphal if (err) 9976cf9dfd3SFlorian Westphal goto failure; 9981da177e4SLinus Torvalds 9994e902c57SThomas Graf if (cfg->fc_mp) { 10001da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 10014e902c57SThomas Graf err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); 10024e902c57SThomas Graf if (err != 0) 10031da177e4SLinus Torvalds goto failure; 10044e902c57SThomas Graf if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) 10051da177e4SLinus Torvalds goto err_inval; 10064e902c57SThomas Graf if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 10071da177e4SLinus Torvalds goto err_inval; 1008c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 10094e902c57SThomas Graf if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 10101da177e4SLinus Torvalds goto err_inval; 10111da177e4SLinus Torvalds #endif 10121da177e4SLinus Torvalds #else 10131da177e4SLinus Torvalds goto err_inval; 10141da177e4SLinus Torvalds #endif 10151da177e4SLinus Torvalds } else { 10161da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 10174e902c57SThomas Graf 1018571e7226SRoopa Prabhu if (cfg->fc_encap) { 1019571e7226SRoopa Prabhu struct lwtunnel_state *lwtstate; 1020571e7226SRoopa Prabhu struct net_device *dev = NULL; 1021571e7226SRoopa Prabhu 1022571e7226SRoopa Prabhu if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) 1023571e7226SRoopa Prabhu goto err_inval; 1024571e7226SRoopa Prabhu if (cfg->fc_oif) 1025571e7226SRoopa Prabhu dev = __dev_get_by_index(net, cfg->fc_oif); 1026571e7226SRoopa Prabhu err = lwtunnel_build_state(dev, cfg->fc_encap_type, 1027127eb7cdSTom Herbert cfg->fc_encap, AF_INET, 
cfg, 1028127eb7cdSTom Herbert &lwtstate); 1029571e7226SRoopa Prabhu if (err) 1030571e7226SRoopa Prabhu goto failure; 1031571e7226SRoopa Prabhu 10325a6228a0SNicolas Dichtel nh->nh_lwtstate = lwtstate_get(lwtstate); 1033571e7226SRoopa Prabhu } 10344e902c57SThomas Graf nh->nh_oif = cfg->fc_oif; 10354e902c57SThomas Graf nh->nh_gw = cfg->fc_gw; 10364e902c57SThomas Graf nh->nh_flags = cfg->fc_flags; 1037c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 10384e902c57SThomas Graf nh->nh_tclassid = cfg->fc_flow; 10397a9bc9b8SDavid S. Miller if (nh->nh_tclassid) 1040f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users++; 10411da177e4SLinus Torvalds #endif 10421da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 10431da177e4SLinus Torvalds nh->nh_weight = 1; 10441da177e4SLinus Torvalds #endif 10451da177e4SLinus Torvalds } 10461da177e4SLinus Torvalds 10474e902c57SThomas Graf if (fib_props[cfg->fc_type].error) { 10484e902c57SThomas Graf if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 10491da177e4SLinus Torvalds goto err_inval; 10501da177e4SLinus Torvalds goto link_it; 10514c8237cdSDavid S. Miller } else { 10524c8237cdSDavid S. Miller switch (cfg->fc_type) { 10534c8237cdSDavid S. Miller case RTN_UNICAST: 10544c8237cdSDavid S. Miller case RTN_LOCAL: 10554c8237cdSDavid S. Miller case RTN_BROADCAST: 10564c8237cdSDavid S. Miller case RTN_ANYCAST: 10574c8237cdSDavid S. Miller case RTN_MULTICAST: 10584c8237cdSDavid S. Miller break; 10594c8237cdSDavid S. Miller default: 10604c8237cdSDavid S. Miller goto err_inval; 10614c8237cdSDavid S. Miller } 10621da177e4SLinus Torvalds } 10631da177e4SLinus Torvalds 10644e902c57SThomas Graf if (cfg->fc_scope > RT_SCOPE_HOST) 10651da177e4SLinus Torvalds goto err_inval; 10661da177e4SLinus Torvalds 10674e902c57SThomas Graf if (cfg->fc_scope == RT_SCOPE_HOST) { 10681da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 10691da177e4SLinus Torvalds 10701da177e4SLinus Torvalds /* Local address is added. 
*/ 10711da177e4SLinus Torvalds if (nhs != 1 || nh->nh_gw) 10721da177e4SLinus Torvalds goto err_inval; 10731da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_NOWHERE; 10747462bd74SDenis V. Lunev nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); 10751da177e4SLinus Torvalds err = -ENODEV; 107651456b29SIan Morris if (!nh->nh_dev) 10771da177e4SLinus Torvalds goto failure; 10781da177e4SLinus Torvalds } else { 10798a3d0316SAndy Gospodarek int linkdown = 0; 10808a3d0316SAndy Gospodarek 10811da177e4SLinus Torvalds change_nexthops(fi) { 10826a31d2a9SEric Dumazet err = fib_check_nh(cfg, fi, nexthop_nh); 10836a31d2a9SEric Dumazet if (err != 0) 10841da177e4SLinus Torvalds goto failure; 10858a3d0316SAndy Gospodarek if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) 10868a3d0316SAndy Gospodarek linkdown++; 10871da177e4SLinus Torvalds } endfor_nexthops(fi) 10888a3d0316SAndy Gospodarek if (linkdown == fi->fib_nhs) 10898a3d0316SAndy Gospodarek fi->fib_flags |= RTNH_F_LINKDOWN; 10901da177e4SLinus Torvalds } 10911da177e4SLinus Torvalds 1092021dd3b8SDavid Ahern if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) 10931da177e4SLinus Torvalds goto err_inval; 10941da177e4SLinus Torvalds 10951fc050a1SDavid S. Miller change_nexthops(fi) { 1096436c3b66SDavid S. Miller fib_info_update_nh_saddr(net, nexthop_nh); 10971fc050a1SDavid S. Miller } endfor_nexthops(fi) 10981fc050a1SDavid S. 
Miller 10991da177e4SLinus Torvalds link_it: 11006a31d2a9SEric Dumazet ofi = fib_find_info(fi); 11016a31d2a9SEric Dumazet if (ofi) { 11021da177e4SLinus Torvalds fi->fib_dead = 1; 11031da177e4SLinus Torvalds free_fib_info(fi); 11041da177e4SLinus Torvalds ofi->fib_treeref++; 11051da177e4SLinus Torvalds return ofi; 11061da177e4SLinus Torvalds } 11071da177e4SLinus Torvalds 11081da177e4SLinus Torvalds fi->fib_treeref++; 11091da177e4SLinus Torvalds atomic_inc(&fi->fib_clntref); 1110832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 11111da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, 11121da177e4SLinus Torvalds &fib_info_hash[fib_info_hashfn(fi)]); 11131da177e4SLinus Torvalds if (fi->fib_prefsrc) { 11141da177e4SLinus Torvalds struct hlist_head *head; 11151da177e4SLinus Torvalds 11161da177e4SLinus Torvalds head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 11171da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, head); 11181da177e4SLinus Torvalds } 11191da177e4SLinus Torvalds change_nexthops(fi) { 11201da177e4SLinus Torvalds struct hlist_head *head; 11211da177e4SLinus Torvalds unsigned int hash; 11221da177e4SLinus Torvalds 112371fceff0SDavid S. Miller if (!nexthop_nh->nh_dev) 11241da177e4SLinus Torvalds continue; 112571fceff0SDavid S. Miller hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); 11261da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 112771fceff0SDavid S. 
Miller hlist_add_head(&nexthop_nh->nh_hash, head); 11281da177e4SLinus Torvalds } endfor_nexthops(fi) 1129832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 11301da177e4SLinus Torvalds return fi; 11311da177e4SLinus Torvalds 11321da177e4SLinus Torvalds err_inval: 11331da177e4SLinus Torvalds err = -EINVAL; 11341da177e4SLinus Torvalds 11351da177e4SLinus Torvalds failure: 11361da177e4SLinus Torvalds if (fi) { 11371da177e4SLinus Torvalds fi->fib_dead = 1; 11381da177e4SLinus Torvalds free_fib_info(fi); 11391da177e4SLinus Torvalds } 11404e902c57SThomas Graf 11414e902c57SThomas Graf return ERR_PTR(err); 11421da177e4SLinus Torvalds } 11431da177e4SLinus Torvalds 114415e47304SEric W. Biederman int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, 114537e826c5SDavid S. Miller u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, 1146b6544c0bSJamal Hadi Salim struct fib_info *fi, unsigned int flags) 11471da177e4SLinus Torvalds { 11481da177e4SLinus Torvalds struct nlmsghdr *nlh; 1149be403ea1SThomas Graf struct rtmsg *rtm; 11501da177e4SLinus Torvalds 115115e47304SEric W. Biederman nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); 115251456b29SIan Morris if (!nlh) 115326932566SPatrick McHardy return -EMSGSIZE; 1154be403ea1SThomas Graf 1155be403ea1SThomas Graf rtm = nlmsg_data(nlh); 11561da177e4SLinus Torvalds rtm->rtm_family = AF_INET; 11571da177e4SLinus Torvalds rtm->rtm_dst_len = dst_len; 11581da177e4SLinus Torvalds rtm->rtm_src_len = 0; 11591da177e4SLinus Torvalds rtm->rtm_tos = tos; 1160709772e6SKrzysztof Piotr Oledzki if (tb_id < 256) 11611da177e4SLinus Torvalds rtm->rtm_table = tb_id; 1162709772e6SKrzysztof Piotr Oledzki else 1163709772e6SKrzysztof Piotr Oledzki rtm->rtm_table = RT_TABLE_COMPAT; 1164f3756b79SDavid S. Miller if (nla_put_u32(skb, RTA_TABLE, tb_id)) 1165f3756b79SDavid S. 
Miller goto nla_put_failure; 11661da177e4SLinus Torvalds rtm->rtm_type = type; 11671da177e4SLinus Torvalds rtm->rtm_flags = fi->fib_flags; 116837e826c5SDavid S. Miller rtm->rtm_scope = fi->fib_scope; 11691da177e4SLinus Torvalds rtm->rtm_protocol = fi->fib_protocol; 1170be403ea1SThomas Graf 1171f3756b79SDavid S. Miller if (rtm->rtm_dst_len && 1172930345eaSJiri Benc nla_put_in_addr(skb, RTA_DST, dst)) 1173f3756b79SDavid S. Miller goto nla_put_failure; 1174f3756b79SDavid S. Miller if (fi->fib_priority && 1175f3756b79SDavid S. Miller nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) 1176f3756b79SDavid S. Miller goto nla_put_failure; 11771da177e4SLinus Torvalds if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 1178be403ea1SThomas Graf goto nla_put_failure; 1179be403ea1SThomas Graf 1180f3756b79SDavid S. Miller if (fi->fib_prefsrc && 1181930345eaSJiri Benc nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) 1182f3756b79SDavid S. Miller goto nla_put_failure; 11831da177e4SLinus Torvalds if (fi->fib_nhs == 1) { 11840eeb075fSAndy Gospodarek struct in_device *in_dev; 11850eeb075fSAndy Gospodarek 1186f3756b79SDavid S. Miller if (fi->fib_nh->nh_gw && 1187930345eaSJiri Benc nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) 1188f3756b79SDavid S. Miller goto nla_put_failure; 1189f3756b79SDavid S. Miller if (fi->fib_nh->nh_oif && 1190f3756b79SDavid S. Miller nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) 1191f3756b79SDavid S. Miller goto nla_put_failure; 11920eeb075fSAndy Gospodarek if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { 119396ac5cc9SAndy Gospodarek in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev); 11940eeb075fSAndy Gospodarek if (in_dev && 11950eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) 11960eeb075fSAndy Gospodarek rtm->rtm_flags |= RTNH_F_DEAD; 11970eeb075fSAndy Gospodarek } 1198c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1199f3756b79SDavid S. Miller if (fi->fib_nh[0].nh_tclassid && 1200f3756b79SDavid S. 
Miller nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) 1201f3756b79SDavid S. Miller goto nla_put_failure; 12028265abc0SPatrick McHardy #endif 1203571e7226SRoopa Prabhu if (fi->fib_nh->nh_lwtstate) 1204571e7226SRoopa Prabhu lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate); 12051da177e4SLinus Torvalds } 12061da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 12071da177e4SLinus Torvalds if (fi->fib_nhs > 1) { 1208be403ea1SThomas Graf struct rtnexthop *rtnh; 1209be403ea1SThomas Graf struct nlattr *mp; 1210be403ea1SThomas Graf 1211be403ea1SThomas Graf mp = nla_nest_start(skb, RTA_MULTIPATH); 121251456b29SIan Morris if (!mp) 1213be403ea1SThomas Graf goto nla_put_failure; 12141da177e4SLinus Torvalds 12151da177e4SLinus Torvalds for_nexthops(fi) { 12160eeb075fSAndy Gospodarek struct in_device *in_dev; 12170eeb075fSAndy Gospodarek 1218be403ea1SThomas Graf rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 121951456b29SIan Morris if (!rtnh) 1220be403ea1SThomas Graf goto nla_put_failure; 1221be403ea1SThomas Graf 1222be403ea1SThomas Graf rtnh->rtnh_flags = nh->nh_flags & 0xFF; 12230eeb075fSAndy Gospodarek if (nh->nh_flags & RTNH_F_LINKDOWN) { 122496ac5cc9SAndy Gospodarek in_dev = __in_dev_get_rtnl(nh->nh_dev); 12250eeb075fSAndy Gospodarek if (in_dev && 12260eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) 12270eeb075fSAndy Gospodarek rtnh->rtnh_flags |= RTNH_F_DEAD; 12280eeb075fSAndy Gospodarek } 1229be403ea1SThomas Graf rtnh->rtnh_hops = nh->nh_weight - 1; 1230be403ea1SThomas Graf rtnh->rtnh_ifindex = nh->nh_oif; 1231be403ea1SThomas Graf 1232f3756b79SDavid S. Miller if (nh->nh_gw && 1233930345eaSJiri Benc nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw)) 1234f3756b79SDavid S. Miller goto nla_put_failure; 1235c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1236f3756b79SDavid S. Miller if (nh->nh_tclassid && 1237f3756b79SDavid S. Miller nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) 1238f3756b79SDavid S. 
Miller goto nla_put_failure; 12398265abc0SPatrick McHardy #endif 1240571e7226SRoopa Prabhu if (nh->nh_lwtstate) 1241571e7226SRoopa Prabhu lwtunnel_fill_encap(skb, nh->nh_lwtstate); 1242be403ea1SThomas Graf /* length of rtnetlink header + attributes */ 1243be403ea1SThomas Graf rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; 12441da177e4SLinus Torvalds } endfor_nexthops(fi); 1245be403ea1SThomas Graf 1246be403ea1SThomas Graf nla_nest_end(skb, mp); 12471da177e4SLinus Torvalds } 12481da177e4SLinus Torvalds #endif 1249053c095aSJohannes Berg nlmsg_end(skb, nlh); 1250053c095aSJohannes Berg return 0; 12511da177e4SLinus Torvalds 1252be403ea1SThomas Graf nla_put_failure: 125326932566SPatrick McHardy nlmsg_cancel(skb, nlh); 125426932566SPatrick McHardy return -EMSGSIZE; 12551da177e4SLinus Torvalds } 12561da177e4SLinus Torvalds 12571da177e4SLinus Torvalds /* 12586a31d2a9SEric Dumazet * Update FIB if: 12596a31d2a9SEric Dumazet * - local address disappeared -> we must delete all the entries 12606a31d2a9SEric Dumazet * referring to it. 12616a31d2a9SEric Dumazet * - device went down -> we must shutdown all nexthops going via it. 12621da177e4SLinus Torvalds */ 12634814bdbdSDenis V. Lunev int fib_sync_down_addr(struct net *net, __be32 local) 12641da177e4SLinus Torvalds { 12651da177e4SLinus Torvalds int ret = 0; 12661da177e4SLinus Torvalds unsigned int hash = fib_laddr_hashfn(local); 12671da177e4SLinus Torvalds struct hlist_head *head = &fib_info_laddrhash[hash]; 12681da177e4SLinus Torvalds struct fib_info *fi; 12691da177e4SLinus Torvalds 127051456b29SIan Morris if (!fib_info_laddrhash || local == 0) 127185326fa5SDenis V. Lunev return 0; 127285326fa5SDenis V. Lunev 1273b67bfe0dSSasha Levin hlist_for_each_entry(fi, head, fib_lhash) { 127409ad9bc7SOctavian Purdila if (!net_eq(fi->fib_net, net)) 12754814bdbdSDenis V. 
Lunev continue; 12761da177e4SLinus Torvalds if (fi->fib_prefsrc == local) { 12771da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 12781da177e4SLinus Torvalds ret++; 12791da177e4SLinus Torvalds } 12801da177e4SLinus Torvalds } 128185326fa5SDenis V. Lunev return ret; 12821da177e4SLinus Torvalds } 12831da177e4SLinus Torvalds 12848a3d0316SAndy Gospodarek int fib_sync_down_dev(struct net_device *dev, unsigned long event) 128585326fa5SDenis V. Lunev { 128685326fa5SDenis V. Lunev int ret = 0; 128785326fa5SDenis V. Lunev int scope = RT_SCOPE_NOWHERE; 12881da177e4SLinus Torvalds struct fib_info *prev_fi = NULL; 12891da177e4SLinus Torvalds unsigned int hash = fib_devindex_hashfn(dev->ifindex); 12901da177e4SLinus Torvalds struct hlist_head *head = &fib_info_devhash[hash]; 12911da177e4SLinus Torvalds struct fib_nh *nh; 12921da177e4SLinus Torvalds 12938a3d0316SAndy Gospodarek if (event == NETDEV_UNREGISTER || 12948a3d0316SAndy Gospodarek event == NETDEV_DOWN) 129585326fa5SDenis V. Lunev scope = -1; 129685326fa5SDenis V. Lunev 1297b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 12981da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 12991da177e4SLinus Torvalds int dead; 13001da177e4SLinus Torvalds 13011da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 13021da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 13031da177e4SLinus Torvalds continue; 13041da177e4SLinus Torvalds prev_fi = fi; 13051da177e4SLinus Torvalds dead = 0; 13061da177e4SLinus Torvalds change_nexthops(fi) { 130771fceff0SDavid S. Miller if (nexthop_nh->nh_flags & RTNH_F_DEAD) 13081da177e4SLinus Torvalds dead++; 130971fceff0SDavid S. Miller else if (nexthop_nh->nh_dev == dev && 131071fceff0SDavid S. Miller nexthop_nh->nh_scope != scope) { 13118a3d0316SAndy Gospodarek switch (event) { 13128a3d0316SAndy Gospodarek case NETDEV_DOWN: 13138a3d0316SAndy Gospodarek case NETDEV_UNREGISTER: 131471fceff0SDavid S. 
Miller nexthop_nh->nh_flags |= RTNH_F_DEAD; 13158a3d0316SAndy Gospodarek /* fall through */ 13168a3d0316SAndy Gospodarek case NETDEV_CHANGE: 13178a3d0316SAndy Gospodarek nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; 13188a3d0316SAndy Gospodarek break; 13198a3d0316SAndy Gospodarek } 13201da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 13211da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 132271fceff0SDavid S. Miller fi->fib_power -= nexthop_nh->nh_power; 132371fceff0SDavid S. Miller nexthop_nh->nh_power = 0; 13241da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 13251da177e4SLinus Torvalds #endif 13261da177e4SLinus Torvalds dead++; 13271da177e4SLinus Torvalds } 13281da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 13298a3d0316SAndy Gospodarek if (event == NETDEV_UNREGISTER && 13308a3d0316SAndy Gospodarek nexthop_nh->nh_dev == dev) { 13311da177e4SLinus Torvalds dead = fi->fib_nhs; 13321da177e4SLinus Torvalds break; 13331da177e4SLinus Torvalds } 13341da177e4SLinus Torvalds #endif 13351da177e4SLinus Torvalds } endfor_nexthops(fi) 13361da177e4SLinus Torvalds if (dead == fi->fib_nhs) { 13378a3d0316SAndy Gospodarek switch (event) { 13388a3d0316SAndy Gospodarek case NETDEV_DOWN: 13398a3d0316SAndy Gospodarek case NETDEV_UNREGISTER: 13401da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 13418a3d0316SAndy Gospodarek /* fall through */ 13428a3d0316SAndy Gospodarek case NETDEV_CHANGE: 13438a3d0316SAndy Gospodarek fi->fib_flags |= RTNH_F_LINKDOWN; 13448a3d0316SAndy Gospodarek break; 13458a3d0316SAndy Gospodarek } 13461da177e4SLinus Torvalds ret++; 13471da177e4SLinus Torvalds } 13481da177e4SLinus Torvalds } 13491da177e4SLinus Torvalds 13501da177e4SLinus Torvalds return ret; 13511da177e4SLinus Torvalds } 13521da177e4SLinus Torvalds 13530c838ff1SDavid S. Miller /* Must be invoked inside of an RCU protected region. */ 13542392debcSJulian Anastasov void fib_select_default(const struct flowi4 *flp, struct fib_result *res) 13550c838ff1SDavid S. 
Miller { 13560c838ff1SDavid S. Miller struct fib_info *fi = NULL, *last_resort = NULL; 135756315f9eSAlexander Duyck struct hlist_head *fa_head = res->fa_head; 13580c838ff1SDavid S. Miller struct fib_table *tb = res->table; 135918a912e9SJulian Anastasov u8 slen = 32 - res->prefixlen; 13600c838ff1SDavid S. Miller int order = -1, last_idx = -1; 13612392debcSJulian Anastasov struct fib_alias *fa, *fa1 = NULL; 13622392debcSJulian Anastasov u32 last_prio = res->fi->fib_priority; 13632392debcSJulian Anastasov u8 last_tos = 0; 13640c838ff1SDavid S. Miller 136556315f9eSAlexander Duyck hlist_for_each_entry_rcu(fa, fa_head, fa_list) { 13660c838ff1SDavid S. Miller struct fib_info *next_fi = fa->fa_info; 13670c838ff1SDavid S. Miller 136818a912e9SJulian Anastasov if (fa->fa_slen != slen) 136918a912e9SJulian Anastasov continue; 13702392debcSJulian Anastasov if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 13712392debcSJulian Anastasov continue; 137218a912e9SJulian Anastasov if (fa->tb_id != tb->tb_id) 137318a912e9SJulian Anastasov continue; 13742392debcSJulian Anastasov if (next_fi->fib_priority > last_prio && 13752392debcSJulian Anastasov fa->fa_tos == last_tos) { 13762392debcSJulian Anastasov if (last_tos) 13772392debcSJulian Anastasov continue; 13782392debcSJulian Anastasov break; 13792392debcSJulian Anastasov } 13802392debcSJulian Anastasov if (next_fi->fib_flags & RTNH_F_DEAD) 13812392debcSJulian Anastasov continue; 13822392debcSJulian Anastasov last_tos = fa->fa_tos; 13832392debcSJulian Anastasov last_prio = next_fi->fib_priority; 13842392debcSJulian Anastasov 138537e826c5SDavid S. Miller if (next_fi->fib_scope != res->scope || 13860c838ff1SDavid S. Miller fa->fa_type != RTN_UNICAST) 13870c838ff1SDavid S. Miller continue; 13880c838ff1SDavid S. Miller if (!next_fi->fib_nh[0].nh_gw || 13890c838ff1SDavid S. Miller next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 13900c838ff1SDavid S. Miller continue; 13910c838ff1SDavid S. Miller 13920c838ff1SDavid S. 
Miller fib_alias_accessed(fa); 13930c838ff1SDavid S. Miller 139451456b29SIan Morris if (!fi) { 13950c838ff1SDavid S. Miller if (next_fi != res->fi) 13960c838ff1SDavid S. Miller break; 13972392debcSJulian Anastasov fa1 = fa; 13980c838ff1SDavid S. Miller } else if (!fib_detect_death(fi, order, &last_resort, 13992392debcSJulian Anastasov &last_idx, fa1->fa_default)) { 14000c838ff1SDavid S. Miller fib_result_assign(res, fi); 14012392debcSJulian Anastasov fa1->fa_default = order; 14020c838ff1SDavid S. Miller goto out; 14030c838ff1SDavid S. Miller } 14040c838ff1SDavid S. Miller fi = next_fi; 14050c838ff1SDavid S. Miller order++; 14060c838ff1SDavid S. Miller } 14070c838ff1SDavid S. Miller 140851456b29SIan Morris if (order <= 0 || !fi) { 14092392debcSJulian Anastasov if (fa1) 14102392debcSJulian Anastasov fa1->fa_default = -1; 14110c838ff1SDavid S. Miller goto out; 14120c838ff1SDavid S. Miller } 14130c838ff1SDavid S. Miller 14140c838ff1SDavid S. Miller if (!fib_detect_death(fi, order, &last_resort, &last_idx, 14152392debcSJulian Anastasov fa1->fa_default)) { 14160c838ff1SDavid S. Miller fib_result_assign(res, fi); 14172392debcSJulian Anastasov fa1->fa_default = order; 14180c838ff1SDavid S. Miller goto out; 14190c838ff1SDavid S. Miller } 14200c838ff1SDavid S. Miller 14210c838ff1SDavid S. Miller if (last_idx >= 0) 14220c838ff1SDavid S. Miller fib_result_assign(res, last_resort); 14232392debcSJulian Anastasov fa1->fa_default = last_idx; 14240c838ff1SDavid S. Miller out: 142531d40937SEric Dumazet return; 14260c838ff1SDavid S. Miller } 14270c838ff1SDavid S. Miller 14281da177e4SLinus Torvalds /* 14296a31d2a9SEric Dumazet * Dead device goes up. We wake up dead nexthops. 14306a31d2a9SEric Dumazet * It takes sense only on multipath routes. 
14311da177e4SLinus Torvalds */ 14328a3d0316SAndy Gospodarek int fib_sync_up(struct net_device *dev, unsigned int nh_flags) 14331da177e4SLinus Torvalds { 14341da177e4SLinus Torvalds struct fib_info *prev_fi; 14351da177e4SLinus Torvalds unsigned int hash; 14361da177e4SLinus Torvalds struct hlist_head *head; 14371da177e4SLinus Torvalds struct fib_nh *nh; 14381da177e4SLinus Torvalds int ret; 14391da177e4SLinus Torvalds 14401da177e4SLinus Torvalds if (!(dev->flags & IFF_UP)) 14411da177e4SLinus Torvalds return 0; 14421da177e4SLinus Torvalds 14431da177e4SLinus Torvalds prev_fi = NULL; 14441da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 14451da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 14461da177e4SLinus Torvalds ret = 0; 14471da177e4SLinus Torvalds 1448b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 14491da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 14501da177e4SLinus Torvalds int alive; 14511da177e4SLinus Torvalds 14521da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 14531da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 14541da177e4SLinus Torvalds continue; 14551da177e4SLinus Torvalds 14561da177e4SLinus Torvalds prev_fi = fi; 14571da177e4SLinus Torvalds alive = 0; 14581da177e4SLinus Torvalds change_nexthops(fi) { 14598a3d0316SAndy Gospodarek if (!(nexthop_nh->nh_flags & nh_flags)) { 14601da177e4SLinus Torvalds alive++; 14611da177e4SLinus Torvalds continue; 14621da177e4SLinus Torvalds } 146351456b29SIan Morris if (!nexthop_nh->nh_dev || 146471fceff0SDavid S. Miller !(nexthop_nh->nh_dev->flags & IFF_UP)) 14651da177e4SLinus Torvalds continue; 146671fceff0SDavid S. Miller if (nexthop_nh->nh_dev != dev || 146771fceff0SDavid S. Miller !__in_dev_get_rtnl(dev)) 14681da177e4SLinus Torvalds continue; 14691da177e4SLinus Torvalds alive++; 14708a3d0316SAndy Gospodarek #ifdef CONFIG_IP_ROUTE_MULTIPATH 14711da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 147271fceff0SDavid S. 
Miller nexthop_nh->nh_power = 0; 14738a3d0316SAndy Gospodarek nexthop_nh->nh_flags &= ~nh_flags; 14741da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 14758a3d0316SAndy Gospodarek #else 14768a3d0316SAndy Gospodarek nexthop_nh->nh_flags &= ~nh_flags; 14778a3d0316SAndy Gospodarek #endif 14781da177e4SLinus Torvalds } endfor_nexthops(fi) 14791da177e4SLinus Torvalds 14801da177e4SLinus Torvalds if (alive > 0) { 14818a3d0316SAndy Gospodarek fi->fib_flags &= ~nh_flags; 14821da177e4SLinus Torvalds ret++; 14831da177e4SLinus Torvalds } 14841da177e4SLinus Torvalds } 14851da177e4SLinus Torvalds 14861da177e4SLinus Torvalds return ret; 14871da177e4SLinus Torvalds } 14881da177e4SLinus Torvalds 14898a3d0316SAndy Gospodarek #ifdef CONFIG_IP_ROUTE_MULTIPATH 14908a3d0316SAndy Gospodarek 14911da177e4SLinus Torvalds /* 14926a31d2a9SEric Dumazet * The algorithm is suboptimal, but it provides really 14936a31d2a9SEric Dumazet * fair weighted route distribution. 14941da177e4SLinus Torvalds */ 14951b7fe593SDavid S. Miller void fib_select_multipath(struct fib_result *res) 14961da177e4SLinus Torvalds { 14971da177e4SLinus Torvalds struct fib_info *fi = res->fi; 14980eeb075fSAndy Gospodarek struct in_device *in_dev; 14991da177e4SLinus Torvalds int w; 15001da177e4SLinus Torvalds 15011da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 15021da177e4SLinus Torvalds if (fi->fib_power <= 0) { 15031da177e4SLinus Torvalds int power = 0; 15041da177e4SLinus Torvalds change_nexthops(fi) { 15050eeb075fSAndy Gospodarek in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev); 15060eeb075fSAndy Gospodarek if (nexthop_nh->nh_flags & RTNH_F_DEAD) 15070eeb075fSAndy Gospodarek continue; 15080eeb075fSAndy Gospodarek if (in_dev && 15090eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 15100eeb075fSAndy Gospodarek nexthop_nh->nh_flags & RTNH_F_LINKDOWN) 15110eeb075fSAndy Gospodarek continue; 151271fceff0SDavid S. Miller power += nexthop_nh->nh_weight; 151371fceff0SDavid S. 
Miller nexthop_nh->nh_power = nexthop_nh->nh_weight; 15141da177e4SLinus Torvalds } endfor_nexthops(fi); 15151da177e4SLinus Torvalds fi->fib_power = power; 15161da177e4SLinus Torvalds if (power <= 0) { 15171da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 15181da177e4SLinus Torvalds /* Race condition: route has just become dead. */ 15191da177e4SLinus Torvalds res->nh_sel = 0; 15201da177e4SLinus Torvalds return; 15211da177e4SLinus Torvalds } 15221da177e4SLinus Torvalds } 15231da177e4SLinus Torvalds 15241da177e4SLinus Torvalds 15251da177e4SLinus Torvalds /* w should be random number [0..fi->fib_power-1], 15266a31d2a9SEric Dumazet * it is pretty bad approximation. 15271da177e4SLinus Torvalds */ 15281da177e4SLinus Torvalds 15291da177e4SLinus Torvalds w = jiffies % fi->fib_power; 15301da177e4SLinus Torvalds 15311da177e4SLinus Torvalds change_nexthops(fi) { 153271fceff0SDavid S. Miller if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) && 153371fceff0SDavid S. Miller nexthop_nh->nh_power) { 15346a31d2a9SEric Dumazet w -= nexthop_nh->nh_power; 15356a31d2a9SEric Dumazet if (w <= 0) { 153671fceff0SDavid S. Miller nexthop_nh->nh_power--; 15371da177e4SLinus Torvalds fi->fib_power--; 15381da177e4SLinus Torvalds res->nh_sel = nhsel; 15391da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 15401da177e4SLinus Torvalds return; 15411da177e4SLinus Torvalds } 15421da177e4SLinus Torvalds } 15431da177e4SLinus Torvalds } endfor_nexthops(fi); 15441da177e4SLinus Torvalds 15451da177e4SLinus Torvalds /* Race condition: route has just become dead. */ 15461da177e4SLinus Torvalds res->nh_sel = 0; 15471da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 15481da177e4SLinus Torvalds } 15491da177e4SLinus Torvalds #endif 1550