11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * IPv4 Forwarding Information Base: semantics. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 111da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 121da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 131da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 167c0f6ba6SLinus Torvalds #include <linux/uaccess.h> 171da177e4SLinus Torvalds #include <linux/bitops.h> 181da177e4SLinus Torvalds #include <linux/types.h> 191da177e4SLinus Torvalds #include <linux/kernel.h> 201da177e4SLinus Torvalds #include <linux/jiffies.h> 211da177e4SLinus Torvalds #include <linux/mm.h> 221da177e4SLinus Torvalds #include <linux/string.h> 231da177e4SLinus Torvalds #include <linux/socket.h> 241da177e4SLinus Torvalds #include <linux/sockios.h> 251da177e4SLinus Torvalds #include <linux/errno.h> 261da177e4SLinus Torvalds #include <linux/in.h> 271da177e4SLinus Torvalds #include <linux/inet.h> 2814c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h> 291da177e4SLinus Torvalds #include <linux/netdevice.h> 301da177e4SLinus Torvalds #include <linux/if_arp.h> 311da177e4SLinus Torvalds #include <linux/proc_fs.h> 321da177e4SLinus Torvalds #include <linux/skbuff.h> 331da177e4SLinus Torvalds #include <linux/init.h> 345a0e3ad6STejun Heo #include <linux/slab.h> 351da177e4SLinus Torvalds 3614c85021SArnaldo Carvalho de Melo #include <net/arp.h> 371da177e4SLinus Torvalds #include <net/ip.h> 381da177e4SLinus Torvalds #include <net/protocol.h> 391da177e4SLinus Torvalds #include <net/route.h> 401da177e4SLinus Torvalds #include <net/tcp.h> 411da177e4SLinus Torvalds #include <net/sock.h> 421da177e4SLinus Torvalds #include <net/ip_fib.h> 43f21c7bc5SThomas Graf #include <net/netlink.h> 444e902c57SThomas Graf #include <net/nexthop.h> 45571e7226SRoopa Prabhu #include <net/lwtunnel.h> 461da177e4SLinus Torvalds 471da177e4SLinus Torvalds #include "fib_lookup.h" 481da177e4SLinus Torvalds 49832b4c5eSStephen Hemminger static DEFINE_SPINLOCK(fib_info_lock); 501da177e4SLinus Torvalds static struct hlist_head *fib_info_hash; 511da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash; 52123b9731SDavid S. Miller static unsigned int fib_info_hash_size; 531da177e4SLinus Torvalds static unsigned int fib_info_cnt; 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8 561da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 571da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 581da177e4SLinus Torvalds 591da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 601da177e4SLinus Torvalds 616a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 626a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh; \ 636a31d2a9SEric Dumazet for (nhsel = 0, nh = (fi)->fib_nh; \ 646a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 656a31d2a9SEric Dumazet nh++, nhsel++) 661da177e4SLinus Torvalds 676a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 686a31d2a9SEric Dumazet int nhsel; struct fib_nh *nexthop_nh; \ 696a31d2a9SEric Dumazet for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 706a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 716a31d2a9SEric Dumazet nexthop_nh++, nhsel++) 721da177e4SLinus Torvalds 731da177e4SLinus Torvalds #else /* CONFIG_IP_ROUTE_MULTIPATH */ 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds /* Hope, that gcc will optimize it to get rid of dummy loop */ 761da177e4SLinus Torvalds 776a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 786a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \ 791da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 801da177e4SLinus Torvalds 816a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 826a31d2a9SEric Dumazet int nhsel; \ 836a31d2a9SEric Dumazet struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 841da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 851da177e4SLinus Torvalds 861da177e4SLinus Torvalds #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 871da177e4SLinus Torvalds 881da177e4SLinus Torvalds #define endfor_nexthops(fi) } 891da177e4SLinus Torvalds 901da177e4SLinus Torvalds 913be0686bSDavid S. Miller const struct fib_prop fib_props[RTN_MAX + 1] = { 926a31d2a9SEric Dumazet [RTN_UNSPEC] = { 931da177e4SLinus Torvalds .error = 0, 941da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 956a31d2a9SEric Dumazet }, 966a31d2a9SEric Dumazet [RTN_UNICAST] = { 971da177e4SLinus Torvalds .error = 0, 981da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 996a31d2a9SEric Dumazet }, 1006a31d2a9SEric Dumazet [RTN_LOCAL] = { 1011da177e4SLinus Torvalds .error = 0, 1021da177e4SLinus Torvalds .scope = RT_SCOPE_HOST, 1036a31d2a9SEric Dumazet }, 1046a31d2a9SEric Dumazet [RTN_BROADCAST] = { 1051da177e4SLinus Torvalds .error = 0, 1061da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1076a31d2a9SEric Dumazet }, 1086a31d2a9SEric Dumazet [RTN_ANYCAST] = { 1091da177e4SLinus Torvalds .error = 0, 1101da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1116a31d2a9SEric Dumazet }, 1126a31d2a9SEric Dumazet [RTN_MULTICAST] = { 1131da177e4SLinus Torvalds .error = 0, 1141da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1156a31d2a9SEric Dumazet }, 1166a31d2a9SEric Dumazet [RTN_BLACKHOLE] = { 1171da177e4SLinus Torvalds .error = -EINVAL, 1181da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1196a31d2a9SEric Dumazet }, 1206a31d2a9SEric Dumazet [RTN_UNREACHABLE] = { 1211da177e4SLinus Torvalds .error = -EHOSTUNREACH, 1221da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1236a31d2a9SEric Dumazet }, 1246a31d2a9SEric Dumazet [RTN_PROHIBIT] = { 1251da177e4SLinus Torvalds .error = -EACCES, 1261da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1276a31d2a9SEric Dumazet }, 1286a31d2a9SEric Dumazet [RTN_THROW] = { 1291da177e4SLinus Torvalds .error = -EAGAIN, 1301da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1316a31d2a9SEric Dumazet }, 1326a31d2a9SEric Dumazet [RTN_NAT] = { 1331da177e4SLinus Torvalds .error = -EINVAL, 1341da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1356a31d2a9SEric Dumazet }, 1366a31d2a9SEric Dumazet [RTN_XRESOLVE] = { 1371da177e4SLinus Torvalds .error = -EINVAL, 1381da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1396a31d2a9SEric Dumazet }, 1401da177e4SLinus Torvalds }; 1411da177e4SLinus Torvalds 142c5038a83SDavid S. Miller static void rt_fibinfo_free(struct rtable __rcu **rtp) 14354764bb6SEric Dumazet { 14454764bb6SEric Dumazet struct rtable *rt = rcu_dereference_protected(*rtp, 1); 14554764bb6SEric Dumazet 14654764bb6SEric Dumazet if (!rt) 14754764bb6SEric Dumazet return; 14854764bb6SEric Dumazet 14954764bb6SEric Dumazet /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); 15054764bb6SEric Dumazet * because we waited an RCU grace period before calling 15154764bb6SEric Dumazet * free_fib_info_rcu() 15254764bb6SEric Dumazet */ 15354764bb6SEric Dumazet 15454764bb6SEric Dumazet dst_free(&rt->dst); 15554764bb6SEric Dumazet } 15654764bb6SEric Dumazet 157c5038a83SDavid S. Miller static void free_nh_exceptions(struct fib_nh *nh) 158c5038a83SDavid S. Miller { 159caa41527SEric Dumazet struct fnhe_hash_bucket *hash; 160c5038a83SDavid S. Miller int i; 161c5038a83SDavid S. Miller 162caa41527SEric Dumazet hash = rcu_dereference_protected(nh->nh_exceptions, 1); 163caa41527SEric Dumazet if (!hash) 164caa41527SEric Dumazet return; 165c5038a83SDavid S. Miller for (i = 0; i < FNHE_HASH_SIZE; i++) { 166c5038a83SDavid S. Miller struct fib_nh_exception *fnhe; 167c5038a83SDavid S. Miller 168c5038a83SDavid S. Miller fnhe = rcu_dereference_protected(hash[i].chain, 1); 169c5038a83SDavid S. Miller while (fnhe) { 170c5038a83SDavid S. Miller struct fib_nh_exception *next; 171c5038a83SDavid S. Miller 172c5038a83SDavid S. Miller next = rcu_dereference_protected(fnhe->fnhe_next, 1); 173c5038a83SDavid S. Miller 1742ffae99dSTimo Teräs rt_fibinfo_free(&fnhe->fnhe_rth_input); 1752ffae99dSTimo Teräs rt_fibinfo_free(&fnhe->fnhe_rth_output); 176c5038a83SDavid S. Miller 177c5038a83SDavid S. Miller kfree(fnhe); 178c5038a83SDavid S. Miller 179c5038a83SDavid S. Miller fnhe = next; 180c5038a83SDavid S. Miller } 181c5038a83SDavid S. Miller } 182c5038a83SDavid S. Miller kfree(hash); 183c5038a83SDavid S. Miller } 184c5038a83SDavid S. Miller 185c5038a83SDavid S. Miller static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) 186d26b3a7cSEric Dumazet { 187d26b3a7cSEric Dumazet int cpu; 188d26b3a7cSEric Dumazet 189d26b3a7cSEric Dumazet if (!rtp) 190d26b3a7cSEric Dumazet return; 191d26b3a7cSEric Dumazet 192d26b3a7cSEric Dumazet for_each_possible_cpu(cpu) { 193d26b3a7cSEric Dumazet struct rtable *rt; 194d26b3a7cSEric Dumazet 195d26b3a7cSEric Dumazet rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); 196d26b3a7cSEric Dumazet if (rt) 197d26b3a7cSEric Dumazet dst_free(&rt->dst); 198d26b3a7cSEric Dumazet } 199d26b3a7cSEric Dumazet free_percpu(rtp); 200d26b3a7cSEric Dumazet } 201d26b3a7cSEric Dumazet 2021da177e4SLinus Torvalds /* Release a nexthop info record */ 20319c1ea14SYan, Zheng static void free_fib_info_rcu(struct rcu_head *head) 20419c1ea14SYan, Zheng { 20519c1ea14SYan, Zheng struct fib_info *fi = container_of(head, struct fib_info, rcu); 20619c1ea14SYan, Zheng 207e49cc0daSYanmin Zhang change_nexthops(fi) { 208e49cc0daSYanmin Zhang if (nexthop_nh->nh_dev) 209e49cc0daSYanmin Zhang dev_put(nexthop_nh->nh_dev); 2105a6228a0SNicolas Dichtel lwtstate_put(nexthop_nh->nh_lwtstate); 2114895c771SDavid S. Miller free_nh_exceptions(nexthop_nh); 212c5038a83SDavid S. Miller rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); 213c5038a83SDavid S. Miller rt_fibinfo_free(&nexthop_nh->nh_rth_input); 214e49cc0daSYanmin Zhang } endfor_nexthops(fi); 215e49cc0daSYanmin Zhang 21619c1ea14SYan, Zheng if (fi->fib_metrics != (u32 *) dst_default_metrics) 21719c1ea14SYan, Zheng kfree(fi->fib_metrics); 21819c1ea14SYan, Zheng kfree(fi); 21919c1ea14SYan, Zheng } 2201da177e4SLinus Torvalds 2211da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi) 2221da177e4SLinus Torvalds { 2231da177e4SLinus Torvalds if (fi->fib_dead == 0) { 224058bd4d2SJoe Perches pr_warn("Freeing alive fib_info %p\n", fi); 2251da177e4SLinus Torvalds return; 2261da177e4SLinus Torvalds } 2271da177e4SLinus Torvalds fib_info_cnt--; 2287a9bc9b8SDavid S. Miller #ifdef CONFIG_IP_ROUTE_CLASSID 2297a9bc9b8SDavid S. Miller change_nexthops(fi) { 2307a9bc9b8SDavid S. Miller if (nexthop_nh->nh_tclassid) 231f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users--; 2327a9bc9b8SDavid S. Miller } endfor_nexthops(fi); 2337a9bc9b8SDavid S. Miller #endif 23419c1ea14SYan, Zheng call_rcu(&fi->rcu, free_fib_info_rcu); 2351da177e4SLinus Torvalds } 236b423cb10SIdo Schimmel EXPORT_SYMBOL_GPL(free_fib_info); 2371da177e4SLinus Torvalds 2381da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi) 2391da177e4SLinus Torvalds { 240832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 2411da177e4SLinus Torvalds if (fi && --fi->fib_treeref == 0) { 2421da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 2431da177e4SLinus Torvalds if (fi->fib_prefsrc) 2441da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 2451da177e4SLinus Torvalds change_nexthops(fi) { 24671fceff0SDavid S. Miller if (!nexthop_nh->nh_dev) 2471da177e4SLinus Torvalds continue; 24871fceff0SDavid S. Miller hlist_del(&nexthop_nh->nh_hash); 2491da177e4SLinus Torvalds } endfor_nexthops(fi) 2501da177e4SLinus Torvalds fi->fib_dead = 1; 2511da177e4SLinus Torvalds fib_info_put(fi); 2521da177e4SLinus Torvalds } 253832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 2541da177e4SLinus Torvalds } 2551da177e4SLinus Torvalds 2566a31d2a9SEric Dumazet static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 2571da177e4SLinus Torvalds { 2581da177e4SLinus Torvalds const struct fib_nh *onh = ofi->fib_nh; 2591da177e4SLinus Torvalds 2601da177e4SLinus Torvalds for_nexthops(fi) { 2611da177e4SLinus Torvalds if (nh->nh_oif != onh->nh_oif || 2621da177e4SLinus Torvalds nh->nh_gw != onh->nh_gw || 2631da177e4SLinus Torvalds nh->nh_scope != onh->nh_scope || 2641da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 2651da177e4SLinus Torvalds nh->nh_weight != onh->nh_weight || 2661da177e4SLinus Torvalds #endif 267c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 2681da177e4SLinus Torvalds nh->nh_tclassid != onh->nh_tclassid || 2691da177e4SLinus Torvalds #endif 270571e7226SRoopa Prabhu lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) || 2718a3d0316SAndy Gospodarek ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK)) 2721da177e4SLinus Torvalds return -1; 2731da177e4SLinus Torvalds onh++; 2741da177e4SLinus Torvalds } endfor_nexthops(fi); 2751da177e4SLinus Torvalds return 0; 2761da177e4SLinus Torvalds } 2771da177e4SLinus Torvalds 27888ebc72fSDavid S. Miller static inline unsigned int fib_devindex_hashfn(unsigned int val) 27988ebc72fSDavid S. Miller { 28088ebc72fSDavid S. Miller unsigned int mask = DEVINDEX_HASHSIZE - 1; 28188ebc72fSDavid S. Miller 28288ebc72fSDavid S. Miller return (val ^ 28388ebc72fSDavid S. Miller (val >> DEVINDEX_HASHBITS) ^ 28488ebc72fSDavid S. Miller (val >> (DEVINDEX_HASHBITS * 2))) & mask; 28588ebc72fSDavid S. Miller } 28688ebc72fSDavid S. Miller 2871da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 2881da177e4SLinus Torvalds { 289123b9731SDavid S. Miller unsigned int mask = (fib_info_hash_size - 1); 2901da177e4SLinus Torvalds unsigned int val = fi->fib_nhs; 2911da177e4SLinus Torvalds 29237e826c5SDavid S. Miller val ^= (fi->fib_protocol << 8) | fi->fib_scope; 29381f7bf6cSAl Viro val ^= (__force u32)fi->fib_prefsrc; 2941da177e4SLinus Torvalds val ^= fi->fib_priority; 29588ebc72fSDavid S. Miller for_nexthops(fi) { 29688ebc72fSDavid S. Miller val ^= fib_devindex_hashfn(nh->nh_oif); 29788ebc72fSDavid S. Miller } endfor_nexthops(fi) 2981da177e4SLinus Torvalds 2991da177e4SLinus Torvalds return (val ^ (val >> 7) ^ (val >> 12)) & mask; 3001da177e4SLinus Torvalds } 3011da177e4SLinus Torvalds 3021da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi) 3031da177e4SLinus Torvalds { 3041da177e4SLinus Torvalds struct hlist_head *head; 3051da177e4SLinus Torvalds struct fib_info *fi; 3061da177e4SLinus Torvalds unsigned int hash; 3071da177e4SLinus Torvalds 3081da177e4SLinus Torvalds hash = fib_info_hashfn(nfi); 3091da177e4SLinus Torvalds head = &fib_info_hash[hash]; 3101da177e4SLinus Torvalds 311b67bfe0dSSasha Levin hlist_for_each_entry(fi, head, fib_hash) { 31209ad9bc7SOctavian Purdila if (!net_eq(fi->fib_net, nfi->fib_net)) 3134814bdbdSDenis V. Lunev continue; 3141da177e4SLinus Torvalds if (fi->fib_nhs != nfi->fib_nhs) 3151da177e4SLinus Torvalds continue; 3161da177e4SLinus Torvalds if (nfi->fib_protocol == fi->fib_protocol && 31737e826c5SDavid S. Miller nfi->fib_scope == fi->fib_scope && 3181da177e4SLinus Torvalds nfi->fib_prefsrc == fi->fib_prefsrc && 3191da177e4SLinus Torvalds nfi->fib_priority == fi->fib_priority && 320f4ef85bbSEric Dumazet nfi->fib_type == fi->fib_type && 3211da177e4SLinus Torvalds memcmp(nfi->fib_metrics, fi->fib_metrics, 322fcd13f42SEric Dumazet sizeof(u32) * RTAX_MAX) == 0 && 3238a3d0316SAndy Gospodarek !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && 3241da177e4SLinus Torvalds (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 3251da177e4SLinus Torvalds return fi; 3261da177e4SLinus Torvalds } 3271da177e4SLinus Torvalds 3281da177e4SLinus Torvalds return NULL; 3291da177e4SLinus Torvalds } 3301da177e4SLinus Torvalds 3311da177e4SLinus Torvalds /* Check, that the gateway is already configured. 3326a31d2a9SEric Dumazet * Used only by redirect accept routine. 3331da177e4SLinus Torvalds */ 334d878e72eSAl Viro int ip_fib_check_default(__be32 gw, struct net_device *dev) 3351da177e4SLinus Torvalds { 3361da177e4SLinus Torvalds struct hlist_head *head; 3371da177e4SLinus Torvalds struct fib_nh *nh; 3381da177e4SLinus Torvalds unsigned int hash; 3391da177e4SLinus Torvalds 340832b4c5eSStephen Hemminger spin_lock(&fib_info_lock); 3411da177e4SLinus Torvalds 3421da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 3431da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 344b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 3451da177e4SLinus Torvalds if (nh->nh_dev == dev && 3461da177e4SLinus Torvalds nh->nh_gw == gw && 3471da177e4SLinus Torvalds !(nh->nh_flags & RTNH_F_DEAD)) { 348832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 3491da177e4SLinus Torvalds return 0; 3501da177e4SLinus Torvalds } 3511da177e4SLinus Torvalds } 3521da177e4SLinus Torvalds 353832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 3541da177e4SLinus Torvalds 3551da177e4SLinus Torvalds return -1; 3561da177e4SLinus Torvalds } 3571da177e4SLinus Torvalds 358339bf98fSThomas Graf static inline size_t fib_nlmsg_size(struct fib_info *fi) 359339bf98fSThomas Graf { 360339bf98fSThomas Graf size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 361339bf98fSThomas Graf + nla_total_size(4) /* RTA_TABLE */ 362339bf98fSThomas Graf + nla_total_size(4) /* RTA_DST */ 363339bf98fSThomas Graf + nla_total_size(4) /* RTA_PRIORITY */ 364ea697639SDaniel Borkmann + nla_total_size(4) /* RTA_PREFSRC */ 365ea697639SDaniel Borkmann + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ 366339bf98fSThomas Graf 367339bf98fSThomas Graf /* space for nested metrics */ 368339bf98fSThomas Graf payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 369339bf98fSThomas Graf 370339bf98fSThomas Graf if (fi->fib_nhs) { 371571e7226SRoopa Prabhu size_t nh_encapsize = 0; 372339bf98fSThomas Graf /* Also handles the special case fib_nhs == 1 */ 373339bf98fSThomas Graf 374339bf98fSThomas Graf /* each nexthop is packed in an attribute */ 375339bf98fSThomas Graf size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 376339bf98fSThomas Graf 377339bf98fSThomas Graf /* may contain flow and gateway attribute */ 378339bf98fSThomas Graf nhsize += 2 * nla_total_size(4); 379339bf98fSThomas Graf 380571e7226SRoopa Prabhu /* grab encap info */ 381571e7226SRoopa Prabhu for_nexthops(fi) { 382571e7226SRoopa Prabhu if (nh->nh_lwtstate) { 383571e7226SRoopa Prabhu /* RTA_ENCAP_TYPE */ 384571e7226SRoopa Prabhu nh_encapsize += lwtunnel_get_encap_size( 385571e7226SRoopa Prabhu nh->nh_lwtstate); 386571e7226SRoopa Prabhu /* RTA_ENCAP */ 387571e7226SRoopa Prabhu nh_encapsize += nla_total_size(2); 388571e7226SRoopa Prabhu } 389571e7226SRoopa Prabhu } endfor_nexthops(fi); 390571e7226SRoopa Prabhu 391339bf98fSThomas Graf /* all nexthops are packed in a nested attribute */ 392571e7226SRoopa Prabhu payload += nla_total_size((fi->fib_nhs * nhsize) + 393571e7226SRoopa Prabhu nh_encapsize); 394571e7226SRoopa Prabhu 395339bf98fSThomas Graf } 396339bf98fSThomas Graf 397339bf98fSThomas Graf return payload; 398339bf98fSThomas Graf } 399339bf98fSThomas Graf 40081f7bf6cSAl Viro void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 4019877b253SJoe Perches int dst_len, u32 tb_id, const struct nl_info *info, 402b8f55831SMilan Kocian unsigned int nlm_flags) 4031da177e4SLinus Torvalds { 4041da177e4SLinus Torvalds struct sk_buff *skb; 4054e902c57SThomas Graf u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 406f21c7bc5SThomas Graf int err = -ENOBUFS; 4071da177e4SLinus Torvalds 408339bf98fSThomas Graf skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 40951456b29SIan Morris if (!skb) 410f21c7bc5SThomas Graf goto errout; 4111da177e4SLinus Torvalds 41215e47304SEric W. Biederman err = fib_dump_info(skb, info->portid, seq, event, tb_id, 41337e826c5SDavid S. Miller fa->fa_type, key, dst_len, 414b8f55831SMilan Kocian fa->fa_tos, fa->fa_info, nlm_flags); 41526932566SPatrick McHardy if (err < 0) { 41626932566SPatrick McHardy /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 41726932566SPatrick McHardy WARN_ON(err == -EMSGSIZE); 41826932566SPatrick McHardy kfree_skb(skb); 41926932566SPatrick McHardy goto errout; 42026932566SPatrick McHardy } 42115e47304SEric W. Biederman rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, 4224e902c57SThomas Graf info->nlh, GFP_KERNEL); 4231ce85fe4SPablo Neira Ayuso return; 424f21c7bc5SThomas Graf errout: 425f21c7bc5SThomas Graf if (err < 0) 4264d1169c1SDenis V. Lunev rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 4271da177e4SLinus Torvalds } 4281da177e4SLinus Torvalds 429c9cb6b6eSStephen Hemminger static int fib_detect_death(struct fib_info *fi, int order, 430c9cb6b6eSStephen Hemminger struct fib_info **last_resort, int *last_idx, 431c9cb6b6eSStephen Hemminger int dflt) 4321da177e4SLinus Torvalds { 4331da177e4SLinus Torvalds struct neighbour *n; 4341da177e4SLinus Torvalds int state = NUD_NONE; 4351da177e4SLinus Torvalds 4361da177e4SLinus Torvalds n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 4371da177e4SLinus Torvalds if (n) { 4381da177e4SLinus Torvalds state = n->nud_state; 4391da177e4SLinus Torvalds neigh_release(n); 44088f64320SJulian Anastasov } else { 44188f64320SJulian Anastasov return 0; 4421da177e4SLinus Torvalds } 4431da177e4SLinus Torvalds if (state == NUD_REACHABLE) 4441da177e4SLinus Torvalds return 0; 445c17860a0SDenis V. Lunev if ((state & NUD_VALID) && order != dflt) 4461da177e4SLinus Torvalds return 0; 4471da177e4SLinus Torvalds if ((state & NUD_VALID) || 44888f64320SJulian Anastasov (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) { 4491da177e4SLinus Torvalds *last_resort = fi; 4501da177e4SLinus Torvalds *last_idx = order; 4511da177e4SLinus Torvalds } 4521da177e4SLinus Torvalds return 1; 4531da177e4SLinus Torvalds } 4541da177e4SLinus Torvalds 4551da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4561da177e4SLinus Torvalds 457*6d8422a1SDavid Ahern static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, 458*6d8422a1SDavid Ahern struct netlink_ext_ack *extack) 4591da177e4SLinus Torvalds { 4601da177e4SLinus Torvalds int nhs = 0; 4611da177e4SLinus Torvalds 4624e902c57SThomas Graf while (rtnh_ok(rtnh, remaining)) { 4631da177e4SLinus Torvalds nhs++; 4644e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 4651da177e4SLinus Torvalds } 4661da177e4SLinus Torvalds 4674e902c57SThomas Graf /* leftover implies invalid nexthop configuration, discard it */ 4684e902c57SThomas Graf return remaining > 0 ? 0 : nhs; 4694e902c57SThomas Graf } 4701da177e4SLinus Torvalds 4714e902c57SThomas Graf static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 472*6d8422a1SDavid Ahern int remaining, struct fib_config *cfg, 473*6d8422a1SDavid Ahern struct netlink_ext_ack *extack) 4744e902c57SThomas Graf { 475571e7226SRoopa Prabhu int ret; 476571e7226SRoopa Prabhu 4771da177e4SLinus Torvalds change_nexthops(fi) { 4784e902c57SThomas Graf int attrlen; 4794e902c57SThomas Graf 4804e902c57SThomas Graf if (!rtnh_ok(rtnh, remaining)) 4811da177e4SLinus Torvalds return -EINVAL; 4824e902c57SThomas Graf 48380610229SJulian Anastasov if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) 48480610229SJulian Anastasov return -EINVAL; 48580610229SJulian Anastasov 48671fceff0SDavid S. Miller nexthop_nh->nh_flags = 48771fceff0SDavid S. Miller (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 48871fceff0SDavid S. Miller nexthop_nh->nh_oif = rtnh->rtnh_ifindex; 48971fceff0SDavid S. Miller nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; 4904e902c57SThomas Graf 4914e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 4924e902c57SThomas Graf if (attrlen > 0) { 4934e902c57SThomas Graf struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 4944e902c57SThomas Graf 4954e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 49667b61f6cSJiri Benc nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0; 497c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 4984e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 49971fceff0SDavid S. Miller nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 5007a9bc9b8SDavid S. Miller if (nexthop_nh->nh_tclassid) 501f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users++; 5021da177e4SLinus Torvalds #endif 503571e7226SRoopa Prabhu nla = nla_find(attrs, attrlen, RTA_ENCAP); 504571e7226SRoopa Prabhu if (nla) { 505571e7226SRoopa Prabhu struct lwtunnel_state *lwtstate; 506571e7226SRoopa Prabhu struct nlattr *nla_entype; 507571e7226SRoopa Prabhu 508571e7226SRoopa Prabhu nla_entype = nla_find(attrs, attrlen, 509571e7226SRoopa Prabhu RTA_ENCAP_TYPE); 510571e7226SRoopa Prabhu if (!nla_entype) 511571e7226SRoopa Prabhu goto err_inval; 51230357d7dSDavid Ahern 51330357d7dSDavid Ahern ret = lwtunnel_build_state(nla_get_u16( 514571e7226SRoopa Prabhu nla_entype), 515127eb7cdSTom Herbert nla, AF_INET, cfg, 516127eb7cdSTom Herbert &lwtstate); 517571e7226SRoopa Prabhu if (ret) 518571e7226SRoopa Prabhu goto errout; 5195a6228a0SNicolas Dichtel nexthop_nh->nh_lwtstate = 5205a6228a0SNicolas Dichtel lwtstate_get(lwtstate); 521571e7226SRoopa Prabhu } 5221da177e4SLinus Torvalds } 5234e902c57SThomas Graf 5244e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 5251da177e4SLinus Torvalds } endfor_nexthops(fi); 5264e902c57SThomas Graf 5271da177e4SLinus Torvalds return 0; 528571e7226SRoopa Prabhu 529571e7226SRoopa Prabhu err_inval: 530571e7226SRoopa Prabhu ret = -EINVAL; 531571e7226SRoopa Prabhu 532571e7226SRoopa Prabhu errout: 533571e7226SRoopa Prabhu return ret; 5341da177e4SLinus Torvalds } 5351da177e4SLinus Torvalds 5360e884c78SPeter Nørlund static void fib_rebalance(struct fib_info *fi) 5370e884c78SPeter Nørlund { 5380e884c78SPeter Nørlund int total; 5390e884c78SPeter Nørlund int w; 5400e884c78SPeter Nørlund struct in_device *in_dev; 5410e884c78SPeter Nørlund 5420e884c78SPeter Nørlund if (fi->fib_nhs < 2) 5430e884c78SPeter Nørlund return; 5440e884c78SPeter Nørlund 5450e884c78SPeter Nørlund total = 0; 5460e884c78SPeter Nørlund for_nexthops(fi) { 5470e884c78SPeter Nørlund if (nh->nh_flags & RTNH_F_DEAD) 5480e884c78SPeter Nørlund continue; 5490e884c78SPeter Nørlund 55051161aa9SDavid Ahern in_dev = __in_dev_get_rtnl(nh->nh_dev); 5510e884c78SPeter Nørlund 5520e884c78SPeter Nørlund if (in_dev && 5530e884c78SPeter Nørlund IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 5540e884c78SPeter Nørlund nh->nh_flags & RTNH_F_LINKDOWN) 5550e884c78SPeter Nørlund continue; 5560e884c78SPeter Nørlund 5570e884c78SPeter Nørlund total += nh->nh_weight; 5580e884c78SPeter Nørlund } endfor_nexthops(fi); 5590e884c78SPeter Nørlund 5600e884c78SPeter Nørlund w = 0; 5610e884c78SPeter Nørlund change_nexthops(fi) { 5620e884c78SPeter Nørlund int upper_bound; 5630e884c78SPeter Nørlund 56451161aa9SDavid Ahern in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev); 5650e884c78SPeter Nørlund 5660e884c78SPeter Nørlund if (nexthop_nh->nh_flags & RTNH_F_DEAD) { 5670e884c78SPeter Nørlund upper_bound = -1; 5680e884c78SPeter Nørlund } else if (in_dev && 5690e884c78SPeter Nørlund IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 5700e884c78SPeter Nørlund nexthop_nh->nh_flags & RTNH_F_LINKDOWN) { 5710e884c78SPeter Nørlund upper_bound = -1; 5720e884c78SPeter Nørlund } else { 5730e884c78SPeter Nørlund w += nexthop_nh->nh_weight; 5740a837fe4SPeter Nørlund upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, 5750e884c78SPeter Nørlund total) - 1; 5760e884c78SPeter Nørlund } 5770e884c78SPeter Nørlund 5780e884c78SPeter Nørlund atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); 5790e884c78SPeter Nørlund } endfor_nexthops(fi); 5800e884c78SPeter Nørlund } 5810e884c78SPeter Nørlund 5820e884c78SPeter Nørlund static inline void fib_add_weight(struct fib_info *fi, 5830e884c78SPeter Nørlund const struct fib_nh *nh) 5840e884c78SPeter Nørlund { 5850e884c78SPeter Nørlund fi->fib_weight += nh->nh_weight; 5860e884c78SPeter Nørlund } 5870e884c78SPeter Nørlund 5880e884c78SPeter Nørlund #else /* CONFIG_IP_ROUTE_MULTIPATH */ 5890e884c78SPeter Nørlund 5900e884c78SPeter Nørlund #define fib_rebalance(fi) do { } while (0) 5910e884c78SPeter Nørlund #define fib_add_weight(fi, nh) do { } while (0) 5920e884c78SPeter Nørlund 5930e884c78SPeter Nørlund #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 5941da177e4SLinus Torvalds 59530357d7dSDavid Ahern static int fib_encap_match(u16 encap_type, 596571e7226SRoopa Prabhu struct nlattr *encap, 59730357d7dSDavid Ahern const struct fib_nh *nh, 598127eb7cdSTom Herbert const struct fib_config *cfg) 599571e7226SRoopa Prabhu { 600571e7226SRoopa Prabhu struct lwtunnel_state *lwtstate; 601df383e62SJiri Benc int ret, result = 0; 602571e7226SRoopa Prabhu 603571e7226SRoopa Prabhu if (encap_type == LWTUNNEL_ENCAP_NONE) 604571e7226SRoopa Prabhu return 0; 605571e7226SRoopa Prabhu 60630357d7dSDavid Ahern ret = lwtunnel_build_state(encap_type, encap, 607127eb7cdSTom Herbert AF_INET, cfg, &lwtstate); 608df383e62SJiri Benc if (!ret) { 609df383e62SJiri Benc result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); 610df383e62SJiri Benc lwtstate_free(lwtstate); 611df383e62SJiri Benc } 612571e7226SRoopa Prabhu 613df383e62SJiri Benc return result; 614571e7226SRoopa Prabhu } 615571e7226SRoopa Prabhu 6164e902c57SThomas Graf int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) 6171da177e4SLinus Torvalds { 6181da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 6194e902c57SThomas Graf struct rtnexthop *rtnh; 6204e902c57SThomas Graf int remaining; 6211da177e4SLinus Torvalds #endif 6221da177e4SLinus Torvalds 6234e902c57SThomas Graf if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 6241da177e4SLinus Torvalds return 1; 6251da177e4SLinus Torvalds 6264e902c57SThomas Graf if (cfg->fc_oif || cfg->fc_gw) { 627571e7226SRoopa Prabhu if (cfg->fc_encap) { 62830357d7dSDavid Ahern if (fib_encap_match(cfg->fc_encap_type, 62930357d7dSDavid Ahern cfg->fc_encap, fi->fib_nh, cfg)) 630571e7226SRoopa Prabhu return 1; 631571e7226SRoopa Prabhu } 6324e902c57SThomas Graf if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 6334e902c57SThomas Graf (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 6341da177e4SLinus Torvalds return 0; 6351da177e4SLinus Torvalds return 1; 6361da177e4SLinus Torvalds } 6371da177e4SLinus Torvalds 6381da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 63951456b29SIan Morris if (!cfg->fc_mp) 6401da177e4SLinus Torvalds return 0; 6414e902c57SThomas Graf 6424e902c57SThomas Graf rtnh = cfg->fc_mp; 6434e902c57SThomas Graf remaining = cfg->fc_mp_len; 6441da177e4SLinus Torvalds 6451da177e4SLinus Torvalds for_nexthops(fi) { 6464e902c57SThomas Graf int attrlen; 6471da177e4SLinus Torvalds 6484e902c57SThomas Graf if (!rtnh_ok(rtnh, remaining)) 6491da177e4SLinus Torvalds return -EINVAL; 6504e902c57SThomas Graf 6514e902c57SThomas Graf if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) 6521da177e4SLinus Torvalds return 1; 6534e902c57SThomas Graf 6544e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 655f76936d0SJiri Pirko if (attrlen > 0) { 6564e902c57SThomas Graf struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 6574e902c57SThomas Graf 6584e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 65967b61f6cSJiri Benc if (nla && nla_get_in_addr(nla) != nh->nh_gw) 6601da177e4SLinus Torvalds return 1; 661c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 6624e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 6634e902c57SThomas Graf if (nla && nla_get_u32(nla) != nh->nh_tclassid) 6641da177e4SLinus Torvalds return 1; 6651da177e4SLinus Torvalds #endif 6661da177e4SLinus Torvalds } 6674e902c57SThomas Graf 6684e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 6691da177e4SLinus Torvalds } endfor_nexthops(fi); 6701da177e4SLinus Torvalds #endif 6711da177e4SLinus Torvalds return 0; 6721da177e4SLinus Torvalds } 6731da177e4SLinus Torvalds 6741da177e4SLinus Torvalds 6751da177e4SLinus Torvalds /* 6766a31d2a9SEric Dumazet * Picture 6776a31d2a9SEric Dumazet * ------- 6786a31d2a9SEric Dumazet * 6796a31d2a9SEric Dumazet * Semantics of nexthop is very messy by historical reasons. 6806a31d2a9SEric Dumazet * We have to take into account, that: 6816a31d2a9SEric Dumazet * a) gateway can be actually local interface address, 6826a31d2a9SEric Dumazet * so that gatewayed route is direct. 6836a31d2a9SEric Dumazet * b) gateway must be on-link address, possibly 6846a31d2a9SEric Dumazet * described not by an ifaddr, but also by a direct route. 6856a31d2a9SEric Dumazet * c) If both gateway and interface are specified, they should not 6866a31d2a9SEric Dumazet * contradict. 6876a31d2a9SEric Dumazet * d) If we use tunnel routes, gateway could be not on-link. 6886a31d2a9SEric Dumazet * 6896a31d2a9SEric Dumazet * Attempt to reconcile all of these (alas, self-contradictory) conditions 6906a31d2a9SEric Dumazet * results in pretty ugly and hairy code with obscure logic. 6916a31d2a9SEric Dumazet * 6926a31d2a9SEric Dumazet * I chose to generalized it instead, so that the size 6936a31d2a9SEric Dumazet * of code does not increase practically, but it becomes 6946a31d2a9SEric Dumazet * much more general. 6956a31d2a9SEric Dumazet * Every prefix is assigned a "scope" value: "host" is local address, 6966a31d2a9SEric Dumazet * "link" is direct route, 6976a31d2a9SEric Dumazet * [ ... "site" ... "interior" ... ] 6986a31d2a9SEric Dumazet * and "universe" is true gateway route with global meaning. 6996a31d2a9SEric Dumazet * 7006a31d2a9SEric Dumazet * Every prefix refers to a set of "nexthop"s (gw, oif), 7016a31d2a9SEric Dumazet * where gw must have narrower scope. This recursion stops 7026a31d2a9SEric Dumazet * when gw has LOCAL scope or if "nexthop" is declared ONLINK, 7036a31d2a9SEric Dumazet * which means that gw is forced to be on link. 7046a31d2a9SEric Dumazet * 7056a31d2a9SEric Dumazet * Code is still hairy, but now it is apparently logically 7066a31d2a9SEric Dumazet * consistent and very flexible. F.e. as by-product it allows 7076a31d2a9SEric Dumazet * to co-exists in peace independent exterior and interior 7086a31d2a9SEric Dumazet * routing processes. 7096a31d2a9SEric Dumazet * 7106a31d2a9SEric Dumazet * Normally it looks as following. 7116a31d2a9SEric Dumazet * 7126a31d2a9SEric Dumazet * {universe prefix} -> (gw, oif) [scope link] 7136a31d2a9SEric Dumazet * | 7146a31d2a9SEric Dumazet * |-> {link prefix} -> (gw, oif) [scope local] 7156a31d2a9SEric Dumazet * | 7166a31d2a9SEric Dumazet * |-> {local prefix} (terminal node) 7171da177e4SLinus Torvalds */ 7184e902c57SThomas Graf static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 719*6d8422a1SDavid Ahern struct fib_nh *nh, struct netlink_ext_ack *extack) 7201da177e4SLinus Torvalds { 721127eb7cdSTom Herbert int err = 0; 72286167a37SDenis V. Lunev struct net *net; 7236a31d2a9SEric Dumazet struct net_device *dev; 7241da177e4SLinus Torvalds 72586167a37SDenis V. Lunev net = cfg->fc_nlinfo.nl_net; 7261da177e4SLinus Torvalds if (nh->nh_gw) { 7271da177e4SLinus Torvalds struct fib_result res; 7281da177e4SLinus Torvalds 7291da177e4SLinus Torvalds if (nh->nh_flags & RTNH_F_ONLINK) { 73030bbaa19SDavid Ahern unsigned int addr_type; 7311da177e4SLinus Torvalds 7324e902c57SThomas Graf if (cfg->fc_scope >= RT_SCOPE_LINK) 7331da177e4SLinus Torvalds return -EINVAL; 7346a31d2a9SEric Dumazet dev = __dev_get_by_index(net, nh->nh_oif); 7356a31d2a9SEric Dumazet if (!dev) 7361da177e4SLinus Torvalds return -ENODEV; 7371da177e4SLinus Torvalds if (!(dev->flags & IFF_UP)) 7381da177e4SLinus Torvalds return -ENETDOWN; 73930bbaa19SDavid Ahern addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw); 74030bbaa19SDavid Ahern if (addr_type != RTN_UNICAST) 74130bbaa19SDavid Ahern return -EINVAL; 7428a3d0316SAndy Gospodarek if (!netif_carrier_ok(dev)) 7438a3d0316SAndy Gospodarek nh->nh_flags |= RTNH_F_LINKDOWN; 7441da177e4SLinus Torvalds nh->nh_dev = dev; 7451da177e4SLinus Torvalds dev_hold(dev); 7461da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_LINK; 7471da177e4SLinus Torvalds return 0; 7481da177e4SLinus Torvalds } 749ebc0ffaeSEric Dumazet rcu_read_lock(); 7501da177e4SLinus Torvalds { 7513bfd8472SDavid Ahern struct fib_table *tbl = NULL; 7529ade2286SDavid S. Miller struct flowi4 fl4 = { 7539ade2286SDavid S. Miller .daddr = nh->nh_gw, 7549ade2286SDavid S. Miller .flowi4_scope = cfg->fc_scope + 1, 7559ade2286SDavid S. Miller .flowi4_oif = nh->nh_oif, 7566a662719SCong Wang .flowi4_iif = LOOPBACK_IFINDEX, 7574e902c57SThomas Graf }; 7581da177e4SLinus Torvalds 7591da177e4SLinus Torvalds /* It is not necessary, but requires a bit of thinking */ 7609ade2286SDavid S. Miller if (fl4.flowi4_scope < RT_SCOPE_LINK) 7619ade2286SDavid S. Miller fl4.flowi4_scope = RT_SCOPE_LINK; 7623bfd8472SDavid Ahern 7633bfd8472SDavid Ahern if (cfg->fc_table) 7643bfd8472SDavid Ahern tbl = fib_get_table(net, cfg->fc_table); 7653bfd8472SDavid Ahern 7663bfd8472SDavid Ahern if (tbl) 7673bfd8472SDavid Ahern err = fib_table_lookup(tbl, &fl4, &res, 7681e313678SEric Dumazet FIB_LOOKUP_IGNORE_LINKSTATE | 7691e313678SEric Dumazet FIB_LOOKUP_NOREF); 7704c9bcd11SDavid Ahern 7714c9bcd11SDavid Ahern /* on error or if no table given do full lookup. This 7724c9bcd11SDavid Ahern * is needed for example when nexthops are in the local 7734c9bcd11SDavid Ahern * table rather than the given table 7744c9bcd11SDavid Ahern */ 7754c9bcd11SDavid Ahern if (!tbl || err) { 7760eeb075fSAndy Gospodarek err = fib_lookup(net, &fl4, &res, 7770eeb075fSAndy Gospodarek FIB_LOOKUP_IGNORE_LINKSTATE); 7784c9bcd11SDavid Ahern } 7794c9bcd11SDavid Ahern 780ebc0ffaeSEric Dumazet if (err) { 781ebc0ffaeSEric Dumazet rcu_read_unlock(); 7821da177e4SLinus Torvalds return err; 7831da177e4SLinus Torvalds } 784ebc0ffaeSEric Dumazet } 7851da177e4SLinus Torvalds err = -EINVAL; 7861da177e4SLinus Torvalds if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 7871da177e4SLinus Torvalds goto out; 7881da177e4SLinus Torvalds nh->nh_scope = res.scope; 7891da177e4SLinus Torvalds nh->nh_oif = FIB_RES_OIF(res); 7906a31d2a9SEric Dumazet nh->nh_dev = dev = FIB_RES_DEV(res); 7916a31d2a9SEric Dumazet if (!dev) 7921da177e4SLinus Torvalds goto out; 7936a31d2a9SEric Dumazet dev_hold(dev); 7948a3d0316SAndy Gospodarek if (!netif_carrier_ok(dev)) 7958a3d0316SAndy Gospodarek nh->nh_flags |= RTNH_F_LINKDOWN; 7968723e1b4SEric Dumazet err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; 7971da177e4SLinus Torvalds } else { 7981da177e4SLinus Torvalds struct in_device *in_dev; 7991da177e4SLinus Torvalds 8001da177e4SLinus Torvalds if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) 8011da177e4SLinus Torvalds return -EINVAL; 8028723e1b4SEric Dumazet rcu_read_lock(); 8038723e1b4SEric Dumazet err = -ENODEV; 80486167a37SDenis V. Lunev in_dev = inetdev_by_index(net, nh->nh_oif); 80551456b29SIan Morris if (!in_dev) 8068723e1b4SEric Dumazet goto out; 8078723e1b4SEric Dumazet err = -ENETDOWN; 8088723e1b4SEric Dumazet if (!(in_dev->dev->flags & IFF_UP)) 8098723e1b4SEric Dumazet goto out; 8101da177e4SLinus Torvalds nh->nh_dev = in_dev->dev; 8111da177e4SLinus Torvalds dev_hold(nh->nh_dev); 8121da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_HOST; 8138a3d0316SAndy Gospodarek if (!netif_carrier_ok(nh->nh_dev)) 8148a3d0316SAndy Gospodarek nh->nh_flags |= RTNH_F_LINKDOWN; 8158723e1b4SEric Dumazet err = 0; 8161da177e4SLinus Torvalds } 8178723e1b4SEric Dumazet out: 8188723e1b4SEric Dumazet rcu_read_unlock(); 8198723e1b4SEric Dumazet return err; 8201da177e4SLinus Torvalds } 8211da177e4SLinus Torvalds 82281f7bf6cSAl Viro static inline unsigned int fib_laddr_hashfn(__be32 val) 8231da177e4SLinus Torvalds { 824123b9731SDavid S. Miller unsigned int mask = (fib_info_hash_size - 1); 8251da177e4SLinus Torvalds 8266a31d2a9SEric Dumazet return ((__force u32)val ^ 8276a31d2a9SEric Dumazet ((__force u32)val >> 7) ^ 8286a31d2a9SEric Dumazet ((__force u32)val >> 14)) & mask; 8291da177e4SLinus Torvalds } 8301da177e4SLinus Torvalds 831123b9731SDavid S. Miller static struct hlist_head *fib_info_hash_alloc(int bytes) 8321da177e4SLinus Torvalds { 8331da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 83488f83491SJoonwoo Park return kzalloc(bytes, GFP_KERNEL); 8351da177e4SLinus Torvalds else 8361da177e4SLinus Torvalds return (struct hlist_head *) 8376a31d2a9SEric Dumazet __get_free_pages(GFP_KERNEL | __GFP_ZERO, 8386a31d2a9SEric Dumazet get_order(bytes)); 8391da177e4SLinus Torvalds } 8401da177e4SLinus Torvalds 841123b9731SDavid S. Miller static void fib_info_hash_free(struct hlist_head *hash, int bytes) 8421da177e4SLinus Torvalds { 8431da177e4SLinus Torvalds if (!hash) 8441da177e4SLinus Torvalds return; 8451da177e4SLinus Torvalds 8461da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 8471da177e4SLinus Torvalds kfree(hash); 8481da177e4SLinus Torvalds else 8491da177e4SLinus Torvalds free_pages((unsigned long) hash, get_order(bytes)); 8501da177e4SLinus Torvalds } 8511da177e4SLinus Torvalds 852123b9731SDavid S. Miller static void fib_info_hash_move(struct hlist_head *new_info_hash, 8531da177e4SLinus Torvalds struct hlist_head *new_laddrhash, 8541da177e4SLinus Torvalds unsigned int new_size) 8551da177e4SLinus Torvalds { 856b7656e7fSDavid S. Miller struct hlist_head *old_info_hash, *old_laddrhash; 857123b9731SDavid S. Miller unsigned int old_size = fib_info_hash_size; 858b7656e7fSDavid S. Miller unsigned int i, bytes; 8591da177e4SLinus Torvalds 860832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 861b7656e7fSDavid S. Miller old_info_hash = fib_info_hash; 862b7656e7fSDavid S. Miller old_laddrhash = fib_info_laddrhash; 863123b9731SDavid S. Miller fib_info_hash_size = new_size; 8641da177e4SLinus Torvalds 8651da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 8661da177e4SLinus Torvalds struct hlist_head *head = &fib_info_hash[i]; 867b67bfe0dSSasha Levin struct hlist_node *n; 8681da177e4SLinus Torvalds struct fib_info *fi; 8691da177e4SLinus Torvalds 870b67bfe0dSSasha Levin hlist_for_each_entry_safe(fi, n, head, fib_hash) { 8711da177e4SLinus Torvalds struct hlist_head *dest; 8721da177e4SLinus Torvalds unsigned int new_hash; 8731da177e4SLinus Torvalds 8741da177e4SLinus Torvalds new_hash = fib_info_hashfn(fi); 8751da177e4SLinus Torvalds dest = &new_info_hash[new_hash]; 8761da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, dest); 8771da177e4SLinus Torvalds } 8781da177e4SLinus Torvalds } 8791da177e4SLinus Torvalds fib_info_hash = new_info_hash; 8801da177e4SLinus Torvalds 8811da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 8821da177e4SLinus Torvalds struct hlist_head *lhead = &fib_info_laddrhash[i]; 883b67bfe0dSSasha Levin struct hlist_node *n; 8841da177e4SLinus Torvalds struct fib_info *fi; 8851da177e4SLinus Torvalds 886b67bfe0dSSasha Levin hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { 8871da177e4SLinus Torvalds struct hlist_head *ldest; 8881da177e4SLinus Torvalds unsigned int new_hash; 8891da177e4SLinus Torvalds 8901da177e4SLinus Torvalds new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 8911da177e4SLinus Torvalds ldest = &new_laddrhash[new_hash]; 8921da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, ldest); 8931da177e4SLinus Torvalds } 8941da177e4SLinus Torvalds } 8951da177e4SLinus Torvalds fib_info_laddrhash = new_laddrhash; 8961da177e4SLinus Torvalds 897832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 898b7656e7fSDavid S. Miller 899b7656e7fSDavid S. Miller bytes = old_size * sizeof(struct hlist_head *); 900123b9731SDavid S. Miller fib_info_hash_free(old_info_hash, bytes); 901123b9731SDavid S. Miller fib_info_hash_free(old_laddrhash, bytes); 9021da177e4SLinus Torvalds } 9031da177e4SLinus Torvalds 904436c3b66SDavid S. Miller __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) 905436c3b66SDavid S. Miller { 906436c3b66SDavid S. Miller nh->nh_saddr = inet_select_addr(nh->nh_dev, 907436c3b66SDavid S. Miller nh->nh_gw, 90837e826c5SDavid S. Miller nh->nh_parent->fib_scope); 909436c3b66SDavid S. Miller nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); 910436c3b66SDavid S. Miller 911436c3b66SDavid S. Miller return nh->nh_saddr; 912436c3b66SDavid S. Miller } 913436c3b66SDavid S. Miller 914021dd3b8SDavid Ahern static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) 915021dd3b8SDavid Ahern { 916021dd3b8SDavid Ahern if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 917021dd3b8SDavid Ahern fib_prefsrc != cfg->fc_dst) { 9189b8ff518SDavid Ahern u32 tb_id = cfg->fc_table; 919e1b8d903SDavid Ahern int rc; 920021dd3b8SDavid Ahern 921021dd3b8SDavid Ahern if (tb_id == RT_TABLE_MAIN) 922021dd3b8SDavid Ahern tb_id = RT_TABLE_LOCAL; 923021dd3b8SDavid Ahern 924e1b8d903SDavid Ahern rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, 925e1b8d903SDavid Ahern fib_prefsrc, tb_id); 926e1b8d903SDavid Ahern 927e1b8d903SDavid Ahern if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) { 928e1b8d903SDavid Ahern rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, 929e1b8d903SDavid Ahern fib_prefsrc, RT_TABLE_LOCAL); 930021dd3b8SDavid Ahern } 931e1b8d903SDavid Ahern 932e1b8d903SDavid Ahern if (rc != RTN_LOCAL) 933e1b8d903SDavid Ahern return false; 934021dd3b8SDavid Ahern } 935021dd3b8SDavid Ahern return true; 936021dd3b8SDavid Ahern } 937021dd3b8SDavid Ahern 9386cf9dfd3SFlorian Westphal static int 9396cf9dfd3SFlorian Westphal fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) 9406cf9dfd3SFlorian Westphal { 941c3a8d947SDaniel Borkmann bool ecn_ca = false; 9426cf9dfd3SFlorian Westphal struct nlattr *nla; 9436cf9dfd3SFlorian Westphal int remaining; 9446cf9dfd3SFlorian Westphal 9456cf9dfd3SFlorian Westphal if (!cfg->fc_mx) 9466cf9dfd3SFlorian Westphal return 0; 9476cf9dfd3SFlorian Westphal 9486cf9dfd3SFlorian Westphal nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 9496cf9dfd3SFlorian Westphal int type = nla_type(nla); 9506cf9dfd3SFlorian Westphal u32 val; 9516cf9dfd3SFlorian Westphal 9526cf9dfd3SFlorian Westphal if (!type) 9536cf9dfd3SFlorian Westphal continue; 9546cf9dfd3SFlorian Westphal if (type > RTAX_MAX) 9556cf9dfd3SFlorian Westphal return -EINVAL; 9566cf9dfd3SFlorian Westphal 9576cf9dfd3SFlorian Westphal if (type == RTAX_CC_ALGO) { 9586cf9dfd3SFlorian Westphal char tmp[TCP_CA_NAME_MAX]; 9596cf9dfd3SFlorian Westphal 9606cf9dfd3SFlorian Westphal nla_strlcpy(tmp, nla, sizeof(tmp)); 961c3a8d947SDaniel Borkmann val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 9626cf9dfd3SFlorian Westphal if (val == TCP_CA_UNSPEC) 9636cf9dfd3SFlorian Westphal return -EINVAL; 9646cf9dfd3SFlorian Westphal } else { 9656cf9dfd3SFlorian Westphal val = nla_get_u32(nla); 9666cf9dfd3SFlorian Westphal } 9676cf9dfd3SFlorian Westphal if (type == RTAX_ADVMSS && val > 65535 - 40) 9686cf9dfd3SFlorian Westphal val = 65535 - 40; 9696cf9dfd3SFlorian Westphal if (type == RTAX_MTU && val > 65535 - 15) 9706cf9dfd3SFlorian Westphal val = 65535 - 15; 971626abd59SPaolo Abeni if (type == RTAX_HOPLIMIT && val > 255) 972626abd59SPaolo Abeni val = 255; 973b8d3e416SDaniel Borkmann if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) 974b8d3e416SDaniel Borkmann return -EINVAL; 9756cf9dfd3SFlorian Westphal fi->fib_metrics[type - 1] = val; 9766cf9dfd3SFlorian Westphal } 9776cf9dfd3SFlorian Westphal 978c3a8d947SDaniel Borkmann if (ecn_ca) 979c3a8d947SDaniel Borkmann fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; 980c3a8d947SDaniel Borkmann 9816cf9dfd3SFlorian Westphal return 0; 9826cf9dfd3SFlorian Westphal } 9836cf9dfd3SFlorian Westphal 984*6d8422a1SDavid Ahern struct fib_info *fib_create_info(struct fib_config *cfg, 985*6d8422a1SDavid Ahern struct netlink_ext_ack *extack) 9861da177e4SLinus Torvalds { 9871da177e4SLinus Torvalds int err; 9881da177e4SLinus Torvalds struct fib_info *fi = NULL; 9891da177e4SLinus Torvalds struct fib_info *ofi; 9901da177e4SLinus Torvalds int nhs = 1; 9917462bd74SDenis V. Lunev struct net *net = cfg->fc_nlinfo.nl_net; 9921da177e4SLinus Torvalds 9934c8237cdSDavid S. Miller if (cfg->fc_type > RTN_MAX) 9944c8237cdSDavid S. Miller goto err_inval; 9954c8237cdSDavid S. Miller 9961da177e4SLinus Torvalds /* Fast check to catch the most weird cases */ 9974e902c57SThomas Graf if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 9981da177e4SLinus Torvalds goto err_inval; 9991da177e4SLinus Torvalds 100080610229SJulian Anastasov if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) 100180610229SJulian Anastasov goto err_inval; 100280610229SJulian Anastasov 10031da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 10044e902c57SThomas Graf if (cfg->fc_mp) { 1005*6d8422a1SDavid Ahern nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); 10061da177e4SLinus Torvalds if (nhs == 0) 10071da177e4SLinus Torvalds goto err_inval; 10081da177e4SLinus Torvalds } 10091da177e4SLinus Torvalds #endif 10101da177e4SLinus Torvalds 10111da177e4SLinus Torvalds err = -ENOBUFS; 1012123b9731SDavid S. Miller if (fib_info_cnt >= fib_info_hash_size) { 1013123b9731SDavid S. Miller unsigned int new_size = fib_info_hash_size << 1; 10141da177e4SLinus Torvalds struct hlist_head *new_info_hash; 10151da177e4SLinus Torvalds struct hlist_head *new_laddrhash; 10161da177e4SLinus Torvalds unsigned int bytes; 10171da177e4SLinus Torvalds 10181da177e4SLinus Torvalds if (!new_size) 1019d94ce9b2SEric Dumazet new_size = 16; 10201da177e4SLinus Torvalds bytes = new_size * sizeof(struct hlist_head *); 1021123b9731SDavid S. Miller new_info_hash = fib_info_hash_alloc(bytes); 1022123b9731SDavid S. Miller new_laddrhash = fib_info_hash_alloc(bytes); 10231da177e4SLinus Torvalds if (!new_info_hash || !new_laddrhash) { 1024123b9731SDavid S. Miller fib_info_hash_free(new_info_hash, bytes); 1025123b9731SDavid S. Miller fib_info_hash_free(new_laddrhash, bytes); 102688f83491SJoonwoo Park } else 1027123b9731SDavid S. Miller fib_info_hash_move(new_info_hash, new_laddrhash, new_size); 10281da177e4SLinus Torvalds 1029123b9731SDavid S. Miller if (!fib_info_hash_size) 10301da177e4SLinus Torvalds goto failure; 10311da177e4SLinus Torvalds } 10321da177e4SLinus Torvalds 10330da974f4SPanagiotis Issaris fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 103451456b29SIan Morris if (!fi) 10351da177e4SLinus Torvalds goto failure; 1036aeefa1ecSSergey Popovich fib_info_cnt++; 1037725d1e1bSDavid S. Miller if (cfg->fc_mx) { 10389c150e82SDavid S. Miller fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 10399c150e82SDavid S. Miller if (!fi->fib_metrics) 10409c150e82SDavid S. Miller goto failure; 1041725d1e1bSDavid S. Miller } else 1042725d1e1bSDavid S. Miller fi->fib_metrics = (u32 *) dst_default_metrics; 10431da177e4SLinus Torvalds 1044efd7ef1cSEric W. Biederman fi->fib_net = net; 10454e902c57SThomas Graf fi->fib_protocol = cfg->fc_protocol; 104637e826c5SDavid S. Miller fi->fib_scope = cfg->fc_scope; 10474e902c57SThomas Graf fi->fib_flags = cfg->fc_flags; 10484e902c57SThomas Graf fi->fib_priority = cfg->fc_priority; 10494e902c57SThomas Graf fi->fib_prefsrc = cfg->fc_prefsrc; 1050f4ef85bbSEric Dumazet fi->fib_type = cfg->fc_type; 10515a56a0b3SMark Tomlinson fi->fib_tb_id = cfg->fc_table; 10521da177e4SLinus Torvalds 10531da177e4SLinus Torvalds fi->fib_nhs = nhs; 10541da177e4SLinus Torvalds change_nexthops(fi) { 105571fceff0SDavid S. Miller nexthop_nh->nh_parent = fi; 1056d26b3a7cSEric Dumazet nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); 1057f8a17175SJulian Anastasov if (!nexthop_nh->nh_pcpu_rth_output) 1058f8a17175SJulian Anastasov goto failure; 10591da177e4SLinus Torvalds } endfor_nexthops(fi) 10601da177e4SLinus Torvalds 10616cf9dfd3SFlorian Westphal err = fib_convert_metrics(fi, cfg); 10626cf9dfd3SFlorian Westphal if (err) 10636cf9dfd3SFlorian Westphal goto failure; 10641da177e4SLinus Torvalds 10654e902c57SThomas Graf if (cfg->fc_mp) { 10661da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 1067*6d8422a1SDavid Ahern err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); 10684e902c57SThomas Graf if (err != 0) 10691da177e4SLinus Torvalds goto failure; 10704e902c57SThomas Graf if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) 10711da177e4SLinus Torvalds goto err_inval; 10724e902c57SThomas Graf if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 10731da177e4SLinus Torvalds goto err_inval; 1074c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 10754e902c57SThomas Graf if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 10761da177e4SLinus Torvalds goto err_inval; 10771da177e4SLinus Torvalds #endif 10781da177e4SLinus Torvalds #else 10791da177e4SLinus Torvalds goto err_inval; 10801da177e4SLinus Torvalds #endif 10811da177e4SLinus Torvalds } else { 10821da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 10834e902c57SThomas Graf 1084571e7226SRoopa Prabhu if (cfg->fc_encap) { 1085571e7226SRoopa Prabhu struct lwtunnel_state *lwtstate; 1086571e7226SRoopa Prabhu 1087571e7226SRoopa Prabhu if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) 1088571e7226SRoopa Prabhu goto err_inval; 108930357d7dSDavid Ahern err = lwtunnel_build_state(cfg->fc_encap_type, 1090127eb7cdSTom Herbert cfg->fc_encap, AF_INET, cfg, 1091127eb7cdSTom Herbert &lwtstate); 1092571e7226SRoopa Prabhu if (err) 1093571e7226SRoopa Prabhu goto failure; 1094571e7226SRoopa Prabhu 10955a6228a0SNicolas Dichtel nh->nh_lwtstate = lwtstate_get(lwtstate); 1096571e7226SRoopa Prabhu } 10974e902c57SThomas Graf nh->nh_oif = cfg->fc_oif; 10984e902c57SThomas Graf nh->nh_gw = cfg->fc_gw; 10994e902c57SThomas Graf nh->nh_flags = cfg->fc_flags; 1100c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 11014e902c57SThomas Graf nh->nh_tclassid = cfg->fc_flow; 11027a9bc9b8SDavid S. Miller if (nh->nh_tclassid) 1103f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users++; 11041da177e4SLinus Torvalds #endif 11051da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 11061da177e4SLinus Torvalds nh->nh_weight = 1; 11071da177e4SLinus Torvalds #endif 11081da177e4SLinus Torvalds } 11091da177e4SLinus Torvalds 11104e902c57SThomas Graf if (fib_props[cfg->fc_type].error) { 11114e902c57SThomas Graf if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 11121da177e4SLinus Torvalds goto err_inval; 11131da177e4SLinus Torvalds goto link_it; 11144c8237cdSDavid S. Miller } else { 11154c8237cdSDavid S. Miller switch (cfg->fc_type) { 11164c8237cdSDavid S. Miller case RTN_UNICAST: 11174c8237cdSDavid S. Miller case RTN_LOCAL: 11184c8237cdSDavid S. Miller case RTN_BROADCAST: 11194c8237cdSDavid S. Miller case RTN_ANYCAST: 11204c8237cdSDavid S. Miller case RTN_MULTICAST: 11214c8237cdSDavid S. Miller break; 11224c8237cdSDavid S. Miller default: 11234c8237cdSDavid S. Miller goto err_inval; 11244c8237cdSDavid S. Miller } 11251da177e4SLinus Torvalds } 11261da177e4SLinus Torvalds 11274e902c57SThomas Graf if (cfg->fc_scope > RT_SCOPE_HOST) 11281da177e4SLinus Torvalds goto err_inval; 11291da177e4SLinus Torvalds 11304e902c57SThomas Graf if (cfg->fc_scope == RT_SCOPE_HOST) { 11311da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 11321da177e4SLinus Torvalds 11331da177e4SLinus Torvalds /* Local address is added. */ 1134*6d8422a1SDavid Ahern if (nhs != 1) 1135*6d8422a1SDavid Ahern goto err_inval; 1136*6d8422a1SDavid Ahern if (nh->nh_gw) 11371da177e4SLinus Torvalds goto err_inval; 11381da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_NOWHERE; 11397462bd74SDenis V. Lunev nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); 11401da177e4SLinus Torvalds err = -ENODEV; 114151456b29SIan Morris if (!nh->nh_dev) 11421da177e4SLinus Torvalds goto failure; 11431da177e4SLinus Torvalds } else { 11448a3d0316SAndy Gospodarek int linkdown = 0; 11458a3d0316SAndy Gospodarek 11461da177e4SLinus Torvalds change_nexthops(fi) { 1147*6d8422a1SDavid Ahern err = fib_check_nh(cfg, fi, nexthop_nh, extack); 11486a31d2a9SEric Dumazet if (err != 0) 11491da177e4SLinus Torvalds goto failure; 11508a3d0316SAndy Gospodarek if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) 11518a3d0316SAndy Gospodarek linkdown++; 11521da177e4SLinus Torvalds } endfor_nexthops(fi) 11538a3d0316SAndy Gospodarek if (linkdown == fi->fib_nhs) 11548a3d0316SAndy Gospodarek fi->fib_flags |= RTNH_F_LINKDOWN; 11551da177e4SLinus Torvalds } 11561da177e4SLinus Torvalds 1157021dd3b8SDavid Ahern if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) 11581da177e4SLinus Torvalds goto err_inval; 11591da177e4SLinus Torvalds 11601fc050a1SDavid S. Miller change_nexthops(fi) { 1161436c3b66SDavid S. Miller fib_info_update_nh_saddr(net, nexthop_nh); 11620e884c78SPeter Nørlund fib_add_weight(fi, nexthop_nh); 11631fc050a1SDavid S. Miller } endfor_nexthops(fi) 11641fc050a1SDavid S. Miller 11650e884c78SPeter Nørlund fib_rebalance(fi); 11660e884c78SPeter Nørlund 11671da177e4SLinus Torvalds link_it: 11686a31d2a9SEric Dumazet ofi = fib_find_info(fi); 11696a31d2a9SEric Dumazet if (ofi) { 11701da177e4SLinus Torvalds fi->fib_dead = 1; 11711da177e4SLinus Torvalds free_fib_info(fi); 11721da177e4SLinus Torvalds ofi->fib_treeref++; 11731da177e4SLinus Torvalds return ofi; 11741da177e4SLinus Torvalds } 11751da177e4SLinus Torvalds 11761da177e4SLinus Torvalds fi->fib_treeref++; 11771da177e4SLinus Torvalds atomic_inc(&fi->fib_clntref); 1178832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 11791da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, 11801da177e4SLinus Torvalds &fib_info_hash[fib_info_hashfn(fi)]); 11811da177e4SLinus Torvalds if (fi->fib_prefsrc) { 11821da177e4SLinus Torvalds struct hlist_head *head; 11831da177e4SLinus Torvalds 11841da177e4SLinus Torvalds head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 11851da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, head); 11861da177e4SLinus Torvalds } 11871da177e4SLinus Torvalds change_nexthops(fi) { 11881da177e4SLinus Torvalds struct hlist_head *head; 11891da177e4SLinus Torvalds unsigned int hash; 11901da177e4SLinus Torvalds 119171fceff0SDavid S. Miller if (!nexthop_nh->nh_dev) 11921da177e4SLinus Torvalds continue; 119371fceff0SDavid S. Miller hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); 11941da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 119571fceff0SDavid S. Miller hlist_add_head(&nexthop_nh->nh_hash, head); 11961da177e4SLinus Torvalds } endfor_nexthops(fi) 1197832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 11981da177e4SLinus Torvalds return fi; 11991da177e4SLinus Torvalds 12001da177e4SLinus Torvalds err_inval: 12011da177e4SLinus Torvalds err = -EINVAL; 12021da177e4SLinus Torvalds 12031da177e4SLinus Torvalds failure: 12041da177e4SLinus Torvalds if (fi) { 12051da177e4SLinus Torvalds fi->fib_dead = 1; 12061da177e4SLinus Torvalds free_fib_info(fi); 12071da177e4SLinus Torvalds } 12084e902c57SThomas Graf 12094e902c57SThomas Graf return ERR_PTR(err); 12101da177e4SLinus Torvalds } 12111da177e4SLinus Torvalds 121215e47304SEric W. Biederman int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, 121337e826c5SDavid S. Miller u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, 1214b6544c0bSJamal Hadi Salim struct fib_info *fi, unsigned int flags) 12151da177e4SLinus Torvalds { 12161da177e4SLinus Torvalds struct nlmsghdr *nlh; 1217be403ea1SThomas Graf struct rtmsg *rtm; 12181da177e4SLinus Torvalds 121915e47304SEric W. Biederman nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); 122051456b29SIan Morris if (!nlh) 122126932566SPatrick McHardy return -EMSGSIZE; 1222be403ea1SThomas Graf 1223be403ea1SThomas Graf rtm = nlmsg_data(nlh); 12241da177e4SLinus Torvalds rtm->rtm_family = AF_INET; 12251da177e4SLinus Torvalds rtm->rtm_dst_len = dst_len; 12261da177e4SLinus Torvalds rtm->rtm_src_len = 0; 12271da177e4SLinus Torvalds rtm->rtm_tos = tos; 1228709772e6SKrzysztof Piotr Oledzki if (tb_id < 256) 12291da177e4SLinus Torvalds rtm->rtm_table = tb_id; 1230709772e6SKrzysztof Piotr Oledzki else 1231709772e6SKrzysztof Piotr Oledzki rtm->rtm_table = RT_TABLE_COMPAT; 1232f3756b79SDavid S. Miller if (nla_put_u32(skb, RTA_TABLE, tb_id)) 1233f3756b79SDavid S. Miller goto nla_put_failure; 12341da177e4SLinus Torvalds rtm->rtm_type = type; 12351da177e4SLinus Torvalds rtm->rtm_flags = fi->fib_flags; 123637e826c5SDavid S. Miller rtm->rtm_scope = fi->fib_scope; 12371da177e4SLinus Torvalds rtm->rtm_protocol = fi->fib_protocol; 1238be403ea1SThomas Graf 1239f3756b79SDavid S. Miller if (rtm->rtm_dst_len && 1240930345eaSJiri Benc nla_put_in_addr(skb, RTA_DST, dst)) 1241f3756b79SDavid S. Miller goto nla_put_failure; 1242f3756b79SDavid S. Miller if (fi->fib_priority && 1243f3756b79SDavid S. Miller nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) 1244f3756b79SDavid S. Miller goto nla_put_failure; 12451da177e4SLinus Torvalds if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 1246be403ea1SThomas Graf goto nla_put_failure; 1247be403ea1SThomas Graf 1248f3756b79SDavid S. Miller if (fi->fib_prefsrc && 1249930345eaSJiri Benc nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) 1250f3756b79SDavid S. Miller goto nla_put_failure; 12511da177e4SLinus Torvalds if (fi->fib_nhs == 1) { 12520eeb075fSAndy Gospodarek struct in_device *in_dev; 12530eeb075fSAndy Gospodarek 1254f3756b79SDavid S. Miller if (fi->fib_nh->nh_gw && 1255930345eaSJiri Benc nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) 1256f3756b79SDavid S. Miller goto nla_put_failure; 1257f3756b79SDavid S. Miller if (fi->fib_nh->nh_oif && 1258f3756b79SDavid S. Miller nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) 1259f3756b79SDavid S. Miller goto nla_put_failure; 12600eeb075fSAndy Gospodarek if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { 126196ac5cc9SAndy Gospodarek in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev); 12620eeb075fSAndy Gospodarek if (in_dev && 12630eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) 12640eeb075fSAndy Gospodarek rtm->rtm_flags |= RTNH_F_DEAD; 12650eeb075fSAndy Gospodarek } 1266c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1267f3756b79SDavid S. Miller if (fi->fib_nh[0].nh_tclassid && 1268f3756b79SDavid S. Miller nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) 1269f3756b79SDavid S. Miller goto nla_put_failure; 12708265abc0SPatrick McHardy #endif 1271ea7a8085SDavid Ahern if (fi->fib_nh->nh_lwtstate && 1272ea7a8085SDavid Ahern lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0) 1273ea7a8085SDavid Ahern goto nla_put_failure; 12741da177e4SLinus Torvalds } 12751da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 12761da177e4SLinus Torvalds if (fi->fib_nhs > 1) { 1277be403ea1SThomas Graf struct rtnexthop *rtnh; 1278be403ea1SThomas Graf struct nlattr *mp; 1279be403ea1SThomas Graf 1280be403ea1SThomas Graf mp = nla_nest_start(skb, RTA_MULTIPATH); 128151456b29SIan Morris if (!mp) 1282be403ea1SThomas Graf goto nla_put_failure; 12831da177e4SLinus Torvalds 12841da177e4SLinus Torvalds for_nexthops(fi) { 12850eeb075fSAndy Gospodarek struct in_device *in_dev; 12860eeb075fSAndy Gospodarek 1287be403ea1SThomas Graf rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 128851456b29SIan Morris if (!rtnh) 1289be403ea1SThomas Graf goto nla_put_failure; 1290be403ea1SThomas Graf 1291be403ea1SThomas Graf rtnh->rtnh_flags = nh->nh_flags & 0xFF; 12920eeb075fSAndy Gospodarek if (nh->nh_flags & RTNH_F_LINKDOWN) { 129396ac5cc9SAndy Gospodarek in_dev = __in_dev_get_rtnl(nh->nh_dev); 12940eeb075fSAndy Gospodarek if (in_dev && 12950eeb075fSAndy Gospodarek IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) 12960eeb075fSAndy Gospodarek rtnh->rtnh_flags |= RTNH_F_DEAD; 12970eeb075fSAndy Gospodarek } 1298be403ea1SThomas Graf rtnh->rtnh_hops = nh->nh_weight - 1; 1299be403ea1SThomas Graf rtnh->rtnh_ifindex = nh->nh_oif; 1300be403ea1SThomas Graf 1301f3756b79SDavid S. Miller if (nh->nh_gw && 1302930345eaSJiri Benc nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw)) 1303f3756b79SDavid S. Miller goto nla_put_failure; 1304c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1305f3756b79SDavid S. Miller if (nh->nh_tclassid && 1306f3756b79SDavid S. Miller nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) 1307f3756b79SDavid S. Miller goto nla_put_failure; 13088265abc0SPatrick McHardy #endif 1309ea7a8085SDavid Ahern if (nh->nh_lwtstate && 1310ea7a8085SDavid Ahern lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0) 1311ea7a8085SDavid Ahern goto nla_put_failure; 1312ea7a8085SDavid Ahern 1313be403ea1SThomas Graf /* length of rtnetlink header + attributes */ 1314be403ea1SThomas Graf rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; 13151da177e4SLinus Torvalds } endfor_nexthops(fi); 1316be403ea1SThomas Graf 1317be403ea1SThomas Graf nla_nest_end(skb, mp); 13181da177e4SLinus Torvalds } 13191da177e4SLinus Torvalds #endif 1320053c095aSJohannes Berg nlmsg_end(skb, nlh); 1321053c095aSJohannes Berg return 0; 13221da177e4SLinus Torvalds 1323be403ea1SThomas Graf nla_put_failure: 132426932566SPatrick McHardy nlmsg_cancel(skb, nlh); 132526932566SPatrick McHardy return -EMSGSIZE; 13261da177e4SLinus Torvalds } 13271da177e4SLinus Torvalds 13281da177e4SLinus Torvalds /* 13296a31d2a9SEric Dumazet * Update FIB if: 13306a31d2a9SEric Dumazet * - local address disappeared -> we must delete all the entries 13316a31d2a9SEric Dumazet * referring to it. 13326a31d2a9SEric Dumazet * - device went down -> we must shutdown all nexthops going via it. 13331da177e4SLinus Torvalds */ 13345a56a0b3SMark Tomlinson int fib_sync_down_addr(struct net_device *dev, __be32 local) 13351da177e4SLinus Torvalds { 13361da177e4SLinus Torvalds int ret = 0; 13371da177e4SLinus Torvalds unsigned int hash = fib_laddr_hashfn(local); 13381da177e4SLinus Torvalds struct hlist_head *head = &fib_info_laddrhash[hash]; 13395a56a0b3SMark Tomlinson struct net *net = dev_net(dev); 13405a56a0b3SMark Tomlinson int tb_id = l3mdev_fib_table(dev); 13411da177e4SLinus Torvalds struct fib_info *fi; 13421da177e4SLinus Torvalds 134351456b29SIan Morris if (!fib_info_laddrhash || local == 0) 134485326fa5SDenis V. Lunev return 0; 134585326fa5SDenis V. Lunev 1346b67bfe0dSSasha Levin hlist_for_each_entry(fi, head, fib_lhash) { 13475a56a0b3SMark Tomlinson if (!net_eq(fi->fib_net, net) || 13485a56a0b3SMark Tomlinson fi->fib_tb_id != tb_id) 13494814bdbdSDenis V. Lunev continue; 13501da177e4SLinus Torvalds if (fi->fib_prefsrc == local) { 13511da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 13521da177e4SLinus Torvalds ret++; 13531da177e4SLinus Torvalds } 13541da177e4SLinus Torvalds } 135585326fa5SDenis V. Lunev return ret; 13561da177e4SLinus Torvalds } 13571da177e4SLinus Torvalds 1358982acb97SIdo Schimmel static int call_fib_nh_notifiers(struct fib_nh *fib_nh, 1359982acb97SIdo Schimmel enum fib_event_type event_type) 1360982acb97SIdo Schimmel { 1361982acb97SIdo Schimmel struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev); 1362982acb97SIdo Schimmel struct fib_nh_notifier_info info = { 1363982acb97SIdo Schimmel .fib_nh = fib_nh, 1364982acb97SIdo Schimmel }; 1365982acb97SIdo Schimmel 1366982acb97SIdo Schimmel switch (event_type) { 1367982acb97SIdo Schimmel case FIB_EVENT_NH_ADD: 1368982acb97SIdo Schimmel if (fib_nh->nh_flags & RTNH_F_DEAD) 1369982acb97SIdo Schimmel break; 1370982acb97SIdo Schimmel if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 1371982acb97SIdo Schimmel fib_nh->nh_flags & RTNH_F_LINKDOWN) 1372982acb97SIdo Schimmel break; 1373982acb97SIdo Schimmel return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, 1374982acb97SIdo Schimmel &info.info); 1375982acb97SIdo Schimmel case FIB_EVENT_NH_DEL: 1376982acb97SIdo Schimmel if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 1377982acb97SIdo Schimmel fib_nh->nh_flags & RTNH_F_LINKDOWN) || 1378982acb97SIdo Schimmel (fib_nh->nh_flags & RTNH_F_DEAD)) 1379982acb97SIdo Schimmel return call_fib_notifiers(dev_net(fib_nh->nh_dev), 1380982acb97SIdo Schimmel event_type, &info.info); 1381982acb97SIdo Schimmel default: 1382982acb97SIdo Schimmel break; 1383982acb97SIdo Schimmel } 1384982acb97SIdo Schimmel 1385982acb97SIdo Schimmel return NOTIFY_DONE; 1386982acb97SIdo Schimmel } 1387982acb97SIdo Schimmel 13884f823defSJulian Anastasov /* Event force Flags Description 13894f823defSJulian Anastasov * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host 13904f823defSJulian Anastasov * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host 13914f823defSJulian Anastasov * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed 13924f823defSJulian Anastasov * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed 13934f823defSJulian Anastasov */ 13944f823defSJulian Anastasov int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) 139585326fa5SDenis V. Lunev { 139685326fa5SDenis V. Lunev int ret = 0; 139785326fa5SDenis V. Lunev int scope = RT_SCOPE_NOWHERE; 13981da177e4SLinus Torvalds struct fib_info *prev_fi = NULL; 13991da177e4SLinus Torvalds unsigned int hash = fib_devindex_hashfn(dev->ifindex); 14001da177e4SLinus Torvalds struct hlist_head *head = &fib_info_devhash[hash]; 14011da177e4SLinus Torvalds struct fib_nh *nh; 14021da177e4SLinus Torvalds 14034f823defSJulian Anastasov if (force) 140485326fa5SDenis V. Lunev scope = -1; 140585326fa5SDenis V. Lunev 1406b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 14071da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 14081da177e4SLinus Torvalds int dead; 14091da177e4SLinus Torvalds 14101da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 14111da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 14121da177e4SLinus Torvalds continue; 14131da177e4SLinus Torvalds prev_fi = fi; 14141da177e4SLinus Torvalds dead = 0; 14151da177e4SLinus Torvalds change_nexthops(fi) { 141671fceff0SDavid S. Miller if (nexthop_nh->nh_flags & RTNH_F_DEAD) 14171da177e4SLinus Torvalds dead++; 141871fceff0SDavid S. Miller else if (nexthop_nh->nh_dev == dev && 141971fceff0SDavid S. Miller nexthop_nh->nh_scope != scope) { 14208a3d0316SAndy Gospodarek switch (event) { 14218a3d0316SAndy Gospodarek case NETDEV_DOWN: 14228a3d0316SAndy Gospodarek case NETDEV_UNREGISTER: 142371fceff0SDavid S. Miller nexthop_nh->nh_flags |= RTNH_F_DEAD; 14248a3d0316SAndy Gospodarek /* fall through */ 14258a3d0316SAndy Gospodarek case NETDEV_CHANGE: 14268a3d0316SAndy Gospodarek nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; 14278a3d0316SAndy Gospodarek break; 14288a3d0316SAndy Gospodarek } 1429982acb97SIdo Schimmel call_fib_nh_notifiers(nexthop_nh, 1430982acb97SIdo Schimmel FIB_EVENT_NH_DEL); 14311da177e4SLinus Torvalds dead++; 14321da177e4SLinus Torvalds } 14331da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 14348a3d0316SAndy Gospodarek if (event == NETDEV_UNREGISTER && 14358a3d0316SAndy Gospodarek nexthop_nh->nh_dev == dev) { 14361da177e4SLinus Torvalds dead = fi->fib_nhs; 14371da177e4SLinus Torvalds break; 14381da177e4SLinus Torvalds } 14391da177e4SLinus Torvalds #endif 14401da177e4SLinus Torvalds } endfor_nexthops(fi) 14411da177e4SLinus Torvalds if (dead == fi->fib_nhs) { 14428a3d0316SAndy Gospodarek switch (event) { 14438a3d0316SAndy Gospodarek case NETDEV_DOWN: 14448a3d0316SAndy Gospodarek case NETDEV_UNREGISTER: 14451da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 14468a3d0316SAndy Gospodarek /* fall through */ 14478a3d0316SAndy Gospodarek case NETDEV_CHANGE: 14488a3d0316SAndy Gospodarek fi->fib_flags |= RTNH_F_LINKDOWN; 14498a3d0316SAndy Gospodarek break; 14508a3d0316SAndy Gospodarek } 14511da177e4SLinus Torvalds ret++; 14521da177e4SLinus Torvalds } 14530e884c78SPeter Nørlund 14540e884c78SPeter Nørlund fib_rebalance(fi); 14551da177e4SLinus Torvalds } 14561da177e4SLinus Torvalds 14571da177e4SLinus Torvalds return ret; 14581da177e4SLinus Torvalds } 14591da177e4SLinus Torvalds 14600c838ff1SDavid S. Miller /* Must be invoked inside of an RCU protected region. */ 1461c7b371e3SDavid Ahern static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) 14620c838ff1SDavid S. Miller { 14630c838ff1SDavid S. Miller struct fib_info *fi = NULL, *last_resort = NULL; 146456315f9eSAlexander Duyck struct hlist_head *fa_head = res->fa_head; 14650c838ff1SDavid S. Miller struct fib_table *tb = res->table; 146618a912e9SJulian Anastasov u8 slen = 32 - res->prefixlen; 14670c838ff1SDavid S. Miller int order = -1, last_idx = -1; 14682392debcSJulian Anastasov struct fib_alias *fa, *fa1 = NULL; 14692392debcSJulian Anastasov u32 last_prio = res->fi->fib_priority; 14702392debcSJulian Anastasov u8 last_tos = 0; 14710c838ff1SDavid S. Miller 147256315f9eSAlexander Duyck hlist_for_each_entry_rcu(fa, fa_head, fa_list) { 14730c838ff1SDavid S. Miller struct fib_info *next_fi = fa->fa_info; 14740c838ff1SDavid S. Miller 147518a912e9SJulian Anastasov if (fa->fa_slen != slen) 147618a912e9SJulian Anastasov continue; 14772392debcSJulian Anastasov if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 14782392debcSJulian Anastasov continue; 147918a912e9SJulian Anastasov if (fa->tb_id != tb->tb_id) 148018a912e9SJulian Anastasov continue; 14812392debcSJulian Anastasov if (next_fi->fib_priority > last_prio && 14822392debcSJulian Anastasov fa->fa_tos == last_tos) { 14832392debcSJulian Anastasov if (last_tos) 14842392debcSJulian Anastasov continue; 14852392debcSJulian Anastasov break; 14862392debcSJulian Anastasov } 14872392debcSJulian Anastasov if (next_fi->fib_flags & RTNH_F_DEAD) 14882392debcSJulian Anastasov continue; 14892392debcSJulian Anastasov last_tos = fa->fa_tos; 14902392debcSJulian Anastasov last_prio = next_fi->fib_priority; 14912392debcSJulian Anastasov 149237e826c5SDavid S. Miller if (next_fi->fib_scope != res->scope || 14930c838ff1SDavid S. Miller fa->fa_type != RTN_UNICAST) 14940c838ff1SDavid S. Miller continue; 14950c838ff1SDavid S. Miller if (!next_fi->fib_nh[0].nh_gw || 14960c838ff1SDavid S. Miller next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 14970c838ff1SDavid S. Miller continue; 14980c838ff1SDavid S. Miller 14990c838ff1SDavid S. Miller fib_alias_accessed(fa); 15000c838ff1SDavid S. Miller 150151456b29SIan Morris if (!fi) { 15020c838ff1SDavid S. Miller if (next_fi != res->fi) 15030c838ff1SDavid S. Miller break; 15042392debcSJulian Anastasov fa1 = fa; 15050c838ff1SDavid S. Miller } else if (!fib_detect_death(fi, order, &last_resort, 15062392debcSJulian Anastasov &last_idx, fa1->fa_default)) { 15070c838ff1SDavid S. Miller fib_result_assign(res, fi); 15082392debcSJulian Anastasov fa1->fa_default = order; 15090c838ff1SDavid S. Miller goto out; 15100c838ff1SDavid S. Miller } 15110c838ff1SDavid S. Miller fi = next_fi; 15120c838ff1SDavid S. Miller order++; 15130c838ff1SDavid S. Miller } 15140c838ff1SDavid S. Miller 151551456b29SIan Morris if (order <= 0 || !fi) { 15162392debcSJulian Anastasov if (fa1) 15172392debcSJulian Anastasov fa1->fa_default = -1; 15180c838ff1SDavid S. Miller goto out; 15190c838ff1SDavid S. Miller } 15200c838ff1SDavid S. Miller 15210c838ff1SDavid S. Miller if (!fib_detect_death(fi, order, &last_resort, &last_idx, 15222392debcSJulian Anastasov fa1->fa_default)) { 15230c838ff1SDavid S. Miller fib_result_assign(res, fi); 15242392debcSJulian Anastasov fa1->fa_default = order; 15250c838ff1SDavid S. Miller goto out; 15260c838ff1SDavid S. Miller } 15270c838ff1SDavid S. Miller 15280c838ff1SDavid S. Miller if (last_idx >= 0) 15290c838ff1SDavid S. Miller fib_result_assign(res, last_resort); 15302392debcSJulian Anastasov fa1->fa_default = last_idx; 15310c838ff1SDavid S. Miller out: 153231d40937SEric Dumazet return; 15330c838ff1SDavid S. Miller } 15340c838ff1SDavid S. Miller 15351da177e4SLinus Torvalds /* 15366a31d2a9SEric Dumazet * Dead device goes up. We wake up dead nexthops. 15376a31d2a9SEric Dumazet * It takes sense only on multipath routes. 15381da177e4SLinus Torvalds */ 15398a3d0316SAndy Gospodarek int fib_sync_up(struct net_device *dev, unsigned int nh_flags) 15401da177e4SLinus Torvalds { 15411da177e4SLinus Torvalds struct fib_info *prev_fi; 15421da177e4SLinus Torvalds unsigned int hash; 15431da177e4SLinus Torvalds struct hlist_head *head; 15441da177e4SLinus Torvalds struct fib_nh *nh; 15451da177e4SLinus Torvalds int ret; 15461da177e4SLinus Torvalds 15471da177e4SLinus Torvalds if (!(dev->flags & IFF_UP)) 15481da177e4SLinus Torvalds return 0; 15491da177e4SLinus Torvalds 1550c9b3292eSJulian Anastasov if (nh_flags & RTNH_F_DEAD) { 1551c9b3292eSJulian Anastasov unsigned int flags = dev_get_flags(dev); 1552c9b3292eSJulian Anastasov 1553c9b3292eSJulian Anastasov if (flags & (IFF_RUNNING | IFF_LOWER_UP)) 1554c9b3292eSJulian Anastasov nh_flags |= RTNH_F_LINKDOWN; 1555c9b3292eSJulian Anastasov } 1556c9b3292eSJulian Anastasov 15571da177e4SLinus Torvalds prev_fi = NULL; 15581da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 15591da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 15601da177e4SLinus Torvalds ret = 0; 15611da177e4SLinus Torvalds 1562b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 15631da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 15641da177e4SLinus Torvalds int alive; 15651da177e4SLinus Torvalds 15661da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 15671da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 15681da177e4SLinus Torvalds continue; 15691da177e4SLinus Torvalds 15701da177e4SLinus Torvalds prev_fi = fi; 15711da177e4SLinus Torvalds alive = 0; 15721da177e4SLinus Torvalds change_nexthops(fi) { 15738a3d0316SAndy Gospodarek if (!(nexthop_nh->nh_flags & nh_flags)) { 15741da177e4SLinus Torvalds alive++; 15751da177e4SLinus Torvalds continue; 15761da177e4SLinus Torvalds } 157751456b29SIan Morris if (!nexthop_nh->nh_dev || 157871fceff0SDavid S. Miller !(nexthop_nh->nh_dev->flags & IFF_UP)) 15791da177e4SLinus Torvalds continue; 158071fceff0SDavid S. Miller if (nexthop_nh->nh_dev != dev || 158171fceff0SDavid S. Miller !__in_dev_get_rtnl(dev)) 15821da177e4SLinus Torvalds continue; 15831da177e4SLinus Torvalds alive++; 15848a3d0316SAndy Gospodarek nexthop_nh->nh_flags &= ~nh_flags; 1585982acb97SIdo Schimmel call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); 15861da177e4SLinus Torvalds } endfor_nexthops(fi) 15871da177e4SLinus Torvalds 15881da177e4SLinus Torvalds if (alive > 0) { 15898a3d0316SAndy Gospodarek fi->fib_flags &= ~nh_flags; 15901da177e4SLinus Torvalds ret++; 15911da177e4SLinus Torvalds } 15920e884c78SPeter Nørlund 15930e884c78SPeter Nørlund fib_rebalance(fi); 15941da177e4SLinus Torvalds } 15951da177e4SLinus Torvalds 15961da177e4SLinus Torvalds return ret; 15971da177e4SLinus Torvalds } 15981da177e4SLinus Torvalds 15998a3d0316SAndy Gospodarek #ifdef CONFIG_IP_ROUTE_MULTIPATH 1600a6db4494SDavid Ahern static bool fib_good_nh(const struct fib_nh *nh) 1601a6db4494SDavid Ahern { 1602a6db4494SDavid Ahern int state = NUD_REACHABLE; 1603a6db4494SDavid Ahern 1604a6db4494SDavid Ahern if (nh->nh_scope == RT_SCOPE_LINK) { 1605a6db4494SDavid Ahern struct neighbour *n; 1606a6db4494SDavid Ahern 1607a6db4494SDavid Ahern rcu_read_lock_bh(); 1608a6db4494SDavid Ahern 1609d985d151SEric Dumazet n = __ipv4_neigh_lookup_noref(nh->nh_dev, 1610d985d151SEric Dumazet (__force u32)nh->nh_gw); 1611a6db4494SDavid Ahern if (n) 1612a6db4494SDavid Ahern state = n->nud_state; 1613a6db4494SDavid Ahern 1614a6db4494SDavid Ahern rcu_read_unlock_bh(); 1615a6db4494SDavid Ahern } 1616a6db4494SDavid Ahern 1617a6db4494SDavid Ahern return !!(state & NUD_VALID); 1618a6db4494SDavid Ahern } 16198a3d0316SAndy Gospodarek 16200e884c78SPeter Nørlund void fib_select_multipath(struct fib_result *res, int hash) 16211da177e4SLinus Torvalds { 16221da177e4SLinus Torvalds struct fib_info *fi = res->fi; 1623a6db4494SDavid Ahern struct net *net = fi->fib_net; 1624a6db4494SDavid Ahern bool first = false; 16251da177e4SLinus Torvalds 16260e884c78SPeter Nørlund for_nexthops(fi) { 16270e884c78SPeter Nørlund if (hash > atomic_read(&nh->nh_upper_bound)) 16280eeb075fSAndy Gospodarek continue; 16291da177e4SLinus Torvalds 1630a6db4494SDavid Ahern if (!net->ipv4.sysctl_fib_multipath_use_neigh || 1631a6db4494SDavid Ahern fib_good_nh(nh)) { 16321da177e4SLinus Torvalds res->nh_sel = nhsel; 16331da177e4SLinus Torvalds return; 1634a6db4494SDavid Ahern } 1635a6db4494SDavid Ahern if (!first) { 1636a6db4494SDavid Ahern res->nh_sel = nhsel; 1637a6db4494SDavid Ahern first = true; 1638a6db4494SDavid Ahern } 16391da177e4SLinus Torvalds } endfor_nexthops(fi); 16401da177e4SLinus Torvalds } 16411da177e4SLinus Torvalds #endif 16423ce58d84SDavid Ahern 16433ce58d84SDavid Ahern void fib_select_path(struct net *net, struct fib_result *res, 1644bf4e0a3dSNikolay Aleksandrov struct flowi4 *fl4, const struct sk_buff *skb) 16453ce58d84SDavid Ahern { 16467a18c5b9SDavid Ahern bool oif_check; 16477a18c5b9SDavid Ahern 16487a18c5b9SDavid Ahern oif_check = (fl4->flowi4_oif == 0 || 16497a18c5b9SDavid Ahern fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF); 16507a18c5b9SDavid Ahern 16513ce58d84SDavid Ahern #ifdef CONFIG_IP_ROUTE_MULTIPATH 16527a18c5b9SDavid Ahern if (res->fi->fib_nhs > 1 && oif_check) { 1653bf4e0a3dSNikolay Aleksandrov int h = fib_multipath_hash(res->fi, fl4, skb); 16549920e48bSPaolo Abeni 1655bf4e0a3dSNikolay Aleksandrov fib_select_multipath(res, h); 16563ce58d84SDavid Ahern } 16573ce58d84SDavid Ahern else 16583ce58d84SDavid Ahern #endif 16593ce58d84SDavid Ahern if (!res->prefixlen && 16603ce58d84SDavid Ahern res->table->tb_num_default > 1 && 16617a18c5b9SDavid Ahern res->type == RTN_UNICAST && oif_check) 16623ce58d84SDavid Ahern fib_select_default(fl4, res); 16633ce58d84SDavid Ahern 16643ce58d84SDavid Ahern if (!fl4->saddr) 16653ce58d84SDavid Ahern fl4->saddr = FIB_RES_PREFSRC(net, *res); 16663ce58d84SDavid Ahern } 16673ce58d84SDavid Ahern EXPORT_SYMBOL_GPL(fib_select_path); 1668