11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * IPv4 Forwarding Information Base: semantics. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 111da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 121da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 131da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 167c0f6ba6SLinus Torvalds #include <linux/uaccess.h> 171da177e4SLinus Torvalds #include <linux/bitops.h> 181da177e4SLinus Torvalds #include <linux/types.h> 191da177e4SLinus Torvalds #include <linux/kernel.h> 201da177e4SLinus Torvalds #include <linux/jiffies.h> 211da177e4SLinus Torvalds #include <linux/mm.h> 221da177e4SLinus Torvalds #include <linux/string.h> 231da177e4SLinus Torvalds #include <linux/socket.h> 241da177e4SLinus Torvalds #include <linux/sockios.h> 251da177e4SLinus Torvalds #include <linux/errno.h> 261da177e4SLinus Torvalds #include <linux/in.h> 271da177e4SLinus Torvalds #include <linux/inet.h> 2814c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h> 291da177e4SLinus Torvalds #include <linux/netdevice.h> 301da177e4SLinus Torvalds #include <linux/if_arp.h> 311da177e4SLinus Torvalds #include <linux/proc_fs.h> 321da177e4SLinus Torvalds #include <linux/skbuff.h> 331da177e4SLinus Torvalds #include <linux/init.h> 345a0e3ad6STejun Heo #include <linux/slab.h> 35c3ab2b4eSDavid Ahern #include <linux/netlink.h> 361da177e4SLinus Torvalds 3714c85021SArnaldo Carvalho de Melo #include <net/arp.h> 381da177e4SLinus Torvalds #include <net/ip.h> 391da177e4SLinus Torvalds #include <net/protocol.h> 401da177e4SLinus Torvalds #include <net/route.h> 411da177e4SLinus Torvalds #include <net/tcp.h> 421da177e4SLinus Torvalds #include <net/sock.h> 431da177e4SLinus Torvalds #include <net/ip_fib.h> 44717a8f5bSDavid Ahern #include <net/ip6_fib.h> 45f21c7bc5SThomas Graf #include <net/netlink.h> 463c618c1dSDavid Ahern #include <net/rtnh.h> 47571e7226SRoopa Prabhu #include <net/lwtunnel.h> 4804b1d4e5SIdo Schimmel #include <net/fib_notifier.h> 49c0a72077SDavid Ahern #include <net/addrconf.h> 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds #include "fib_lookup.h" 521da177e4SLinus Torvalds 53832b4c5eSStephen Hemminger static DEFINE_SPINLOCK(fib_info_lock); 541da177e4SLinus Torvalds static struct hlist_head *fib_info_hash; 551da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash; 56123b9731SDavid S. Miller static unsigned int fib_info_hash_size; 571da177e4SLinus Torvalds static unsigned int fib_info_cnt; 581da177e4SLinus Torvalds 591da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8 601da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 611da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 621da177e4SLinus Torvalds 631da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 641da177e4SLinus Torvalds 656a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 666a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh; \ 676a31d2a9SEric Dumazet for (nhsel = 0, nh = (fi)->fib_nh; \ 686a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 696a31d2a9SEric Dumazet nh++, nhsel++) 701da177e4SLinus Torvalds 716a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 726a31d2a9SEric Dumazet int nhsel; struct fib_nh *nexthop_nh; \ 736a31d2a9SEric Dumazet for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 746a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 756a31d2a9SEric Dumazet nexthop_nh++, nhsel++) 761da177e4SLinus Torvalds 771da177e4SLinus Torvalds #else /* CONFIG_IP_ROUTE_MULTIPATH */ 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds /* Hope, that gcc will optimize it to get rid of dummy loop */ 801da177e4SLinus Torvalds 816a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 826a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \ 831da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 841da177e4SLinus Torvalds 856a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 866a31d2a9SEric Dumazet int nhsel; \ 876a31d2a9SEric Dumazet struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 881da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 891da177e4SLinus Torvalds 901da177e4SLinus Torvalds #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds #define endfor_nexthops(fi) } 931da177e4SLinus Torvalds 941da177e4SLinus Torvalds 953be0686bSDavid S. Miller const struct fib_prop fib_props[RTN_MAX + 1] = { 966a31d2a9SEric Dumazet [RTN_UNSPEC] = { 971da177e4SLinus Torvalds .error = 0, 981da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 996a31d2a9SEric Dumazet }, 1006a31d2a9SEric Dumazet [RTN_UNICAST] = { 1011da177e4SLinus Torvalds .error = 0, 1021da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1036a31d2a9SEric Dumazet }, 1046a31d2a9SEric Dumazet [RTN_LOCAL] = { 1051da177e4SLinus Torvalds .error = 0, 1061da177e4SLinus Torvalds .scope = RT_SCOPE_HOST, 1076a31d2a9SEric Dumazet }, 1086a31d2a9SEric Dumazet [RTN_BROADCAST] = { 1091da177e4SLinus Torvalds .error = 0, 1101da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1116a31d2a9SEric Dumazet }, 1126a31d2a9SEric Dumazet [RTN_ANYCAST] = { 1131da177e4SLinus Torvalds .error = 0, 1141da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1156a31d2a9SEric Dumazet }, 1166a31d2a9SEric Dumazet [RTN_MULTICAST] = { 1171da177e4SLinus Torvalds .error = 0, 1181da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1196a31d2a9SEric Dumazet }, 1206a31d2a9SEric Dumazet [RTN_BLACKHOLE] = { 1211da177e4SLinus Torvalds .error = -EINVAL, 1221da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1236a31d2a9SEric Dumazet }, 1246a31d2a9SEric Dumazet [RTN_UNREACHABLE] = { 1251da177e4SLinus Torvalds .error = -EHOSTUNREACH, 1261da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1276a31d2a9SEric Dumazet }, 1286a31d2a9SEric Dumazet [RTN_PROHIBIT] = { 1291da177e4SLinus Torvalds .error = -EACCES, 1301da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1316a31d2a9SEric Dumazet }, 1326a31d2a9SEric Dumazet [RTN_THROW] = { 1331da177e4SLinus Torvalds .error = -EAGAIN, 1341da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1356a31d2a9SEric Dumazet }, 1366a31d2a9SEric Dumazet [RTN_NAT] = { 1371da177e4SLinus Torvalds .error = -EINVAL, 1381da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1396a31d2a9SEric Dumazet }, 1406a31d2a9SEric Dumazet [RTN_XRESOLVE] = { 1411da177e4SLinus Torvalds .error = -EINVAL, 1421da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1436a31d2a9SEric Dumazet }, 1441da177e4SLinus Torvalds }; 1451da177e4SLinus Torvalds 146c5038a83SDavid S. Miller static void rt_fibinfo_free(struct rtable __rcu **rtp) 14754764bb6SEric Dumazet { 14854764bb6SEric Dumazet struct rtable *rt = rcu_dereference_protected(*rtp, 1); 14954764bb6SEric Dumazet 15054764bb6SEric Dumazet if (!rt) 15154764bb6SEric Dumazet return; 15254764bb6SEric Dumazet 15354764bb6SEric Dumazet /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); 15454764bb6SEric Dumazet * because we waited an RCU grace period before calling 15554764bb6SEric Dumazet * free_fib_info_rcu() 15654764bb6SEric Dumazet */ 15754764bb6SEric Dumazet 15895c47f9cSWei Wang dst_dev_put(&rt->dst); 159b838d5e1SWei Wang dst_release_immediate(&rt->dst); 16054764bb6SEric Dumazet } 16154764bb6SEric Dumazet 162c5038a83SDavid S. Miller static void free_nh_exceptions(struct fib_nh *nh) 163c5038a83SDavid S. Miller { 164caa41527SEric Dumazet struct fnhe_hash_bucket *hash; 165c5038a83SDavid S. Miller int i; 166c5038a83SDavid S. Miller 167caa41527SEric Dumazet hash = rcu_dereference_protected(nh->nh_exceptions, 1); 168caa41527SEric Dumazet if (!hash) 169caa41527SEric Dumazet return; 170c5038a83SDavid S. Miller for (i = 0; i < FNHE_HASH_SIZE; i++) { 171c5038a83SDavid S. Miller struct fib_nh_exception *fnhe; 172c5038a83SDavid S. Miller 173c5038a83SDavid S. Miller fnhe = rcu_dereference_protected(hash[i].chain, 1); 174c5038a83SDavid S. Miller while (fnhe) { 175c5038a83SDavid S. Miller struct fib_nh_exception *next; 176c5038a83SDavid S. Miller 177c5038a83SDavid S. Miller next = rcu_dereference_protected(fnhe->fnhe_next, 1); 178c5038a83SDavid S. Miller 1792ffae99dSTimo Teräs rt_fibinfo_free(&fnhe->fnhe_rth_input); 1802ffae99dSTimo Teräs rt_fibinfo_free(&fnhe->fnhe_rth_output); 181c5038a83SDavid S. Miller 182c5038a83SDavid S. Miller kfree(fnhe); 183c5038a83SDavid S. Miller 184c5038a83SDavid S. Miller fnhe = next; 185c5038a83SDavid S. Miller } 186c5038a83SDavid S. Miller } 187c5038a83SDavid S. Miller kfree(hash); 188c5038a83SDavid S. Miller } 189c5038a83SDavid S. Miller 190c5038a83SDavid S. Miller static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) 191d26b3a7cSEric Dumazet { 192d26b3a7cSEric Dumazet int cpu; 193d26b3a7cSEric Dumazet 194d26b3a7cSEric Dumazet if (!rtp) 195d26b3a7cSEric Dumazet return; 196d26b3a7cSEric Dumazet 197d26b3a7cSEric Dumazet for_each_possible_cpu(cpu) { 198d26b3a7cSEric Dumazet struct rtable *rt; 199d26b3a7cSEric Dumazet 200d26b3a7cSEric Dumazet rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); 2010830106cSWei Wang if (rt) { 20295c47f9cSWei Wang dst_dev_put(&rt->dst); 203b838d5e1SWei Wang dst_release_immediate(&rt->dst); 204d26b3a7cSEric Dumazet } 2050830106cSWei Wang } 206d26b3a7cSEric Dumazet free_percpu(rtp); 207d26b3a7cSEric Dumazet } 208d26b3a7cSEric Dumazet 209979e276eSDavid Ahern void fib_nh_common_release(struct fib_nh_common *nhc) 210979e276eSDavid Ahern { 211979e276eSDavid Ahern if (nhc->nhc_dev) 212979e276eSDavid Ahern dev_put(nhc->nhc_dev); 213979e276eSDavid Ahern 214979e276eSDavid Ahern lwtstate_put(nhc->nhc_lwtstate); 215*0f457a36SDavid Ahern rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); 216*0f457a36SDavid Ahern rt_fibinfo_free(&nhc->nhc_rth_input); 217979e276eSDavid Ahern } 218979e276eSDavid Ahern EXPORT_SYMBOL_GPL(fib_nh_common_release); 219979e276eSDavid Ahern 220faa041a4SDavid Ahern void fib_nh_release(struct net *net, struct fib_nh *fib_nh) 221faa041a4SDavid Ahern { 222faa041a4SDavid Ahern #ifdef CONFIG_IP_ROUTE_CLASSID 223faa041a4SDavid Ahern if (fib_nh->nh_tclassid) 224faa041a4SDavid Ahern net->ipv4.fib_num_tclassid_users--; 225faa041a4SDavid Ahern #endif 226979e276eSDavid Ahern fib_nh_common_release(&fib_nh->nh_common); 227faa041a4SDavid Ahern free_nh_exceptions(fib_nh); 228faa041a4SDavid Ahern } 229faa041a4SDavid Ahern 2301da177e4SLinus Torvalds /* Release a nexthop info record */ 23119c1ea14SYan, Zheng static void free_fib_info_rcu(struct rcu_head *head) 23219c1ea14SYan, Zheng { 23319c1ea14SYan, Zheng struct fib_info *fi = container_of(head, struct fib_info, rcu); 23419c1ea14SYan, Zheng 235e49cc0daSYanmin Zhang change_nexthops(fi) { 236faa041a4SDavid Ahern fib_nh_release(fi->fib_net, nexthop_nh); 237e49cc0daSYanmin Zhang } endfor_nexthops(fi); 238e49cc0daSYanmin Zhang 239cc5f0eb2SDavid Ahern ip_fib_metrics_put(fi->fib_metrics); 240cc5f0eb2SDavid Ahern 24119c1ea14SYan, Zheng kfree(fi); 24219c1ea14SYan, Zheng } 2431da177e4SLinus Torvalds 2441da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi) 2451da177e4SLinus Torvalds { 2461da177e4SLinus Torvalds if (fi->fib_dead == 0) { 247058bd4d2SJoe Perches pr_warn("Freeing alive fib_info %p\n", fi); 2481da177e4SLinus Torvalds return; 2491da177e4SLinus Torvalds } 2501da177e4SLinus Torvalds fib_info_cnt--; 251faa041a4SDavid Ahern 25219c1ea14SYan, Zheng call_rcu(&fi->rcu, free_fib_info_rcu); 2531da177e4SLinus Torvalds } 254b423cb10SIdo Schimmel EXPORT_SYMBOL_GPL(free_fib_info); 2551da177e4SLinus Torvalds 2561da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi) 2571da177e4SLinus Torvalds { 258832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 2591da177e4SLinus Torvalds if (fi && --fi->fib_treeref == 0) { 2601da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 2611da177e4SLinus Torvalds if (fi->fib_prefsrc) 2621da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 2631da177e4SLinus Torvalds change_nexthops(fi) { 264b75ed8b1SDavid Ahern if (!nexthop_nh->fib_nh_dev) 2651da177e4SLinus Torvalds continue; 26671fceff0SDavid S. Miller hlist_del(&nexthop_nh->nh_hash); 2671da177e4SLinus Torvalds } endfor_nexthops(fi) 2681da177e4SLinus Torvalds fi->fib_dead = 1; 2691da177e4SLinus Torvalds fib_info_put(fi); 2701da177e4SLinus Torvalds } 271832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 2721da177e4SLinus Torvalds } 2731da177e4SLinus Torvalds 2746a31d2a9SEric Dumazet static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 2751da177e4SLinus Torvalds { 2761da177e4SLinus Torvalds const struct fib_nh *onh = ofi->fib_nh; 2771da177e4SLinus Torvalds 2781da177e4SLinus Torvalds for_nexthops(fi) { 279b75ed8b1SDavid Ahern if (nh->fib_nh_oif != onh->fib_nh_oif || 280a4ea5d43SDavid Ahern nh->fib_nh_gw_family != onh->fib_nh_gw_family || 281b75ed8b1SDavid Ahern nh->fib_nh_scope != onh->fib_nh_scope || 2821da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 283b75ed8b1SDavid Ahern nh->fib_nh_weight != onh->fib_nh_weight || 2841da177e4SLinus Torvalds #endif 285c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 2861da177e4SLinus Torvalds nh->nh_tclassid != onh->nh_tclassid || 2871da177e4SLinus Torvalds #endif 288b75ed8b1SDavid Ahern lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) || 289b75ed8b1SDavid Ahern ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK)) 2901da177e4SLinus Torvalds return -1; 291a4ea5d43SDavid Ahern 292a4ea5d43SDavid Ahern if (nh->fib_nh_gw_family == AF_INET && 293a4ea5d43SDavid Ahern nh->fib_nh_gw4 != onh->fib_nh_gw4) 294a4ea5d43SDavid Ahern return -1; 295a4ea5d43SDavid Ahern 296a4ea5d43SDavid Ahern if (nh->fib_nh_gw_family == AF_INET6 && 297a4ea5d43SDavid Ahern ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6)) 298a4ea5d43SDavid Ahern return -1; 299a4ea5d43SDavid Ahern 3001da177e4SLinus Torvalds onh++; 3011da177e4SLinus Torvalds } endfor_nexthops(fi); 3021da177e4SLinus Torvalds return 0; 3031da177e4SLinus Torvalds } 3041da177e4SLinus Torvalds 30588ebc72fSDavid S. Miller static inline unsigned int fib_devindex_hashfn(unsigned int val) 30688ebc72fSDavid S. Miller { 30788ebc72fSDavid S. Miller unsigned int mask = DEVINDEX_HASHSIZE - 1; 30888ebc72fSDavid S. Miller 30988ebc72fSDavid S. Miller return (val ^ 31088ebc72fSDavid S. Miller (val >> DEVINDEX_HASHBITS) ^ 31188ebc72fSDavid S. Miller (val >> (DEVINDEX_HASHBITS * 2))) & mask; 31288ebc72fSDavid S. Miller } 31388ebc72fSDavid S. Miller 3141da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 3151da177e4SLinus Torvalds { 316123b9731SDavid S. Miller unsigned int mask = (fib_info_hash_size - 1); 3171da177e4SLinus Torvalds unsigned int val = fi->fib_nhs; 3181da177e4SLinus Torvalds 31937e826c5SDavid S. Miller val ^= (fi->fib_protocol << 8) | fi->fib_scope; 32081f7bf6cSAl Viro val ^= (__force u32)fi->fib_prefsrc; 3211da177e4SLinus Torvalds val ^= fi->fib_priority; 32288ebc72fSDavid S. Miller for_nexthops(fi) { 323b75ed8b1SDavid Ahern val ^= fib_devindex_hashfn(nh->fib_nh_oif); 32488ebc72fSDavid S. Miller } endfor_nexthops(fi) 3251da177e4SLinus Torvalds 3261da177e4SLinus Torvalds return (val ^ (val >> 7) ^ (val >> 12)) & mask; 3271da177e4SLinus Torvalds } 3281da177e4SLinus Torvalds 3291da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi) 3301da177e4SLinus Torvalds { 3311da177e4SLinus Torvalds struct hlist_head *head; 3321da177e4SLinus Torvalds struct fib_info *fi; 3331da177e4SLinus Torvalds unsigned int hash; 3341da177e4SLinus Torvalds 3351da177e4SLinus Torvalds hash = fib_info_hashfn(nfi); 3361da177e4SLinus Torvalds head = &fib_info_hash[hash]; 3371da177e4SLinus Torvalds 338b67bfe0dSSasha Levin hlist_for_each_entry(fi, head, fib_hash) { 33909ad9bc7SOctavian Purdila if (!net_eq(fi->fib_net, nfi->fib_net)) 3404814bdbdSDenis V. Lunev continue; 3411da177e4SLinus Torvalds if (fi->fib_nhs != nfi->fib_nhs) 3421da177e4SLinus Torvalds continue; 3431da177e4SLinus Torvalds if (nfi->fib_protocol == fi->fib_protocol && 34437e826c5SDavid S. Miller nfi->fib_scope == fi->fib_scope && 3451da177e4SLinus Torvalds nfi->fib_prefsrc == fi->fib_prefsrc && 3461da177e4SLinus Torvalds nfi->fib_priority == fi->fib_priority && 347f4ef85bbSEric Dumazet nfi->fib_type == fi->fib_type && 3481da177e4SLinus Torvalds memcmp(nfi->fib_metrics, fi->fib_metrics, 349fcd13f42SEric Dumazet sizeof(u32) * RTAX_MAX) == 0 && 3508a3d0316SAndy Gospodarek !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && 3511da177e4SLinus Torvalds (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 3521da177e4SLinus Torvalds return fi; 3531da177e4SLinus Torvalds } 3541da177e4SLinus Torvalds 3551da177e4SLinus Torvalds return NULL; 3561da177e4SLinus Torvalds } 3571da177e4SLinus Torvalds 3581da177e4SLinus Torvalds /* Check, that the gateway is already configured. 3596a31d2a9SEric Dumazet * Used only by redirect accept routine. 3601da177e4SLinus Torvalds */ 361d878e72eSAl Viro int ip_fib_check_default(__be32 gw, struct net_device *dev) 3621da177e4SLinus Torvalds { 3631da177e4SLinus Torvalds struct hlist_head *head; 3641da177e4SLinus Torvalds struct fib_nh *nh; 3651da177e4SLinus Torvalds unsigned int hash; 3661da177e4SLinus Torvalds 367832b4c5eSStephen Hemminger spin_lock(&fib_info_lock); 3681da177e4SLinus Torvalds 3691da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 3701da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 371b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 372b75ed8b1SDavid Ahern if (nh->fib_nh_dev == dev && 373b75ed8b1SDavid Ahern nh->fib_nh_gw4 == gw && 374b75ed8b1SDavid Ahern !(nh->fib_nh_flags & RTNH_F_DEAD)) { 375832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 3761da177e4SLinus Torvalds return 0; 3771da177e4SLinus Torvalds } 3781da177e4SLinus Torvalds } 3791da177e4SLinus Torvalds 380832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 3811da177e4SLinus Torvalds 3821da177e4SLinus Torvalds return -1; 3831da177e4SLinus Torvalds } 3841da177e4SLinus Torvalds 385339bf98fSThomas Graf static inline size_t fib_nlmsg_size(struct fib_info *fi) 386339bf98fSThomas Graf { 387339bf98fSThomas Graf size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 388339bf98fSThomas Graf + nla_total_size(4) /* RTA_TABLE */ 389339bf98fSThomas Graf + nla_total_size(4) /* RTA_DST */ 390339bf98fSThomas Graf + nla_total_size(4) /* RTA_PRIORITY */ 391ea697639SDaniel Borkmann + nla_total_size(4) /* RTA_PREFSRC */ 392ea697639SDaniel Borkmann + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ 393339bf98fSThomas Graf 394339bf98fSThomas Graf /* space for nested metrics */ 395339bf98fSThomas Graf payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 396339bf98fSThomas Graf 397339bf98fSThomas Graf if (fi->fib_nhs) { 398571e7226SRoopa Prabhu size_t nh_encapsize = 0; 399339bf98fSThomas Graf /* Also handles the special case fib_nhs == 1 */ 400339bf98fSThomas Graf 401339bf98fSThomas Graf /* each nexthop is packed in an attribute */ 402339bf98fSThomas Graf size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 403339bf98fSThomas Graf 404339bf98fSThomas Graf /* may contain flow and gateway attribute */ 405339bf98fSThomas Graf nhsize += 2 * nla_total_size(4); 406339bf98fSThomas Graf 407571e7226SRoopa Prabhu /* grab encap info */ 408571e7226SRoopa Prabhu for_nexthops(fi) { 409b75ed8b1SDavid Ahern if (nh->fib_nh_lws) { 410571e7226SRoopa Prabhu /* RTA_ENCAP_TYPE */ 411571e7226SRoopa Prabhu nh_encapsize += lwtunnel_get_encap_size( 412b75ed8b1SDavid Ahern nh->fib_nh_lws); 413571e7226SRoopa Prabhu /* RTA_ENCAP */ 414571e7226SRoopa Prabhu nh_encapsize += nla_total_size(2); 415571e7226SRoopa Prabhu } 416571e7226SRoopa Prabhu } endfor_nexthops(fi); 417571e7226SRoopa Prabhu 418339bf98fSThomas Graf /* all nexthops are packed in a nested attribute */ 419571e7226SRoopa Prabhu payload += nla_total_size((fi->fib_nhs * nhsize) + 420571e7226SRoopa Prabhu nh_encapsize); 421571e7226SRoopa Prabhu 422339bf98fSThomas Graf } 423339bf98fSThomas Graf 424339bf98fSThomas Graf return payload; 425339bf98fSThomas Graf } 426339bf98fSThomas Graf 42781f7bf6cSAl Viro void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 4289877b253SJoe Perches int dst_len, u32 tb_id, const struct nl_info *info, 429b8f55831SMilan Kocian unsigned int nlm_flags) 4301da177e4SLinus Torvalds { 4311da177e4SLinus Torvalds struct sk_buff *skb; 4324e902c57SThomas Graf u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 433f21c7bc5SThomas Graf int err = -ENOBUFS; 4341da177e4SLinus Torvalds 435339bf98fSThomas Graf skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 43651456b29SIan Morris if (!skb) 437f21c7bc5SThomas Graf goto errout; 4381da177e4SLinus Torvalds 43915e47304SEric W. Biederman err = fib_dump_info(skb, info->portid, seq, event, tb_id, 44037e826c5SDavid S. Miller fa->fa_type, key, dst_len, 441b8f55831SMilan Kocian fa->fa_tos, fa->fa_info, nlm_flags); 44226932566SPatrick McHardy if (err < 0) { 44326932566SPatrick McHardy /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 44426932566SPatrick McHardy WARN_ON(err == -EMSGSIZE); 44526932566SPatrick McHardy kfree_skb(skb); 44626932566SPatrick McHardy goto errout; 44726932566SPatrick McHardy } 44815e47304SEric W. Biederman rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, 4494e902c57SThomas Graf info->nlh, GFP_KERNEL); 4501ce85fe4SPablo Neira Ayuso return; 451f21c7bc5SThomas Graf errout: 452f21c7bc5SThomas Graf if (err < 0) 4534d1169c1SDenis V. Lunev rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 4541da177e4SLinus Torvalds } 4551da177e4SLinus Torvalds 456c9cb6b6eSStephen Hemminger static int fib_detect_death(struct fib_info *fi, int order, 457c9cb6b6eSStephen Hemminger struct fib_info **last_resort, int *last_idx, 458c9cb6b6eSStephen Hemminger int dflt) 4591da177e4SLinus Torvalds { 460619d1826SDavid Ahern const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); 4611da177e4SLinus Torvalds struct neighbour *n; 4621da177e4SLinus Torvalds int state = NUD_NONE; 4631da177e4SLinus Torvalds 464619d1826SDavid Ahern if (likely(nhc->nhc_gw_family == AF_INET)) 465619d1826SDavid Ahern n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev); 466619d1826SDavid Ahern else if (nhc->nhc_gw_family == AF_INET6) 467619d1826SDavid Ahern n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6, 468619d1826SDavid Ahern nhc->nhc_dev); 469619d1826SDavid Ahern else 470619d1826SDavid Ahern n = NULL; 471619d1826SDavid Ahern 4721da177e4SLinus Torvalds if (n) { 4731da177e4SLinus Torvalds state = n->nud_state; 4741da177e4SLinus Torvalds neigh_release(n); 47588f64320SJulian Anastasov } else { 47688f64320SJulian Anastasov return 0; 4771da177e4SLinus Torvalds } 4781da177e4SLinus Torvalds if (state == NUD_REACHABLE) 4791da177e4SLinus Torvalds return 0; 480c17860a0SDenis V. Lunev if ((state & NUD_VALID) && order != dflt) 4811da177e4SLinus Torvalds return 0; 4821da177e4SLinus Torvalds if ((state & NUD_VALID) || 48388f64320SJulian Anastasov (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) { 4841da177e4SLinus Torvalds *last_resort = fi; 4851da177e4SLinus Torvalds *last_idx = order; 4861da177e4SLinus Torvalds } 4871da177e4SLinus Torvalds return 1; 4881da177e4SLinus Torvalds } 4891da177e4SLinus Torvalds 490979e276eSDavid Ahern int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, 491979e276eSDavid Ahern u16 encap_type, void *cfg, gfp_t gfp_flags, 492979e276eSDavid Ahern struct netlink_ext_ack *extack) 493979e276eSDavid Ahern { 494*0f457a36SDavid Ahern int err; 495*0f457a36SDavid Ahern 496*0f457a36SDavid Ahern nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, 497*0f457a36SDavid Ahern gfp_flags); 498*0f457a36SDavid Ahern if (!nhc->nhc_pcpu_rth_output) 499*0f457a36SDavid Ahern return -ENOMEM; 500*0f457a36SDavid Ahern 501979e276eSDavid Ahern if (encap) { 502979e276eSDavid Ahern struct lwtunnel_state *lwtstate; 503979e276eSDavid Ahern 504979e276eSDavid Ahern if (encap_type == LWTUNNEL_ENCAP_NONE) { 505979e276eSDavid Ahern NL_SET_ERR_MSG(extack, "LWT encap type not specified"); 506*0f457a36SDavid Ahern err = -EINVAL; 507*0f457a36SDavid Ahern goto lwt_failure; 508979e276eSDavid Ahern } 509979e276eSDavid Ahern err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, 510979e276eSDavid Ahern cfg, &lwtstate, extack); 511979e276eSDavid Ahern if (err) 512*0f457a36SDavid Ahern goto lwt_failure; 513979e276eSDavid Ahern 514979e276eSDavid Ahern nhc->nhc_lwtstate = lwtstate_get(lwtstate); 515979e276eSDavid Ahern } 516979e276eSDavid Ahern 517979e276eSDavid Ahern return 0; 518*0f457a36SDavid Ahern 519*0f457a36SDavid Ahern lwt_failure: 520*0f457a36SDavid Ahern rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); 521*0f457a36SDavid Ahern nhc->nhc_pcpu_rth_output = NULL; 522*0f457a36SDavid Ahern return err; 523979e276eSDavid Ahern } 524979e276eSDavid Ahern EXPORT_SYMBOL_GPL(fib_nh_common_init); 525979e276eSDavid Ahern 526e4516ef6SDavid Ahern int fib_nh_init(struct net *net, struct fib_nh *nh, 527e4516ef6SDavid Ahern struct fib_config *cfg, int nh_weight, 528e4516ef6SDavid Ahern struct netlink_ext_ack *extack) 529e4516ef6SDavid Ahern { 530*0f457a36SDavid Ahern int err; 531e4516ef6SDavid Ahern 532f1741730SDavid Ahern nh->fib_nh_family = AF_INET; 533f1741730SDavid Ahern 534979e276eSDavid Ahern err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, 535979e276eSDavid Ahern cfg->fc_encap_type, cfg, GFP_KERNEL, extack); 536e4516ef6SDavid Ahern if (err) 537*0f457a36SDavid Ahern return err; 538e4516ef6SDavid Ahern 539b75ed8b1SDavid Ahern nh->fib_nh_oif = cfg->fc_oif; 540a4ea5d43SDavid Ahern nh->fib_nh_gw_family = cfg->fc_gw_family; 541a4ea5d43SDavid Ahern if (cfg->fc_gw_family == AF_INET) 542f35b794bSDavid Ahern nh->fib_nh_gw4 = cfg->fc_gw4; 543a4ea5d43SDavid Ahern else if (cfg->fc_gw_family == AF_INET6) 544a4ea5d43SDavid Ahern nh->fib_nh_gw6 = cfg->fc_gw6; 545a4ea5d43SDavid Ahern 546b75ed8b1SDavid Ahern nh->fib_nh_flags = cfg->fc_flags; 547e4516ef6SDavid Ahern 548e4516ef6SDavid Ahern #ifdef CONFIG_IP_ROUTE_CLASSID 549e4516ef6SDavid Ahern nh->nh_tclassid = cfg->fc_flow; 550e4516ef6SDavid Ahern if (nh->nh_tclassid) 551e4516ef6SDavid Ahern net->ipv4.fib_num_tclassid_users++; 552e4516ef6SDavid Ahern #endif 553e4516ef6SDavid Ahern #ifdef CONFIG_IP_ROUTE_MULTIPATH 554b75ed8b1SDavid Ahern nh->fib_nh_weight = nh_weight; 555e4516ef6SDavid Ahern #endif 556e4516ef6SDavid Ahern return 0; 557e4516ef6SDavid Ahern } 558e4516ef6SDavid Ahern 5591da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 5601da177e4SLinus Torvalds 5616d8422a1SDavid Ahern static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, 5626d8422a1SDavid Ahern struct netlink_ext_ack *extack) 5631da177e4SLinus Torvalds { 5641da177e4SLinus Torvalds int nhs = 0; 5651da177e4SLinus Torvalds 5664e902c57SThomas Graf while (rtnh_ok(rtnh, remaining)) { 5671da177e4SLinus Torvalds nhs++; 5684e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 5691da177e4SLinus Torvalds } 5701da177e4SLinus Torvalds 5714e902c57SThomas Graf /* leftover implies invalid nexthop configuration, discard it */ 572c3ab2b4eSDavid Ahern if (remaining > 0) { 573c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 574c3ab2b4eSDavid Ahern "Invalid nexthop configuration - extra data after nexthops"); 575c3ab2b4eSDavid Ahern nhs = 0; 576c3ab2b4eSDavid Ahern } 577c3ab2b4eSDavid Ahern 578c3ab2b4eSDavid Ahern return nhs; 5794e902c57SThomas Graf } 5801da177e4SLinus Torvalds 5814e902c57SThomas Graf static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 5826d8422a1SDavid Ahern int remaining, struct fib_config *cfg, 5836d8422a1SDavid Ahern struct netlink_ext_ack *extack) 5844e902c57SThomas Graf { 585e4516ef6SDavid Ahern struct net *net = fi->fib_net; 586e4516ef6SDavid Ahern struct fib_config fib_cfg; 587571e7226SRoopa Prabhu int ret; 588571e7226SRoopa Prabhu 5891da177e4SLinus Torvalds change_nexthops(fi) { 5904e902c57SThomas Graf int attrlen; 5914e902c57SThomas Graf 592e4516ef6SDavid Ahern memset(&fib_cfg, 0, sizeof(fib_cfg)); 593e4516ef6SDavid Ahern 594c3ab2b4eSDavid Ahern if (!rtnh_ok(rtnh, remaining)) { 595c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 596c3ab2b4eSDavid Ahern "Invalid nexthop configuration - extra data after nexthop"); 5971da177e4SLinus Torvalds return -EINVAL; 598c3ab2b4eSDavid Ahern } 5994e902c57SThomas Graf 600c3ab2b4eSDavid Ahern if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { 601c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 602c3ab2b4eSDavid Ahern "Invalid flags for nexthop - can not contain DEAD or LINKDOWN"); 60380610229SJulian Anastasov return -EINVAL; 604c3ab2b4eSDavid Ahern } 60580610229SJulian Anastasov 606e4516ef6SDavid Ahern fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 607e4516ef6SDavid Ahern fib_cfg.fc_oif = rtnh->rtnh_ifindex; 6084e902c57SThomas Graf 6094e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 6104e902c57SThomas Graf if (attrlen > 0) { 611d1566268SDavid Ahern struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); 6124e902c57SThomas Graf 6134e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 614d1566268SDavid Ahern nlav = nla_find(attrs, attrlen, RTA_VIA); 615d1566268SDavid Ahern if (nla && nlav) { 616d1566268SDavid Ahern NL_SET_ERR_MSG(extack, 617d1566268SDavid Ahern "Nexthop configuration can not contain both GATEWAY and VIA"); 618d1566268SDavid Ahern return -EINVAL; 619d1566268SDavid Ahern } 620f35b794bSDavid Ahern if (nla) { 621f35b794bSDavid Ahern fib_cfg.fc_gw4 = nla_get_in_addr(nla); 622d73f80f9SDavid Ahern if (fib_cfg.fc_gw4) 623d73f80f9SDavid Ahern fib_cfg.fc_gw_family = AF_INET; 624d1566268SDavid Ahern } else if (nlav) { 625d1566268SDavid Ahern ret = fib_gw_from_via(&fib_cfg, nlav, extack); 626d1566268SDavid Ahern if (ret) 627d1566268SDavid Ahern goto errout; 628f35b794bSDavid Ahern } 629571e7226SRoopa Prabhu 630e4516ef6SDavid Ahern nla = nla_find(attrs, attrlen, RTA_FLOW); 631e4516ef6SDavid Ahern if (nla) 632e4516ef6SDavid Ahern fib_cfg.fc_flow = nla_get_u32(nla); 633e4516ef6SDavid Ahern 634e4516ef6SDavid Ahern fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); 635e4516ef6SDavid Ahern nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 636e4516ef6SDavid Ahern if (nla) 637e4516ef6SDavid Ahern fib_cfg.fc_encap_type = nla_get_u16(nla); 638c3ab2b4eSDavid Ahern } 63930357d7dSDavid Ahern 640e4516ef6SDavid Ahern ret = fib_nh_init(net, nexthop_nh, &fib_cfg, 641e4516ef6SDavid Ahern rtnh->rtnh_hops + 1, extack); 642571e7226SRoopa Prabhu if (ret) 643571e7226SRoopa Prabhu goto errout; 6444e902c57SThomas Graf 6454e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 6461da177e4SLinus Torvalds } endfor_nexthops(fi); 6474e902c57SThomas Graf 648571e7226SRoopa Prabhu ret = -EINVAL; 649b75ed8b1SDavid Ahern if (cfg->fc_oif && fi->fib_nh->fib_nh_oif != cfg->fc_oif) { 650e4516ef6SDavid Ahern NL_SET_ERR_MSG(extack, 651e4516ef6SDavid Ahern "Nexthop device index does not match RTA_OIF"); 652e4516ef6SDavid Ahern goto errout; 653e4516ef6SDavid Ahern } 654f35b794bSDavid Ahern if (cfg->fc_gw_family) { 655f35b794bSDavid Ahern if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family || 656f35b794bSDavid Ahern (cfg->fc_gw_family == AF_INET && 657a4ea5d43SDavid Ahern fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) || 658a4ea5d43SDavid Ahern (cfg->fc_gw_family == AF_INET6 && 659a4ea5d43SDavid Ahern ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) { 660e4516ef6SDavid Ahern NL_SET_ERR_MSG(extack, 661a4ea5d43SDavid Ahern "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA"); 662e4516ef6SDavid Ahern goto errout; 663e4516ef6SDavid Ahern } 664f35b794bSDavid Ahern } 665e4516ef6SDavid Ahern #ifdef CONFIG_IP_ROUTE_CLASSID 666e4516ef6SDavid Ahern if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) { 667e4516ef6SDavid Ahern NL_SET_ERR_MSG(extack, 668e4516ef6SDavid Ahern "Nexthop class id does not match RTA_FLOW"); 669e4516ef6SDavid Ahern goto errout; 670e4516ef6SDavid Ahern } 671e4516ef6SDavid Ahern #endif 672e4516ef6SDavid Ahern ret = 0; 673571e7226SRoopa Prabhu errout: 674571e7226SRoopa Prabhu return ret; 6751da177e4SLinus Torvalds } 6761da177e4SLinus Torvalds 6770e884c78SPeter Nørlund static void fib_rebalance(struct fib_info *fi) 6780e884c78SPeter Nørlund { 6790e884c78SPeter Nørlund int total; 6800e884c78SPeter Nørlund int w; 6810e884c78SPeter Nørlund 6820e884c78SPeter Nørlund if (fi->fib_nhs < 2) 6830e884c78SPeter Nørlund return; 6840e884c78SPeter Nørlund 6850e884c78SPeter Nørlund total = 0; 6860e884c78SPeter Nørlund for_nexthops(fi) { 687b75ed8b1SDavid Ahern if (nh->fib_nh_flags & RTNH_F_DEAD) 6880e884c78SPeter Nørlund continue; 6890e884c78SPeter Nørlund 690b75ed8b1SDavid Ahern if (ip_ignore_linkdown(nh->fib_nh_dev) && 691b75ed8b1SDavid Ahern nh->fib_nh_flags & RTNH_F_LINKDOWN) 6920e884c78SPeter Nørlund continue; 6930e884c78SPeter Nørlund 694b75ed8b1SDavid Ahern total += nh->fib_nh_weight; 6950e884c78SPeter Nørlund } endfor_nexthops(fi); 6960e884c78SPeter Nørlund 6970e884c78SPeter Nørlund w = 0; 6980e884c78SPeter Nørlund change_nexthops(fi) { 6990e884c78SPeter Nørlund int upper_bound; 7000e884c78SPeter Nørlund 701b75ed8b1SDavid Ahern if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) { 7020e884c78SPeter Nørlund upper_bound = -1; 703b75ed8b1SDavid Ahern } else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) && 704b75ed8b1SDavid Ahern nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) { 7050e884c78SPeter Nørlund upper_bound = -1; 7060e884c78SPeter Nørlund } else { 707b75ed8b1SDavid Ahern w += nexthop_nh->fib_nh_weight; 7080a837fe4SPeter Nørlund upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, 7090e884c78SPeter Nørlund total) - 1; 7100e884c78SPeter Nørlund } 7110e884c78SPeter Nørlund 712b75ed8b1SDavid Ahern atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound); 7130e884c78SPeter Nørlund } endfor_nexthops(fi); 7140e884c78SPeter Nørlund } 7150e884c78SPeter Nørlund #else /* CONFIG_IP_ROUTE_MULTIPATH */ 7160e884c78SPeter Nørlund 7178373c6c8SDavid Ahern static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 7188373c6c8SDavid Ahern int remaining, struct fib_config *cfg, 7198373c6c8SDavid Ahern struct netlink_ext_ack *extack) 7208373c6c8SDavid Ahern { 7218373c6c8SDavid Ahern NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel"); 7228373c6c8SDavid Ahern 7238373c6c8SDavid Ahern return -EINVAL; 7248373c6c8SDavid Ahern } 7258373c6c8SDavid Ahern 7260e884c78SPeter Nørlund #define fib_rebalance(fi) do { } while (0) 7270e884c78SPeter Nørlund 7280e884c78SPeter Nørlund #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 7291da177e4SLinus Torvalds 73030357d7dSDavid Ahern static int fib_encap_match(u16 encap_type, 731571e7226SRoopa Prabhu struct nlattr *encap, 73230357d7dSDavid Ahern const struct fib_nh *nh, 7339ae28727SDavid Ahern const struct fib_config *cfg, 7349ae28727SDavid Ahern struct netlink_ext_ack *extack) 735571e7226SRoopa Prabhu { 736571e7226SRoopa Prabhu struct lwtunnel_state *lwtstate; 737df383e62SJiri Benc int ret, result = 0; 738571e7226SRoopa Prabhu 739571e7226SRoopa Prabhu if (encap_type == LWTUNNEL_ENCAP_NONE) 740571e7226SRoopa Prabhu return 0; 741571e7226SRoopa Prabhu 7429ae28727SDavid Ahern ret = lwtunnel_build_state(encap_type, encap, AF_INET, 7439ae28727SDavid Ahern cfg, &lwtstate, extack); 744df383e62SJiri Benc if (!ret) { 745b75ed8b1SDavid Ahern result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws); 746df383e62SJiri Benc lwtstate_free(lwtstate); 747df383e62SJiri Benc } 748571e7226SRoopa Prabhu 749df383e62SJiri Benc return result; 750571e7226SRoopa Prabhu } 751571e7226SRoopa Prabhu 7529ae28727SDavid Ahern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, 7539ae28727SDavid Ahern struct netlink_ext_ack *extack) 7541da177e4SLinus Torvalds { 7551da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 7564e902c57SThomas Graf struct rtnexthop *rtnh; 7574e902c57SThomas Graf int remaining; 7581da177e4SLinus Torvalds #endif 7591da177e4SLinus Torvalds 7604e902c57SThomas Graf if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 7611da177e4SLinus Torvalds return 1; 7621da177e4SLinus Torvalds 763f35b794bSDavid Ahern if (cfg->fc_oif || cfg->fc_gw_family) { 764571e7226SRoopa Prabhu if (cfg->fc_encap) { 7659ae28727SDavid Ahern if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, 7669ae28727SDavid Ahern fi->fib_nh, cfg, extack)) 767571e7226SRoopa Prabhu return 1; 768571e7226SRoopa Prabhu } 769a8c6db1dSStefano Brivio #ifdef CONFIG_IP_ROUTE_CLASSID 770a8c6db1dSStefano Brivio if (cfg->fc_flow && 771a8c6db1dSStefano Brivio cfg->fc_flow != fi->fib_nh->nh_tclassid) 772a8c6db1dSStefano Brivio return 1; 773a8c6db1dSStefano Brivio #endif 774f35b794bSDavid Ahern if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) || 775f35b794bSDavid Ahern (cfg->fc_gw_family && 776f35b794bSDavid Ahern cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family)) 7771da177e4SLinus Torvalds return 1; 778f35b794bSDavid Ahern 779f35b794bSDavid Ahern if (cfg->fc_gw_family == AF_INET && 780f35b794bSDavid Ahern cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4) 781f35b794bSDavid Ahern return 1; 782f35b794bSDavid Ahern 783a4ea5d43SDavid Ahern if (cfg->fc_gw_family == AF_INET6 && 784a4ea5d43SDavid Ahern ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6)) 785a4ea5d43SDavid Ahern return 1; 786a4ea5d43SDavid Ahern 787f35b794bSDavid Ahern return 0; 7881da177e4SLinus Torvalds } 7891da177e4SLinus Torvalds 7901da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 79151456b29SIan Morris if (!cfg->fc_mp) 7921da177e4SLinus Torvalds return 0; 7934e902c57SThomas Graf 7944e902c57SThomas Graf rtnh = cfg->fc_mp; 7954e902c57SThomas Graf remaining = cfg->fc_mp_len; 7961da177e4SLinus Torvalds 7971da177e4SLinus Torvalds for_nexthops(fi) { 7984e902c57SThomas Graf int attrlen; 7991da177e4SLinus Torvalds 8004e902c57SThomas Graf if (!rtnh_ok(rtnh, remaining)) 8011da177e4SLinus Torvalds return -EINVAL; 8024e902c57SThomas Graf 803b75ed8b1SDavid Ahern if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif) 8041da177e4SLinus Torvalds return 1; 8054e902c57SThomas Graf 8064e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 807f76936d0SJiri Pirko if (attrlen > 0) { 808d1566268SDavid Ahern struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); 8094e902c57SThomas Graf 8104e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 811d1566268SDavid Ahern nlav = nla_find(attrs, attrlen, RTA_VIA); 812d1566268SDavid Ahern if (nla && nlav) { 813d1566268SDavid Ahern NL_SET_ERR_MSG(extack, 814d1566268SDavid Ahern "Nexthop configuration can not contain both GATEWAY and VIA"); 815d1566268SDavid Ahern return -EINVAL; 816d1566268SDavid Ahern } 817d1566268SDavid Ahern 818d1566268SDavid Ahern if (nla) { 819d1566268SDavid Ahern if (nh->fib_nh_gw_family != AF_INET || 820d1566268SDavid Ahern nla_get_in_addr(nla) != nh->fib_nh_gw4) 8211da177e4SLinus Torvalds return 1; 822d1566268SDavid Ahern } else if (nlav) { 823d1566268SDavid Ahern struct fib_config cfg2; 824d1566268SDavid Ahern int err; 825d1566268SDavid Ahern 826d1566268SDavid Ahern err = fib_gw_from_via(&cfg2, nlav, extack); 827d1566268SDavid Ahern if (err) 828d1566268SDavid Ahern return err; 829d1566268SDavid Ahern 830d1566268SDavid Ahern switch (nh->fib_nh_gw_family) { 831d1566268SDavid Ahern case AF_INET: 832d1566268SDavid Ahern if (cfg2.fc_gw_family != AF_INET || 833d1566268SDavid Ahern cfg2.fc_gw4 != nh->fib_nh_gw4) 834d1566268SDavid Ahern return 1; 835d1566268SDavid Ahern break; 836d1566268SDavid Ahern case AF_INET6: 837d1566268SDavid Ahern if (cfg2.fc_gw_family != AF_INET6 || 838d1566268SDavid Ahern ipv6_addr_cmp(&cfg2.fc_gw6, 839d1566268SDavid Ahern &nh->fib_nh_gw6)) 840d1566268SDavid Ahern return 1; 841d1566268SDavid Ahern break; 842d1566268SDavid Ahern } 843d1566268SDavid Ahern } 844d1566268SDavid Ahern 845c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 8464e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 8474e902c57SThomas Graf if (nla && nla_get_u32(nla) != nh->nh_tclassid) 8481da177e4SLinus Torvalds return 1; 8491da177e4SLinus Torvalds #endif 8501da177e4SLinus Torvalds } 8514e902c57SThomas Graf 8524e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 8531da177e4SLinus Torvalds } endfor_nexthops(fi); 8541da177e4SLinus Torvalds #endif 8551da177e4SLinus Torvalds return 0; 8561da177e4SLinus Torvalds } 8571da177e4SLinus Torvalds 8585f9ae3d9SXin Long bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) 8595f9ae3d9SXin Long { 8605f9ae3d9SXin Long struct nlattr *nla; 8615f9ae3d9SXin Long int remaining; 8625f9ae3d9SXin Long 8635f9ae3d9SXin Long if (!cfg->fc_mx) 8645f9ae3d9SXin Long return true; 8655f9ae3d9SXin Long 8665f9ae3d9SXin Long nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 8675f9ae3d9SXin Long int type = nla_type(nla); 868d03a4557SPhil Sutter u32 fi_val, val; 8695f9ae3d9SXin Long 8705f9ae3d9SXin Long if (!type) 8715f9ae3d9SXin Long continue; 8725f9ae3d9SXin Long if (type > RTAX_MAX) 8735f9ae3d9SXin Long return false; 8745f9ae3d9SXin Long 8755f9ae3d9SXin Long if (type == RTAX_CC_ALGO) { 8765f9ae3d9SXin Long char tmp[TCP_CA_NAME_MAX]; 8775f9ae3d9SXin Long bool ecn_ca = false; 8785f9ae3d9SXin Long 8795f9ae3d9SXin Long nla_strlcpy(tmp, nla, sizeof(tmp)); 8806670e152SStephen Hemminger val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); 8815f9ae3d9SXin Long } else { 8825b5e7a0dSEric Dumazet if (nla_len(nla) != sizeof(u32)) 8835b5e7a0dSEric Dumazet return false; 8845f9ae3d9SXin Long val = nla_get_u32(nla); 8855f9ae3d9SXin Long } 8865f9ae3d9SXin Long 887d03a4557SPhil Sutter fi_val = fi->fib_metrics->metrics[type - 1]; 888d03a4557SPhil Sutter if (type == RTAX_FEATURES) 889d03a4557SPhil Sutter fi_val &= ~DST_FEATURE_ECN_CA; 890d03a4557SPhil Sutter 891d03a4557SPhil Sutter if (fi_val != val) 8925f9ae3d9SXin Long return false; 8935f9ae3d9SXin Long } 8945f9ae3d9SXin Long 8955f9ae3d9SXin Long return true; 8965f9ae3d9SXin Long } 8975f9ae3d9SXin Long 898717a8f5bSDavid Ahern static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, 899717a8f5bSDavid Ahern u32 table, struct netlink_ext_ack *extack) 900717a8f5bSDavid Ahern { 901717a8f5bSDavid Ahern struct fib6_config cfg = { 902717a8f5bSDavid Ahern .fc_table = table, 903717a8f5bSDavid Ahern .fc_flags = nh->fib_nh_flags | RTF_GATEWAY, 904717a8f5bSDavid Ahern .fc_ifindex = nh->fib_nh_oif, 905717a8f5bSDavid Ahern .fc_gateway = nh->fib_nh_gw6, 906717a8f5bSDavid Ahern }; 907717a8f5bSDavid Ahern struct fib6_nh fib6_nh = {}; 908717a8f5bSDavid Ahern int err; 909717a8f5bSDavid Ahern 910717a8f5bSDavid Ahern err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); 911717a8f5bSDavid Ahern if (!err) { 912717a8f5bSDavid Ahern nh->fib_nh_dev = fib6_nh.fib_nh_dev; 913717a8f5bSDavid Ahern dev_hold(nh->fib_nh_dev); 914717a8f5bSDavid Ahern nh->fib_nh_oif = nh->fib_nh_dev->ifindex; 915717a8f5bSDavid Ahern nh->fib_nh_scope = RT_SCOPE_LINK; 916717a8f5bSDavid Ahern 917717a8f5bSDavid Ahern ipv6_stub->fib6_nh_release(&fib6_nh); 918717a8f5bSDavid Ahern } 919717a8f5bSDavid Ahern 920717a8f5bSDavid Ahern return err; 921717a8f5bSDavid Ahern } 9221da177e4SLinus Torvalds 9231da177e4SLinus Torvalds /* 9246a31d2a9SEric Dumazet * Picture 9256a31d2a9SEric Dumazet * ------- 9266a31d2a9SEric Dumazet * 9276a31d2a9SEric Dumazet * Semantics of nexthop is very messy by historical reasons. 9286a31d2a9SEric Dumazet * We have to take into account, that: 9296a31d2a9SEric Dumazet * a) gateway can be actually local interface address, 9306a31d2a9SEric Dumazet * so that gatewayed route is direct. 9316a31d2a9SEric Dumazet * b) gateway must be on-link address, possibly 9326a31d2a9SEric Dumazet * described not by an ifaddr, but also by a direct route. 9336a31d2a9SEric Dumazet * c) If both gateway and interface are specified, they should not 9346a31d2a9SEric Dumazet * contradict. 9356a31d2a9SEric Dumazet * d) If we use tunnel routes, gateway could be not on-link. 9366a31d2a9SEric Dumazet * 9376a31d2a9SEric Dumazet * Attempt to reconcile all of these (alas, self-contradictory) conditions 9386a31d2a9SEric Dumazet * results in pretty ugly and hairy code with obscure logic. 9396a31d2a9SEric Dumazet * 9406a31d2a9SEric Dumazet * I chose to generalized it instead, so that the size 9416a31d2a9SEric Dumazet * of code does not increase practically, but it becomes 9426a31d2a9SEric Dumazet * much more general. 9436a31d2a9SEric Dumazet * Every prefix is assigned a "scope" value: "host" is local address, 9446a31d2a9SEric Dumazet * "link" is direct route, 9456a31d2a9SEric Dumazet * [ ... "site" ... "interior" ... ] 9466a31d2a9SEric Dumazet * and "universe" is true gateway route with global meaning. 9476a31d2a9SEric Dumazet * 9486a31d2a9SEric Dumazet * Every prefix refers to a set of "nexthop"s (gw, oif), 9496a31d2a9SEric Dumazet * where gw must have narrower scope. This recursion stops 9506a31d2a9SEric Dumazet * when gw has LOCAL scope or if "nexthop" is declared ONLINK, 9516a31d2a9SEric Dumazet * which means that gw is forced to be on link. 9526a31d2a9SEric Dumazet * 9536a31d2a9SEric Dumazet * Code is still hairy, but now it is apparently logically 9546a31d2a9SEric Dumazet * consistent and very flexible. F.e. as by-product it allows 9556a31d2a9SEric Dumazet * to co-exists in peace independent exterior and interior 9566a31d2a9SEric Dumazet * routing processes. 9576a31d2a9SEric Dumazet * 9586a31d2a9SEric Dumazet * Normally it looks as following. 9596a31d2a9SEric Dumazet * 9606a31d2a9SEric Dumazet * {universe prefix} -> (gw, oif) [scope link] 9616a31d2a9SEric Dumazet * | 9626a31d2a9SEric Dumazet * |-> {link prefix} -> (gw, oif) [scope local] 9636a31d2a9SEric Dumazet * | 9646a31d2a9SEric Dumazet * |-> {local prefix} (terminal node) 9651da177e4SLinus Torvalds */ 966448d7248SDavid Ahern static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, 967448d7248SDavid Ahern u8 scope, struct netlink_ext_ack *extack) 9681da177e4SLinus Torvalds { 9696a31d2a9SEric Dumazet struct net_device *dev; 9701da177e4SLinus Torvalds struct fib_result res; 971448d7248SDavid Ahern int err; 9721da177e4SLinus Torvalds 973b75ed8b1SDavid Ahern if (nh->fib_nh_flags & RTNH_F_ONLINK) { 97430bbaa19SDavid Ahern unsigned int addr_type; 9751da177e4SLinus Torvalds 976448d7248SDavid Ahern if (scope >= RT_SCOPE_LINK) { 977448d7248SDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop has invalid scope"); 9781da177e4SLinus Torvalds return -EINVAL; 979c3ab2b4eSDavid Ahern } 980b75ed8b1SDavid Ahern dev = __dev_get_by_index(net, nh->fib_nh_oif); 981066b1030SDavid Ahern if (!dev) { 982066b1030SDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop device required for onlink"); 9831da177e4SLinus Torvalds return -ENODEV; 984066b1030SDavid Ahern } 985c3ab2b4eSDavid Ahern if (!(dev->flags & IFF_UP)) { 986448d7248SDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 9871da177e4SLinus Torvalds return -ENETDOWN; 988c3ab2b4eSDavid Ahern } 989448d7248SDavid Ahern addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4); 990c3ab2b4eSDavid Ahern if (addr_type != RTN_UNICAST) { 991448d7248SDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); 99230bbaa19SDavid Ahern return -EINVAL; 993c3ab2b4eSDavid Ahern } 9948a3d0316SAndy Gospodarek if (!netif_carrier_ok(dev)) 995b75ed8b1SDavid Ahern nh->fib_nh_flags |= RTNH_F_LINKDOWN; 996b75ed8b1SDavid Ahern nh->fib_nh_dev = dev; 9971da177e4SLinus Torvalds dev_hold(dev); 998b75ed8b1SDavid Ahern nh->fib_nh_scope = RT_SCOPE_LINK; 9991da177e4SLinus Torvalds return 0; 10001da177e4SLinus Torvalds } 1001ebc0ffaeSEric Dumazet rcu_read_lock(); 10021da177e4SLinus Torvalds { 10033bfd8472SDavid Ahern struct fib_table *tbl = NULL; 10049ade2286SDavid S. Miller struct flowi4 fl4 = { 1005b75ed8b1SDavid Ahern .daddr = nh->fib_nh_gw4, 1006448d7248SDavid Ahern .flowi4_scope = scope + 1, 1007b75ed8b1SDavid Ahern .flowi4_oif = nh->fib_nh_oif, 10086a662719SCong Wang .flowi4_iif = LOOPBACK_IFINDEX, 10094e902c57SThomas Graf }; 10101da177e4SLinus Torvalds 10111da177e4SLinus Torvalds /* It is not necessary, but requires a bit of thinking */ 10129ade2286SDavid S. Miller if (fl4.flowi4_scope < RT_SCOPE_LINK) 10139ade2286SDavid S. Miller fl4.flowi4_scope = RT_SCOPE_LINK; 10143bfd8472SDavid Ahern 1015448d7248SDavid Ahern if (table) 1016448d7248SDavid Ahern tbl = fib_get_table(net, table); 10173bfd8472SDavid Ahern 10183bfd8472SDavid Ahern if (tbl) 10193bfd8472SDavid Ahern err = fib_table_lookup(tbl, &fl4, &res, 10201e313678SEric Dumazet FIB_LOOKUP_IGNORE_LINKSTATE | 10211e313678SEric Dumazet FIB_LOOKUP_NOREF); 10224c9bcd11SDavid Ahern 10234c9bcd11SDavid Ahern /* on error or if no table given do full lookup. This 10244c9bcd11SDavid Ahern * is needed for example when nexthops are in the local 10254c9bcd11SDavid Ahern * table rather than the given table 10264c9bcd11SDavid Ahern */ 10274c9bcd11SDavid Ahern if (!tbl || err) { 10280eeb075fSAndy Gospodarek err = fib_lookup(net, &fl4, &res, 10290eeb075fSAndy Gospodarek FIB_LOOKUP_IGNORE_LINKSTATE); 10304c9bcd11SDavid Ahern } 10314c9bcd11SDavid Ahern 1032ebc0ffaeSEric Dumazet if (err) { 1033448d7248SDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); 1034448d7248SDavid Ahern goto out; 10351da177e4SLinus Torvalds } 1036ebc0ffaeSEric Dumazet } 1037448d7248SDavid Ahern 10381da177e4SLinus Torvalds err = -EINVAL; 1039c3ab2b4eSDavid Ahern if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) { 1040c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); 10411da177e4SLinus Torvalds goto out; 1042c3ab2b4eSDavid Ahern } 1043b75ed8b1SDavid Ahern nh->fib_nh_scope = res.scope; 1044b75ed8b1SDavid Ahern nh->fib_nh_oif = FIB_RES_OIF(res); 1045b75ed8b1SDavid Ahern nh->fib_nh_dev = dev = FIB_RES_DEV(res); 1046c3ab2b4eSDavid Ahern if (!dev) { 1047c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1048c3ab2b4eSDavid Ahern "No egress device for nexthop gateway"); 10491da177e4SLinus Torvalds goto out; 1050c3ab2b4eSDavid Ahern } 10516a31d2a9SEric Dumazet dev_hold(dev); 10528a3d0316SAndy Gospodarek if (!netif_carrier_ok(dev)) 1053b75ed8b1SDavid Ahern nh->fib_nh_flags |= RTNH_F_LINKDOWN; 10548723e1b4SEric Dumazet err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; 1055448d7248SDavid Ahern out: 1056448d7248SDavid Ahern rcu_read_unlock(); 1057448d7248SDavid Ahern return err; 1058448d7248SDavid Ahern } 1059448d7248SDavid Ahern 1060448d7248SDavid Ahern static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, 1061448d7248SDavid Ahern struct netlink_ext_ack *extack) 1062448d7248SDavid Ahern { 10631da177e4SLinus Torvalds struct in_device *in_dev; 1064448d7248SDavid Ahern int err; 10651da177e4SLinus Torvalds 1066b75ed8b1SDavid Ahern if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) { 1067c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1068c3ab2b4eSDavid Ahern "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set"); 10691da177e4SLinus Torvalds return -EINVAL; 1070c3ab2b4eSDavid Ahern } 1071448d7248SDavid Ahern 10728723e1b4SEric Dumazet rcu_read_lock(); 1073448d7248SDavid Ahern 10748723e1b4SEric Dumazet err = -ENODEV; 1075b75ed8b1SDavid Ahern in_dev = inetdev_by_index(net, nh->fib_nh_oif); 107651456b29SIan Morris if (!in_dev) 10778723e1b4SEric Dumazet goto out; 10788723e1b4SEric Dumazet err = -ENETDOWN; 1079c3ab2b4eSDavid Ahern if (!(in_dev->dev->flags & IFF_UP)) { 1080c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Device for nexthop is not up"); 10818723e1b4SEric Dumazet goto out; 1082c3ab2b4eSDavid Ahern } 1083448d7248SDavid Ahern 1084b75ed8b1SDavid Ahern nh->fib_nh_dev = in_dev->dev; 1085b75ed8b1SDavid Ahern dev_hold(nh->fib_nh_dev); 1086b75ed8b1SDavid Ahern nh->fib_nh_scope = RT_SCOPE_HOST; 1087b75ed8b1SDavid Ahern if (!netif_carrier_ok(nh->fib_nh_dev)) 1088b75ed8b1SDavid Ahern nh->fib_nh_flags |= RTNH_F_LINKDOWN; 10898723e1b4SEric Dumazet err = 0; 10908723e1b4SEric Dumazet out: 10918723e1b4SEric Dumazet rcu_read_unlock(); 10928723e1b4SEric Dumazet return err; 10931da177e4SLinus Torvalds } 10941da177e4SLinus Torvalds 1095448d7248SDavid Ahern static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, 1096448d7248SDavid Ahern struct netlink_ext_ack *extack) 1097448d7248SDavid Ahern { 1098448d7248SDavid Ahern struct net *net = cfg->fc_nlinfo.nl_net; 1099448d7248SDavid Ahern u32 table = cfg->fc_table; 1100448d7248SDavid Ahern int err; 1101448d7248SDavid Ahern 1102448d7248SDavid Ahern if (nh->fib_nh_gw_family == AF_INET) 1103448d7248SDavid Ahern err = fib_check_nh_v4_gw(net, nh, table, cfg->fc_scope, extack); 1104717a8f5bSDavid Ahern else if (nh->fib_nh_gw_family == AF_INET6) 1105717a8f5bSDavid Ahern err = fib_check_nh_v6_gw(net, nh, table, extack); 1106448d7248SDavid Ahern else 1107448d7248SDavid Ahern err = fib_check_nh_nongw(net, nh, extack); 1108448d7248SDavid Ahern 1109448d7248SDavid Ahern return err; 1110448d7248SDavid Ahern } 1111448d7248SDavid Ahern 111281f7bf6cSAl Viro static inline unsigned int fib_laddr_hashfn(__be32 val) 11131da177e4SLinus Torvalds { 1114123b9731SDavid S. Miller unsigned int mask = (fib_info_hash_size - 1); 11151da177e4SLinus Torvalds 11166a31d2a9SEric Dumazet return ((__force u32)val ^ 11176a31d2a9SEric Dumazet ((__force u32)val >> 7) ^ 11186a31d2a9SEric Dumazet ((__force u32)val >> 14)) & mask; 11191da177e4SLinus Torvalds } 11201da177e4SLinus Torvalds 1121123b9731SDavid S. Miller static struct hlist_head *fib_info_hash_alloc(int bytes) 11221da177e4SLinus Torvalds { 11231da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 112488f83491SJoonwoo Park return kzalloc(bytes, GFP_KERNEL); 11251da177e4SLinus Torvalds else 11261da177e4SLinus Torvalds return (struct hlist_head *) 11276a31d2a9SEric Dumazet __get_free_pages(GFP_KERNEL | __GFP_ZERO, 11286a31d2a9SEric Dumazet get_order(bytes)); 11291da177e4SLinus Torvalds } 11301da177e4SLinus Torvalds 1131123b9731SDavid S. Miller static void fib_info_hash_free(struct hlist_head *hash, int bytes) 11321da177e4SLinus Torvalds { 11331da177e4SLinus Torvalds if (!hash) 11341da177e4SLinus Torvalds return; 11351da177e4SLinus Torvalds 11361da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 11371da177e4SLinus Torvalds kfree(hash); 11381da177e4SLinus Torvalds else 11391da177e4SLinus Torvalds free_pages((unsigned long) hash, get_order(bytes)); 11401da177e4SLinus Torvalds } 11411da177e4SLinus Torvalds 1142123b9731SDavid S. Miller static void fib_info_hash_move(struct hlist_head *new_info_hash, 11431da177e4SLinus Torvalds struct hlist_head *new_laddrhash, 11441da177e4SLinus Torvalds unsigned int new_size) 11451da177e4SLinus Torvalds { 1146b7656e7fSDavid S. Miller struct hlist_head *old_info_hash, *old_laddrhash; 1147123b9731SDavid S. Miller unsigned int old_size = fib_info_hash_size; 1148b7656e7fSDavid S. Miller unsigned int i, bytes; 11491da177e4SLinus Torvalds 1150832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 1151b7656e7fSDavid S. Miller old_info_hash = fib_info_hash; 1152b7656e7fSDavid S. Miller old_laddrhash = fib_info_laddrhash; 1153123b9731SDavid S. Miller fib_info_hash_size = new_size; 11541da177e4SLinus Torvalds 11551da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 11561da177e4SLinus Torvalds struct hlist_head *head = &fib_info_hash[i]; 1157b67bfe0dSSasha Levin struct hlist_node *n; 11581da177e4SLinus Torvalds struct fib_info *fi; 11591da177e4SLinus Torvalds 1160b67bfe0dSSasha Levin hlist_for_each_entry_safe(fi, n, head, fib_hash) { 11611da177e4SLinus Torvalds struct hlist_head *dest; 11621da177e4SLinus Torvalds unsigned int new_hash; 11631da177e4SLinus Torvalds 11641da177e4SLinus Torvalds new_hash = fib_info_hashfn(fi); 11651da177e4SLinus Torvalds dest = &new_info_hash[new_hash]; 11661da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, dest); 11671da177e4SLinus Torvalds } 11681da177e4SLinus Torvalds } 11691da177e4SLinus Torvalds fib_info_hash = new_info_hash; 11701da177e4SLinus Torvalds 11711da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 11721da177e4SLinus Torvalds struct hlist_head *lhead = &fib_info_laddrhash[i]; 1173b67bfe0dSSasha Levin struct hlist_node *n; 11741da177e4SLinus Torvalds struct fib_info *fi; 11751da177e4SLinus Torvalds 1176b67bfe0dSSasha Levin hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { 11771da177e4SLinus Torvalds struct hlist_head *ldest; 11781da177e4SLinus Torvalds unsigned int new_hash; 11791da177e4SLinus Torvalds 11801da177e4SLinus Torvalds new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 11811da177e4SLinus Torvalds ldest = &new_laddrhash[new_hash]; 11821da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, ldest); 11831da177e4SLinus Torvalds } 11841da177e4SLinus Torvalds } 11851da177e4SLinus Torvalds fib_info_laddrhash = new_laddrhash; 11861da177e4SLinus Torvalds 1187832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 1188b7656e7fSDavid S. Miller 1189b7656e7fSDavid S. Miller bytes = old_size * sizeof(struct hlist_head *); 1190123b9731SDavid S. Miller fib_info_hash_free(old_info_hash, bytes); 1191123b9731SDavid S. Miller fib_info_hash_free(old_laddrhash, bytes); 11921da177e4SLinus Torvalds } 11931da177e4SLinus Torvalds 1194436c3b66SDavid S. Miller __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) 1195436c3b66SDavid S. Miller { 1196b75ed8b1SDavid Ahern nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, 1197b75ed8b1SDavid Ahern nh->fib_nh_gw4, 119837e826c5SDavid S. Miller nh->nh_parent->fib_scope); 1199436c3b66SDavid S. Miller nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); 1200436c3b66SDavid S. Miller 1201436c3b66SDavid S. Miller return nh->nh_saddr; 1202436c3b66SDavid S. Miller } 1203436c3b66SDavid S. Miller 1204eba618abSDavid Ahern __be32 fib_result_prefsrc(struct net *net, struct fib_result *res) 1205eba618abSDavid Ahern { 1206eba618abSDavid Ahern struct fib_nh_common *nhc = res->nhc; 1207eba618abSDavid Ahern struct fib_nh *nh; 1208eba618abSDavid Ahern 1209eba618abSDavid Ahern if (res->fi->fib_prefsrc) 1210eba618abSDavid Ahern return res->fi->fib_prefsrc; 1211eba618abSDavid Ahern 1212eba618abSDavid Ahern nh = container_of(nhc, struct fib_nh, nh_common); 1213eba618abSDavid Ahern if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid)) 1214eba618abSDavid Ahern return nh->nh_saddr; 1215eba618abSDavid Ahern 1216eba618abSDavid Ahern return fib_info_update_nh_saddr(net, nh); 1217eba618abSDavid Ahern } 1218eba618abSDavid Ahern 1219021dd3b8SDavid Ahern static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) 1220021dd3b8SDavid Ahern { 1221021dd3b8SDavid Ahern if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 1222021dd3b8SDavid Ahern fib_prefsrc != cfg->fc_dst) { 12239b8ff518SDavid Ahern u32 tb_id = cfg->fc_table; 1224e1b8d903SDavid Ahern int rc; 1225021dd3b8SDavid Ahern 1226021dd3b8SDavid Ahern if (tb_id == RT_TABLE_MAIN) 1227021dd3b8SDavid Ahern tb_id = RT_TABLE_LOCAL; 1228021dd3b8SDavid Ahern 1229e1b8d903SDavid Ahern rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, 1230e1b8d903SDavid Ahern fib_prefsrc, tb_id); 1231e1b8d903SDavid Ahern 1232e1b8d903SDavid Ahern if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) { 1233e1b8d903SDavid Ahern rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, 1234e1b8d903SDavid Ahern fib_prefsrc, RT_TABLE_LOCAL); 1235021dd3b8SDavid Ahern } 1236e1b8d903SDavid Ahern 1237e1b8d903SDavid Ahern if (rc != RTN_LOCAL) 1238e1b8d903SDavid Ahern return false; 1239021dd3b8SDavid Ahern } 1240021dd3b8SDavid Ahern return true; 1241021dd3b8SDavid Ahern } 1242021dd3b8SDavid Ahern 12436d8422a1SDavid Ahern struct fib_info *fib_create_info(struct fib_config *cfg, 12446d8422a1SDavid Ahern struct netlink_ext_ack *extack) 12451da177e4SLinus Torvalds { 12461da177e4SLinus Torvalds int err; 12471da177e4SLinus Torvalds struct fib_info *fi = NULL; 12481da177e4SLinus Torvalds struct fib_info *ofi; 12491da177e4SLinus Torvalds int nhs = 1; 12507462bd74SDenis V. Lunev struct net *net = cfg->fc_nlinfo.nl_net; 12511da177e4SLinus Torvalds 12524c8237cdSDavid S. Miller if (cfg->fc_type > RTN_MAX) 12534c8237cdSDavid S. Miller goto err_inval; 12544c8237cdSDavid S. Miller 12551da177e4SLinus Torvalds /* Fast check to catch the most weird cases */ 1256c3ab2b4eSDavid Ahern if (fib_props[cfg->fc_type].scope > cfg->fc_scope) { 1257c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid scope"); 12581da177e4SLinus Torvalds goto err_inval; 1259c3ab2b4eSDavid Ahern } 12601da177e4SLinus Torvalds 1261c3ab2b4eSDavid Ahern if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { 1262c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1263c3ab2b4eSDavid Ahern "Invalid rtm_flags - can not contain DEAD or LINKDOWN"); 126480610229SJulian Anastasov goto err_inval; 1265c3ab2b4eSDavid Ahern } 126680610229SJulian Anastasov 12671da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 12684e902c57SThomas Graf if (cfg->fc_mp) { 12696d8422a1SDavid Ahern nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); 12701da177e4SLinus Torvalds if (nhs == 0) 12711da177e4SLinus Torvalds goto err_inval; 12721da177e4SLinus Torvalds } 12731da177e4SLinus Torvalds #endif 12741da177e4SLinus Torvalds 12751da177e4SLinus Torvalds err = -ENOBUFS; 1276123b9731SDavid S. Miller if (fib_info_cnt >= fib_info_hash_size) { 1277123b9731SDavid S. Miller unsigned int new_size = fib_info_hash_size << 1; 12781da177e4SLinus Torvalds struct hlist_head *new_info_hash; 12791da177e4SLinus Torvalds struct hlist_head *new_laddrhash; 12801da177e4SLinus Torvalds unsigned int bytes; 12811da177e4SLinus Torvalds 12821da177e4SLinus Torvalds if (!new_size) 1283d94ce9b2SEric Dumazet new_size = 16; 12841da177e4SLinus Torvalds bytes = new_size * sizeof(struct hlist_head *); 1285123b9731SDavid S. Miller new_info_hash = fib_info_hash_alloc(bytes); 1286123b9731SDavid S. Miller new_laddrhash = fib_info_hash_alloc(bytes); 12871da177e4SLinus Torvalds if (!new_info_hash || !new_laddrhash) { 1288123b9731SDavid S. Miller fib_info_hash_free(new_info_hash, bytes); 1289123b9731SDavid S. Miller fib_info_hash_free(new_laddrhash, bytes); 129088f83491SJoonwoo Park } else 1291123b9731SDavid S. Miller fib_info_hash_move(new_info_hash, new_laddrhash, new_size); 12921da177e4SLinus Torvalds 1293123b9731SDavid S. Miller if (!fib_info_hash_size) 12941da177e4SLinus Torvalds goto failure; 12951da177e4SLinus Torvalds } 12961da177e4SLinus Torvalds 12971f533ba6SGustavo A. R. Silva fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); 129851456b29SIan Morris if (!fi) 12991da177e4SLinus Torvalds goto failure; 1300767a2217SDavid Ahern fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, 1301d7e774f3SDavid Ahern cfg->fc_mx_len, extack); 1302767a2217SDavid Ahern if (unlikely(IS_ERR(fi->fib_metrics))) { 1303767a2217SDavid Ahern err = PTR_ERR(fi->fib_metrics); 1304187e5b3aSEric Dumazet kfree(fi); 1305187e5b3aSEric Dumazet return ERR_PTR(err); 1306187e5b3aSEric Dumazet } 1307767a2217SDavid Ahern 1308187e5b3aSEric Dumazet fib_info_cnt++; 1309efd7ef1cSEric W. Biederman fi->fib_net = net; 13104e902c57SThomas Graf fi->fib_protocol = cfg->fc_protocol; 131137e826c5SDavid S. Miller fi->fib_scope = cfg->fc_scope; 13124e902c57SThomas Graf fi->fib_flags = cfg->fc_flags; 13134e902c57SThomas Graf fi->fib_priority = cfg->fc_priority; 13144e902c57SThomas Graf fi->fib_prefsrc = cfg->fc_prefsrc; 1315f4ef85bbSEric Dumazet fi->fib_type = cfg->fc_type; 13165a56a0b3SMark Tomlinson fi->fib_tb_id = cfg->fc_table; 13171da177e4SLinus Torvalds 13181da177e4SLinus Torvalds fi->fib_nhs = nhs; 13191da177e4SLinus Torvalds change_nexthops(fi) { 132071fceff0SDavid S. Miller nexthop_nh->nh_parent = fi; 13211da177e4SLinus Torvalds } endfor_nexthops(fi) 13221da177e4SLinus Torvalds 1323e4516ef6SDavid Ahern if (cfg->fc_mp) 13246d8422a1SDavid Ahern err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); 1325e4516ef6SDavid Ahern else 1326e4516ef6SDavid Ahern err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack); 1327e4516ef6SDavid Ahern 13284e902c57SThomas Graf if (err != 0) 13291da177e4SLinus Torvalds goto failure; 13301da177e4SLinus Torvalds 13314e902c57SThomas Graf if (fib_props[cfg->fc_type].error) { 1332f35b794bSDavid Ahern if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) { 1333c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1334c3ab2b4eSDavid Ahern "Gateway, device and multipath can not be specified for this route type"); 13351da177e4SLinus Torvalds goto err_inval; 1336c3ab2b4eSDavid Ahern } 13371da177e4SLinus Torvalds goto link_it; 13384c8237cdSDavid S. Miller } else { 13394c8237cdSDavid S. Miller switch (cfg->fc_type) { 13404c8237cdSDavid S. Miller case RTN_UNICAST: 13414c8237cdSDavid S. Miller case RTN_LOCAL: 13424c8237cdSDavid S. Miller case RTN_BROADCAST: 13434c8237cdSDavid S. Miller case RTN_ANYCAST: 13444c8237cdSDavid S. Miller case RTN_MULTICAST: 13454c8237cdSDavid S. Miller break; 13464c8237cdSDavid S. Miller default: 1347c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid route type"); 13484c8237cdSDavid S. Miller goto err_inval; 13494c8237cdSDavid S. Miller } 13501da177e4SLinus Torvalds } 13511da177e4SLinus Torvalds 1352c3ab2b4eSDavid Ahern if (cfg->fc_scope > RT_SCOPE_HOST) { 1353c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid scope"); 13541da177e4SLinus Torvalds goto err_inval; 1355c3ab2b4eSDavid Ahern } 13561da177e4SLinus Torvalds 13574e902c57SThomas Graf if (cfg->fc_scope == RT_SCOPE_HOST) { 13581da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 13591da177e4SLinus Torvalds 13601da177e4SLinus Torvalds /* Local address is added. */ 1361c3ab2b4eSDavid Ahern if (nhs != 1) { 1362c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1363c3ab2b4eSDavid Ahern "Route with host scope can not have multiple nexthops"); 13646d8422a1SDavid Ahern goto err_inval; 1365c3ab2b4eSDavid Ahern } 1366bdf00467SDavid Ahern if (nh->fib_nh_gw_family) { 1367c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1368c3ab2b4eSDavid Ahern "Route with host scope can not have a gateway"); 13691da177e4SLinus Torvalds goto err_inval; 1370c3ab2b4eSDavid Ahern } 1371b75ed8b1SDavid Ahern nh->fib_nh_scope = RT_SCOPE_NOWHERE; 1372b75ed8b1SDavid Ahern nh->fib_nh_dev = dev_get_by_index(net, fi->fib_nh->fib_nh_oif); 13731da177e4SLinus Torvalds err = -ENODEV; 1374b75ed8b1SDavid Ahern if (!nh->fib_nh_dev) 13751da177e4SLinus Torvalds goto failure; 13761da177e4SLinus Torvalds } else { 13778a3d0316SAndy Gospodarek int linkdown = 0; 13788a3d0316SAndy Gospodarek 13791da177e4SLinus Torvalds change_nexthops(fi) { 1380fa8fefaaSDavid Ahern err = fib_check_nh(cfg, nexthop_nh, extack); 13816a31d2a9SEric Dumazet if (err != 0) 13821da177e4SLinus Torvalds goto failure; 1383b75ed8b1SDavid Ahern if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) 13848a3d0316SAndy Gospodarek linkdown++; 13851da177e4SLinus Torvalds } endfor_nexthops(fi) 13868a3d0316SAndy Gospodarek if (linkdown == fi->fib_nhs) 13878a3d0316SAndy Gospodarek fi->fib_flags |= RTNH_F_LINKDOWN; 13881da177e4SLinus Torvalds } 13891da177e4SLinus Torvalds 1390c3ab2b4eSDavid Ahern if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) { 1391c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid prefsrc address"); 13921da177e4SLinus Torvalds goto err_inval; 1393c3ab2b4eSDavid Ahern } 13941da177e4SLinus Torvalds 13951fc050a1SDavid S. Miller change_nexthops(fi) { 1396436c3b66SDavid S. Miller fib_info_update_nh_saddr(net, nexthop_nh); 139719a9d136SDavid Ahern if (nexthop_nh->fib_nh_gw_family == AF_INET6) 139819a9d136SDavid Ahern fi->fib_nh_is_v6 = true; 13991fc050a1SDavid S. Miller } endfor_nexthops(fi) 14001fc050a1SDavid S. Miller 14010e884c78SPeter Nørlund fib_rebalance(fi); 14020e884c78SPeter Nørlund 14031da177e4SLinus Torvalds link_it: 14046a31d2a9SEric Dumazet ofi = fib_find_info(fi); 14056a31d2a9SEric Dumazet if (ofi) { 14061da177e4SLinus Torvalds fi->fib_dead = 1; 14071da177e4SLinus Torvalds free_fib_info(fi); 14081da177e4SLinus Torvalds ofi->fib_treeref++; 14091da177e4SLinus Torvalds return ofi; 14101da177e4SLinus Torvalds } 14111da177e4SLinus Torvalds 14121da177e4SLinus Torvalds fi->fib_treeref++; 14130029c0deSReshetova, Elena refcount_set(&fi->fib_clntref, 1); 1414832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 14151da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, 14161da177e4SLinus Torvalds &fib_info_hash[fib_info_hashfn(fi)]); 14171da177e4SLinus Torvalds if (fi->fib_prefsrc) { 14181da177e4SLinus Torvalds struct hlist_head *head; 14191da177e4SLinus Torvalds 14201da177e4SLinus Torvalds head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 14211da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, head); 14221da177e4SLinus Torvalds } 14231da177e4SLinus Torvalds change_nexthops(fi) { 14241da177e4SLinus Torvalds struct hlist_head *head; 14251da177e4SLinus Torvalds unsigned int hash; 14261da177e4SLinus Torvalds 1427b75ed8b1SDavid Ahern if (!nexthop_nh->fib_nh_dev) 14281da177e4SLinus Torvalds continue; 1429b75ed8b1SDavid Ahern hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); 14301da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 143171fceff0SDavid S. Miller hlist_add_head(&nexthop_nh->nh_hash, head); 14321da177e4SLinus Torvalds } endfor_nexthops(fi) 1433832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 14341da177e4SLinus Torvalds return fi; 14351da177e4SLinus Torvalds 14361da177e4SLinus Torvalds err_inval: 14371da177e4SLinus Torvalds err = -EINVAL; 14381da177e4SLinus Torvalds 14391da177e4SLinus Torvalds failure: 14401da177e4SLinus Torvalds if (fi) { 14411da177e4SLinus Torvalds fi->fib_dead = 1; 14421da177e4SLinus Torvalds free_fib_info(fi); 14431da177e4SLinus Torvalds } 14444e902c57SThomas Graf 14454e902c57SThomas Graf return ERR_PTR(err); 14461da177e4SLinus Torvalds } 14471da177e4SLinus Torvalds 1448c0a72077SDavid Ahern int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, 1449ecc5663cSDavid Ahern unsigned char *flags, bool skip_oif) 1450b0f60193SDavid Ahern { 1451c2364199SDavid Ahern if (nhc->nhc_flags & RTNH_F_DEAD) 1452b0f60193SDavid Ahern *flags |= RTNH_F_DEAD; 1453b0f60193SDavid Ahern 1454c2364199SDavid Ahern if (nhc->nhc_flags & RTNH_F_LINKDOWN) { 1455b0f60193SDavid Ahern *flags |= RTNH_F_LINKDOWN; 1456b0f60193SDavid Ahern 1457b0f60193SDavid Ahern rcu_read_lock(); 1458c2364199SDavid Ahern switch (nhc->nhc_family) { 1459c2364199SDavid Ahern case AF_INET: 1460c2364199SDavid Ahern if (ip_ignore_linkdown(nhc->nhc_dev)) 1461b0f60193SDavid Ahern *flags |= RTNH_F_DEAD; 1462c2364199SDavid Ahern break; 1463c0a72077SDavid Ahern case AF_INET6: 1464c0a72077SDavid Ahern if (ip6_ignore_linkdown(nhc->nhc_dev)) 1465c0a72077SDavid Ahern *flags |= RTNH_F_DEAD; 1466c0a72077SDavid Ahern break; 1467c2364199SDavid Ahern } 1468b0f60193SDavid Ahern rcu_read_unlock(); 1469b0f60193SDavid Ahern } 1470b0f60193SDavid Ahern 1471bdf00467SDavid Ahern switch (nhc->nhc_gw_family) { 1472c2364199SDavid Ahern case AF_INET: 1473c2364199SDavid Ahern if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4)) 1474b0f60193SDavid Ahern goto nla_put_failure; 1475c2364199SDavid Ahern break; 1476c0a72077SDavid Ahern case AF_INET6: 1477d1566268SDavid Ahern /* if gateway family does not match nexthop family 1478d1566268SDavid Ahern * gateway is encoded as RTA_VIA 1479d1566268SDavid Ahern */ 1480d1566268SDavid Ahern if (nhc->nhc_gw_family != nhc->nhc_family) { 1481d1566268SDavid Ahern int alen = sizeof(struct in6_addr); 1482d1566268SDavid Ahern struct nlattr *nla; 1483d1566268SDavid Ahern struct rtvia *via; 1484d1566268SDavid Ahern 1485d1566268SDavid Ahern nla = nla_reserve(skb, RTA_VIA, alen + 2); 1486d1566268SDavid Ahern if (!nla) 1487c0a72077SDavid Ahern goto nla_put_failure; 1488d1566268SDavid Ahern 1489d1566268SDavid Ahern via = nla_data(nla); 1490d1566268SDavid Ahern via->rtvia_family = AF_INET6; 1491d1566268SDavid Ahern memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen); 1492d1566268SDavid Ahern } else if (nla_put_in6_addr(skb, RTA_GATEWAY, 1493d1566268SDavid Ahern &nhc->nhc_gw.ipv6) < 0) { 1494d1566268SDavid Ahern goto nla_put_failure; 1495d1566268SDavid Ahern } 1496c0a72077SDavid Ahern break; 1497c2364199SDavid Ahern } 1498b0f60193SDavid Ahern 1499c2364199SDavid Ahern *flags |= (nhc->nhc_flags & RTNH_F_ONLINK); 1500c2364199SDavid Ahern if (nhc->nhc_flags & RTNH_F_OFFLOAD) 1501b0f60193SDavid Ahern *flags |= RTNH_F_OFFLOAD; 1502b0f60193SDavid Ahern 1503c2364199SDavid Ahern if (!skip_oif && nhc->nhc_dev && 1504c2364199SDavid Ahern nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex)) 1505b0f60193SDavid Ahern goto nla_put_failure; 1506b0f60193SDavid Ahern 1507c2364199SDavid Ahern if (nhc->nhc_lwtstate && 1508ffa8ce54SDavid Ahern lwtunnel_fill_encap(skb, nhc->nhc_lwtstate, 1509ffa8ce54SDavid Ahern RTA_ENCAP, RTA_ENCAP_TYPE) < 0) 1510b0f60193SDavid Ahern goto nla_put_failure; 1511b0f60193SDavid Ahern 1512b0f60193SDavid Ahern return 0; 1513b0f60193SDavid Ahern 1514b0f60193SDavid Ahern nla_put_failure: 1515b0f60193SDavid Ahern return -EMSGSIZE; 1516b0f60193SDavid Ahern } 1517c0a72077SDavid Ahern EXPORT_SYMBOL_GPL(fib_nexthop_info); 1518b0f60193SDavid Ahern 1519c0a72077SDavid Ahern #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) 1520c0a72077SDavid Ahern int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, 1521c2364199SDavid Ahern int nh_weight) 1522b0f60193SDavid Ahern { 1523c2364199SDavid Ahern const struct net_device *dev = nhc->nhc_dev; 1524b0f60193SDavid Ahern struct rtnexthop *rtnh; 1525ecc5663cSDavid Ahern unsigned char flags = 0; 1526b0f60193SDavid Ahern 1527b0f60193SDavid Ahern rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 1528b0f60193SDavid Ahern if (!rtnh) 1529b0f60193SDavid Ahern goto nla_put_failure; 1530b0f60193SDavid Ahern 1531c2364199SDavid Ahern rtnh->rtnh_hops = nh_weight - 1; 1532b0f60193SDavid Ahern rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; 1533b0f60193SDavid Ahern 1534c2364199SDavid Ahern if (fib_nexthop_info(skb, nhc, &flags, true) < 0) 1535b0f60193SDavid Ahern goto nla_put_failure; 1536b0f60193SDavid Ahern 1537b0f60193SDavid Ahern rtnh->rtnh_flags = flags; 1538b0f60193SDavid Ahern 1539b0f60193SDavid Ahern /* length of rtnetlink header + attributes */ 1540b0f60193SDavid Ahern rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; 1541b0f60193SDavid Ahern 1542b0f60193SDavid Ahern return 0; 1543b0f60193SDavid Ahern 1544b0f60193SDavid Ahern nla_put_failure: 1545b0f60193SDavid Ahern return -EMSGSIZE; 1546b0f60193SDavid Ahern } 1547c0a72077SDavid Ahern EXPORT_SYMBOL_GPL(fib_add_nexthop); 1548c2364199SDavid Ahern #endif 1549b0f60193SDavid Ahern 1550c2364199SDavid Ahern #ifdef CONFIG_IP_ROUTE_MULTIPATH 1551b0f60193SDavid Ahern static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) 1552b0f60193SDavid Ahern { 1553b0f60193SDavid Ahern struct nlattr *mp; 1554b0f60193SDavid Ahern 1555ae0be8deSMichal Kubecek mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); 1556b0f60193SDavid Ahern if (!mp) 1557b0f60193SDavid Ahern goto nla_put_failure; 1558b0f60193SDavid Ahern 1559b0f60193SDavid Ahern for_nexthops(fi) { 1560c2364199SDavid Ahern if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0) 1561b0f60193SDavid Ahern goto nla_put_failure; 1562b0f60193SDavid Ahern #ifdef CONFIG_IP_ROUTE_CLASSID 1563b0f60193SDavid Ahern if (nh->nh_tclassid && 1564b0f60193SDavid Ahern nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) 1565b0f60193SDavid Ahern goto nla_put_failure; 1566b0f60193SDavid Ahern #endif 1567b0f60193SDavid Ahern } endfor_nexthops(fi); 1568b0f60193SDavid Ahern 1569b0f60193SDavid Ahern nla_nest_end(skb, mp); 1570b0f60193SDavid Ahern 1571b0f60193SDavid Ahern return 0; 1572b0f60193SDavid Ahern 1573b0f60193SDavid Ahern nla_put_failure: 1574b0f60193SDavid Ahern return -EMSGSIZE; 1575b0f60193SDavid Ahern } 1576b0f60193SDavid Ahern #else 1577b0f60193SDavid Ahern static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) 1578b0f60193SDavid Ahern { 1579b0f60193SDavid Ahern return 0; 1580b0f60193SDavid Ahern } 1581b0f60193SDavid Ahern #endif 1582b0f60193SDavid Ahern 158315e47304SEric W. Biederman int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, 158437e826c5SDavid S. Miller u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, 1585b6544c0bSJamal Hadi Salim struct fib_info *fi, unsigned int flags) 15861da177e4SLinus Torvalds { 15871da177e4SLinus Torvalds struct nlmsghdr *nlh; 1588be403ea1SThomas Graf struct rtmsg *rtm; 15891da177e4SLinus Torvalds 159015e47304SEric W. Biederman nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); 159151456b29SIan Morris if (!nlh) 159226932566SPatrick McHardy return -EMSGSIZE; 1593be403ea1SThomas Graf 1594be403ea1SThomas Graf rtm = nlmsg_data(nlh); 15951da177e4SLinus Torvalds rtm->rtm_family = AF_INET; 15961da177e4SLinus Torvalds rtm->rtm_dst_len = dst_len; 15971da177e4SLinus Torvalds rtm->rtm_src_len = 0; 15981da177e4SLinus Torvalds rtm->rtm_tos = tos; 1599709772e6SKrzysztof Piotr Oledzki if (tb_id < 256) 16001da177e4SLinus Torvalds rtm->rtm_table = tb_id; 1601709772e6SKrzysztof Piotr Oledzki else 1602709772e6SKrzysztof Piotr Oledzki rtm->rtm_table = RT_TABLE_COMPAT; 1603f3756b79SDavid S. Miller if (nla_put_u32(skb, RTA_TABLE, tb_id)) 1604f3756b79SDavid S. Miller goto nla_put_failure; 16051da177e4SLinus Torvalds rtm->rtm_type = type; 16061da177e4SLinus Torvalds rtm->rtm_flags = fi->fib_flags; 160737e826c5SDavid S. Miller rtm->rtm_scope = fi->fib_scope; 16081da177e4SLinus Torvalds rtm->rtm_protocol = fi->fib_protocol; 1609be403ea1SThomas Graf 1610f3756b79SDavid S. Miller if (rtm->rtm_dst_len && 1611930345eaSJiri Benc nla_put_in_addr(skb, RTA_DST, dst)) 1612f3756b79SDavid S. Miller goto nla_put_failure; 1613f3756b79SDavid S. Miller if (fi->fib_priority && 1614f3756b79SDavid S. Miller nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) 1615f3756b79SDavid S. Miller goto nla_put_failure; 16163fb07dafSEric Dumazet if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) 1617be403ea1SThomas Graf goto nla_put_failure; 1618be403ea1SThomas Graf 1619f3756b79SDavid S. Miller if (fi->fib_prefsrc && 1620930345eaSJiri Benc nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) 1621f3756b79SDavid S. Miller goto nla_put_failure; 16221da177e4SLinus Torvalds if (fi->fib_nhs == 1) { 1623b0f60193SDavid Ahern struct fib_nh *nh = &fi->fib_nh[0]; 1624ecc5663cSDavid Ahern unsigned char flags = 0; 1625be403ea1SThomas Graf 1626c2364199SDavid Ahern if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0) 1627be403ea1SThomas Graf goto nla_put_failure; 16281da177e4SLinus Torvalds 1629b0f60193SDavid Ahern rtm->rtm_flags = flags; 1630c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1631f3756b79SDavid S. Miller if (nh->nh_tclassid && 1632f3756b79SDavid S. Miller nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) 1633f3756b79SDavid S. Miller goto nla_put_failure; 16348265abc0SPatrick McHardy #endif 1635b0f60193SDavid Ahern } else { 1636b0f60193SDavid Ahern if (fib_add_multipath(skb, fi) < 0) 1637ea7a8085SDavid Ahern goto nla_put_failure; 16381da177e4SLinus Torvalds } 1639b0f60193SDavid Ahern 1640053c095aSJohannes Berg nlmsg_end(skb, nlh); 1641053c095aSJohannes Berg return 0; 16421da177e4SLinus Torvalds 1643be403ea1SThomas Graf nla_put_failure: 164426932566SPatrick McHardy nlmsg_cancel(skb, nlh); 164526932566SPatrick McHardy return -EMSGSIZE; 16461da177e4SLinus Torvalds } 16471da177e4SLinus Torvalds 16481da177e4SLinus Torvalds /* 16496a31d2a9SEric Dumazet * Update FIB if: 16506a31d2a9SEric Dumazet * - local address disappeared -> we must delete all the entries 16516a31d2a9SEric Dumazet * referring to it. 16526a31d2a9SEric Dumazet * - device went down -> we must shutdown all nexthops going via it. 16531da177e4SLinus Torvalds */ 16545a56a0b3SMark Tomlinson int fib_sync_down_addr(struct net_device *dev, __be32 local) 16551da177e4SLinus Torvalds { 16561da177e4SLinus Torvalds int ret = 0; 16571da177e4SLinus Torvalds unsigned int hash = fib_laddr_hashfn(local); 16581da177e4SLinus Torvalds struct hlist_head *head = &fib_info_laddrhash[hash]; 16595a56a0b3SMark Tomlinson struct net *net = dev_net(dev); 16605a56a0b3SMark Tomlinson int tb_id = l3mdev_fib_table(dev); 16611da177e4SLinus Torvalds struct fib_info *fi; 16621da177e4SLinus Torvalds 166351456b29SIan Morris if (!fib_info_laddrhash || local == 0) 166485326fa5SDenis V. Lunev return 0; 166585326fa5SDenis V. Lunev 1666b67bfe0dSSasha Levin hlist_for_each_entry(fi, head, fib_lhash) { 16675a56a0b3SMark Tomlinson if (!net_eq(fi->fib_net, net) || 16685a56a0b3SMark Tomlinson fi->fib_tb_id != tb_id) 16694814bdbdSDenis V. Lunev continue; 16701da177e4SLinus Torvalds if (fi->fib_prefsrc == local) { 16711da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 16721da177e4SLinus Torvalds ret++; 16731da177e4SLinus Torvalds } 16741da177e4SLinus Torvalds } 167585326fa5SDenis V. Lunev return ret; 16761da177e4SLinus Torvalds } 16771da177e4SLinus Torvalds 1678b75ed8b1SDavid Ahern static int call_fib_nh_notifiers(struct fib_nh *nh, 1679982acb97SIdo Schimmel enum fib_event_type event_type) 1680982acb97SIdo Schimmel { 1681b75ed8b1SDavid Ahern bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev); 1682982acb97SIdo Schimmel struct fib_nh_notifier_info info = { 1683b75ed8b1SDavid Ahern .fib_nh = nh, 1684982acb97SIdo Schimmel }; 1685982acb97SIdo Schimmel 1686982acb97SIdo Schimmel switch (event_type) { 1687982acb97SIdo Schimmel case FIB_EVENT_NH_ADD: 1688b75ed8b1SDavid Ahern if (nh->fib_nh_flags & RTNH_F_DEAD) 1689982acb97SIdo Schimmel break; 1690b75ed8b1SDavid Ahern if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) 1691982acb97SIdo Schimmel break; 1692b75ed8b1SDavid Ahern return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, 1693982acb97SIdo Schimmel &info.info); 1694982acb97SIdo Schimmel case FIB_EVENT_NH_DEL: 1695b75ed8b1SDavid Ahern if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) || 1696b75ed8b1SDavid Ahern (nh->fib_nh_flags & RTNH_F_DEAD)) 1697b75ed8b1SDavid Ahern return call_fib4_notifiers(dev_net(nh->fib_nh_dev), 1698982acb97SIdo Schimmel event_type, &info.info); 1699982acb97SIdo Schimmel default: 1700982acb97SIdo Schimmel break; 1701982acb97SIdo Schimmel } 1702982acb97SIdo Schimmel 1703982acb97SIdo Schimmel return NOTIFY_DONE; 1704982acb97SIdo Schimmel } 1705982acb97SIdo Schimmel 1706af7d6cceSSabrina Dubroca /* Update the PMTU of exceptions when: 1707af7d6cceSSabrina Dubroca * - the new MTU of the first hop becomes smaller than the PMTU 1708af7d6cceSSabrina Dubroca * - the old MTU was the same as the PMTU, and it limited discovery of 1709af7d6cceSSabrina Dubroca * larger MTUs on the path. With that limit raised, we can now 1710af7d6cceSSabrina Dubroca * discover larger MTUs 1711af7d6cceSSabrina Dubroca * A special case is locked exceptions, for which the PMTU is smaller 1712af7d6cceSSabrina Dubroca * than the minimal accepted PMTU: 1713af7d6cceSSabrina Dubroca * - if the new MTU is greater than the PMTU, don't make any change 1714af7d6cceSSabrina Dubroca * - otherwise, unlock and set PMTU 1715af7d6cceSSabrina Dubroca */ 1716af7d6cceSSabrina Dubroca static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) 1717af7d6cceSSabrina Dubroca { 1718af7d6cceSSabrina Dubroca struct fnhe_hash_bucket *bucket; 1719af7d6cceSSabrina Dubroca int i; 1720af7d6cceSSabrina Dubroca 1721af7d6cceSSabrina Dubroca bucket = rcu_dereference_protected(nh->nh_exceptions, 1); 1722af7d6cceSSabrina Dubroca if (!bucket) 1723af7d6cceSSabrina Dubroca return; 1724af7d6cceSSabrina Dubroca 1725af7d6cceSSabrina Dubroca for (i = 0; i < FNHE_HASH_SIZE; i++) { 1726af7d6cceSSabrina Dubroca struct fib_nh_exception *fnhe; 1727af7d6cceSSabrina Dubroca 1728af7d6cceSSabrina Dubroca for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); 1729af7d6cceSSabrina Dubroca fnhe; 1730af7d6cceSSabrina Dubroca fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { 1731af7d6cceSSabrina Dubroca if (fnhe->fnhe_mtu_locked) { 1732af7d6cceSSabrina Dubroca if (new <= fnhe->fnhe_pmtu) { 1733af7d6cceSSabrina Dubroca fnhe->fnhe_pmtu = new; 1734af7d6cceSSabrina Dubroca fnhe->fnhe_mtu_locked = false; 1735af7d6cceSSabrina Dubroca } 1736af7d6cceSSabrina Dubroca } else if (new < fnhe->fnhe_pmtu || 1737af7d6cceSSabrina Dubroca orig == fnhe->fnhe_pmtu) { 1738af7d6cceSSabrina Dubroca fnhe->fnhe_pmtu = new; 1739af7d6cceSSabrina Dubroca } 1740af7d6cceSSabrina Dubroca } 1741af7d6cceSSabrina Dubroca } 1742af7d6cceSSabrina Dubroca } 1743af7d6cceSSabrina Dubroca 1744af7d6cceSSabrina Dubroca void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) 1745af7d6cceSSabrina Dubroca { 1746af7d6cceSSabrina Dubroca unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1747af7d6cceSSabrina Dubroca struct hlist_head *head = &fib_info_devhash[hash]; 1748af7d6cceSSabrina Dubroca struct fib_nh *nh; 1749af7d6cceSSabrina Dubroca 1750af7d6cceSSabrina Dubroca hlist_for_each_entry(nh, head, nh_hash) { 1751b75ed8b1SDavid Ahern if (nh->fib_nh_dev == dev) 1752af7d6cceSSabrina Dubroca nh_update_mtu(nh, dev->mtu, orig_mtu); 1753af7d6cceSSabrina Dubroca } 1754af7d6cceSSabrina Dubroca } 1755af7d6cceSSabrina Dubroca 17564f823defSJulian Anastasov /* Event force Flags Description 17574f823defSJulian Anastasov * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host 17584f823defSJulian Anastasov * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host 17594f823defSJulian Anastasov * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed 17604f823defSJulian Anastasov * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed 17614f823defSJulian Anastasov */ 17624f823defSJulian Anastasov int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) 176385326fa5SDenis V. Lunev { 176485326fa5SDenis V. Lunev int ret = 0; 176585326fa5SDenis V. Lunev int scope = RT_SCOPE_NOWHERE; 17661da177e4SLinus Torvalds struct fib_info *prev_fi = NULL; 17671da177e4SLinus Torvalds unsigned int hash = fib_devindex_hashfn(dev->ifindex); 17681da177e4SLinus Torvalds struct hlist_head *head = &fib_info_devhash[hash]; 17691da177e4SLinus Torvalds struct fib_nh *nh; 17701da177e4SLinus Torvalds 17714f823defSJulian Anastasov if (force) 177285326fa5SDenis V. Lunev scope = -1; 177385326fa5SDenis V. Lunev 1774b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 17751da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 17761da177e4SLinus Torvalds int dead; 17771da177e4SLinus Torvalds 17781da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 1779b75ed8b1SDavid Ahern if (nh->fib_nh_dev != dev || fi == prev_fi) 17801da177e4SLinus Torvalds continue; 17811da177e4SLinus Torvalds prev_fi = fi; 17821da177e4SLinus Torvalds dead = 0; 17831da177e4SLinus Torvalds change_nexthops(fi) { 1784b75ed8b1SDavid Ahern if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) 17851da177e4SLinus Torvalds dead++; 1786b75ed8b1SDavid Ahern else if (nexthop_nh->fib_nh_dev == dev && 1787b75ed8b1SDavid Ahern nexthop_nh->fib_nh_scope != scope) { 17888a3d0316SAndy Gospodarek switch (event) { 17898a3d0316SAndy Gospodarek case NETDEV_DOWN: 17908a3d0316SAndy Gospodarek case NETDEV_UNREGISTER: 1791b75ed8b1SDavid Ahern nexthop_nh->fib_nh_flags |= RTNH_F_DEAD; 17928a3d0316SAndy Gospodarek /* fall through */ 17938a3d0316SAndy Gospodarek case NETDEV_CHANGE: 1794b75ed8b1SDavid Ahern nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN; 17958a3d0316SAndy Gospodarek break; 17968a3d0316SAndy Gospodarek } 1797982acb97SIdo Schimmel call_fib_nh_notifiers(nexthop_nh, 1798982acb97SIdo Schimmel FIB_EVENT_NH_DEL); 17991da177e4SLinus Torvalds dead++; 18001da177e4SLinus Torvalds } 18011da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 18028a3d0316SAndy Gospodarek if (event == NETDEV_UNREGISTER && 1803b75ed8b1SDavid Ahern nexthop_nh->fib_nh_dev == dev) { 18041da177e4SLinus Torvalds dead = fi->fib_nhs; 18051da177e4SLinus Torvalds break; 18061da177e4SLinus Torvalds } 18071da177e4SLinus Torvalds #endif 18081da177e4SLinus Torvalds } endfor_nexthops(fi) 18091da177e4SLinus Torvalds if (dead == fi->fib_nhs) { 18108a3d0316SAndy Gospodarek switch (event) { 18118a3d0316SAndy Gospodarek case NETDEV_DOWN: 18128a3d0316SAndy Gospodarek case NETDEV_UNREGISTER: 18131da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 18148a3d0316SAndy Gospodarek /* fall through */ 18158a3d0316SAndy Gospodarek case NETDEV_CHANGE: 18168a3d0316SAndy Gospodarek fi->fib_flags |= RTNH_F_LINKDOWN; 18178a3d0316SAndy Gospodarek break; 18188a3d0316SAndy Gospodarek } 18191da177e4SLinus Torvalds ret++; 18201da177e4SLinus Torvalds } 18210e884c78SPeter Nørlund 18220e884c78SPeter Nørlund fib_rebalance(fi); 18231da177e4SLinus Torvalds } 18241da177e4SLinus Torvalds 18251da177e4SLinus Torvalds return ret; 18261da177e4SLinus Torvalds } 18271da177e4SLinus Torvalds 18280c838ff1SDavid S. Miller /* Must be invoked inside of an RCU protected region. */ 1829c7b371e3SDavid Ahern static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) 18300c838ff1SDavid S. Miller { 18310c838ff1SDavid S. Miller struct fib_info *fi = NULL, *last_resort = NULL; 183256315f9eSAlexander Duyck struct hlist_head *fa_head = res->fa_head; 18330c838ff1SDavid S. Miller struct fib_table *tb = res->table; 183418a912e9SJulian Anastasov u8 slen = 32 - res->prefixlen; 18350c838ff1SDavid S. Miller int order = -1, last_idx = -1; 18362392debcSJulian Anastasov struct fib_alias *fa, *fa1 = NULL; 18372392debcSJulian Anastasov u32 last_prio = res->fi->fib_priority; 18382392debcSJulian Anastasov u8 last_tos = 0; 18390c838ff1SDavid S. Miller 184056315f9eSAlexander Duyck hlist_for_each_entry_rcu(fa, fa_head, fa_list) { 18410c838ff1SDavid S. Miller struct fib_info *next_fi = fa->fa_info; 18420c838ff1SDavid S. Miller 184318a912e9SJulian Anastasov if (fa->fa_slen != slen) 184418a912e9SJulian Anastasov continue; 18452392debcSJulian Anastasov if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 18462392debcSJulian Anastasov continue; 184718a912e9SJulian Anastasov if (fa->tb_id != tb->tb_id) 184818a912e9SJulian Anastasov continue; 18492392debcSJulian Anastasov if (next_fi->fib_priority > last_prio && 18502392debcSJulian Anastasov fa->fa_tos == last_tos) { 18512392debcSJulian Anastasov if (last_tos) 18522392debcSJulian Anastasov continue; 18532392debcSJulian Anastasov break; 18542392debcSJulian Anastasov } 18552392debcSJulian Anastasov if (next_fi->fib_flags & RTNH_F_DEAD) 18562392debcSJulian Anastasov continue; 18572392debcSJulian Anastasov last_tos = fa->fa_tos; 18582392debcSJulian Anastasov last_prio = next_fi->fib_priority; 18592392debcSJulian Anastasov 186037e826c5SDavid S. Miller if (next_fi->fib_scope != res->scope || 18610c838ff1SDavid S. Miller fa->fa_type != RTN_UNICAST) 18620c838ff1SDavid S. Miller continue; 1863b75ed8b1SDavid Ahern if (!next_fi->fib_nh[0].fib_nh_gw4 || 1864b75ed8b1SDavid Ahern next_fi->fib_nh[0].fib_nh_scope != RT_SCOPE_LINK) 18650c838ff1SDavid S. Miller continue; 18660c838ff1SDavid S. Miller 18670c838ff1SDavid S. Miller fib_alias_accessed(fa); 18680c838ff1SDavid S. Miller 186951456b29SIan Morris if (!fi) { 18700c838ff1SDavid S. Miller if (next_fi != res->fi) 18710c838ff1SDavid S. Miller break; 18722392debcSJulian Anastasov fa1 = fa; 18730c838ff1SDavid S. Miller } else if (!fib_detect_death(fi, order, &last_resort, 18742392debcSJulian Anastasov &last_idx, fa1->fa_default)) { 18750c838ff1SDavid S. Miller fib_result_assign(res, fi); 18762392debcSJulian Anastasov fa1->fa_default = order; 18770c838ff1SDavid S. Miller goto out; 18780c838ff1SDavid S. Miller } 18790c838ff1SDavid S. Miller fi = next_fi; 18800c838ff1SDavid S. Miller order++; 18810c838ff1SDavid S. Miller } 18820c838ff1SDavid S. Miller 188351456b29SIan Morris if (order <= 0 || !fi) { 18842392debcSJulian Anastasov if (fa1) 18852392debcSJulian Anastasov fa1->fa_default = -1; 18860c838ff1SDavid S. Miller goto out; 18870c838ff1SDavid S. Miller } 18880c838ff1SDavid S. Miller 18890c838ff1SDavid S. Miller if (!fib_detect_death(fi, order, &last_resort, &last_idx, 18902392debcSJulian Anastasov fa1->fa_default)) { 18910c838ff1SDavid S. Miller fib_result_assign(res, fi); 18922392debcSJulian Anastasov fa1->fa_default = order; 18930c838ff1SDavid S. Miller goto out; 18940c838ff1SDavid S. Miller } 18950c838ff1SDavid S. Miller 18960c838ff1SDavid S. Miller if (last_idx >= 0) 18970c838ff1SDavid S. Miller fib_result_assign(res, last_resort); 18982392debcSJulian Anastasov fa1->fa_default = last_idx; 18990c838ff1SDavid S. Miller out: 190031d40937SEric Dumazet return; 19010c838ff1SDavid S. Miller } 19020c838ff1SDavid S. Miller 19031da177e4SLinus Torvalds /* 19046a31d2a9SEric Dumazet * Dead device goes up. We wake up dead nexthops. 19056a31d2a9SEric Dumazet * It takes sense only on multipath routes. 19061da177e4SLinus Torvalds */ 1907ecc5663cSDavid Ahern int fib_sync_up(struct net_device *dev, unsigned char nh_flags) 19081da177e4SLinus Torvalds { 19091da177e4SLinus Torvalds struct fib_info *prev_fi; 19101da177e4SLinus Torvalds unsigned int hash; 19111da177e4SLinus Torvalds struct hlist_head *head; 19121da177e4SLinus Torvalds struct fib_nh *nh; 19131da177e4SLinus Torvalds int ret; 19141da177e4SLinus Torvalds 19151da177e4SLinus Torvalds if (!(dev->flags & IFF_UP)) 19161da177e4SLinus Torvalds return 0; 19171da177e4SLinus Torvalds 1918c9b3292eSJulian Anastasov if (nh_flags & RTNH_F_DEAD) { 1919c9b3292eSJulian Anastasov unsigned int flags = dev_get_flags(dev); 1920c9b3292eSJulian Anastasov 1921c9b3292eSJulian Anastasov if (flags & (IFF_RUNNING | IFF_LOWER_UP)) 1922c9b3292eSJulian Anastasov nh_flags |= RTNH_F_LINKDOWN; 1923c9b3292eSJulian Anastasov } 1924c9b3292eSJulian Anastasov 19251da177e4SLinus Torvalds prev_fi = NULL; 19261da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 19271da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 19281da177e4SLinus Torvalds ret = 0; 19291da177e4SLinus Torvalds 1930b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 19311da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 19321da177e4SLinus Torvalds int alive; 19331da177e4SLinus Torvalds 19341da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 1935b75ed8b1SDavid Ahern if (nh->fib_nh_dev != dev || fi == prev_fi) 19361da177e4SLinus Torvalds continue; 19371da177e4SLinus Torvalds 19381da177e4SLinus Torvalds prev_fi = fi; 19391da177e4SLinus Torvalds alive = 0; 19401da177e4SLinus Torvalds change_nexthops(fi) { 1941b75ed8b1SDavid Ahern if (!(nexthop_nh->fib_nh_flags & nh_flags)) { 19421da177e4SLinus Torvalds alive++; 19431da177e4SLinus Torvalds continue; 19441da177e4SLinus Torvalds } 1945b75ed8b1SDavid Ahern if (!nexthop_nh->fib_nh_dev || 1946b75ed8b1SDavid Ahern !(nexthop_nh->fib_nh_dev->flags & IFF_UP)) 19471da177e4SLinus Torvalds continue; 1948b75ed8b1SDavid Ahern if (nexthop_nh->fib_nh_dev != dev || 194971fceff0SDavid S. Miller !__in_dev_get_rtnl(dev)) 19501da177e4SLinus Torvalds continue; 19511da177e4SLinus Torvalds alive++; 1952b75ed8b1SDavid Ahern nexthop_nh->fib_nh_flags &= ~nh_flags; 1953982acb97SIdo Schimmel call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); 19541da177e4SLinus Torvalds } endfor_nexthops(fi) 19551da177e4SLinus Torvalds 19561da177e4SLinus Torvalds if (alive > 0) { 19578a3d0316SAndy Gospodarek fi->fib_flags &= ~nh_flags; 19581da177e4SLinus Torvalds ret++; 19591da177e4SLinus Torvalds } 19600e884c78SPeter Nørlund 19610e884c78SPeter Nørlund fib_rebalance(fi); 19621da177e4SLinus Torvalds } 19631da177e4SLinus Torvalds 19641da177e4SLinus Torvalds return ret; 19651da177e4SLinus Torvalds } 19661da177e4SLinus Torvalds 19678a3d0316SAndy Gospodarek #ifdef CONFIG_IP_ROUTE_MULTIPATH 1968a6db4494SDavid Ahern static bool fib_good_nh(const struct fib_nh *nh) 1969a6db4494SDavid Ahern { 1970a6db4494SDavid Ahern int state = NUD_REACHABLE; 1971a6db4494SDavid Ahern 1972b75ed8b1SDavid Ahern if (nh->fib_nh_scope == RT_SCOPE_LINK) { 1973a6db4494SDavid Ahern struct neighbour *n; 1974a6db4494SDavid Ahern 1975a6db4494SDavid Ahern rcu_read_lock_bh(); 1976a6db4494SDavid Ahern 19771a38c43dSDavid Ahern if (likely(nh->fib_nh_gw_family == AF_INET)) 1978b75ed8b1SDavid Ahern n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, 1979b75ed8b1SDavid Ahern (__force u32)nh->fib_nh_gw4); 19801a38c43dSDavid Ahern else if (nh->fib_nh_gw_family == AF_INET6) 19811a38c43dSDavid Ahern n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, 19821a38c43dSDavid Ahern &nh->fib_nh_gw6); 19831a38c43dSDavid Ahern else 19841a38c43dSDavid Ahern n = NULL; 1985a6db4494SDavid Ahern if (n) 1986a6db4494SDavid Ahern state = n->nud_state; 1987a6db4494SDavid Ahern 1988a6db4494SDavid Ahern rcu_read_unlock_bh(); 1989a6db4494SDavid Ahern } 1990a6db4494SDavid Ahern 1991a6db4494SDavid Ahern return !!(state & NUD_VALID); 1992a6db4494SDavid Ahern } 19938a3d0316SAndy Gospodarek 19940e884c78SPeter Nørlund void fib_select_multipath(struct fib_result *res, int hash) 19951da177e4SLinus Torvalds { 19961da177e4SLinus Torvalds struct fib_info *fi = res->fi; 1997a6db4494SDavid Ahern struct net *net = fi->fib_net; 1998a6db4494SDavid Ahern bool first = false; 19991da177e4SLinus Torvalds 2000eba618abSDavid Ahern change_nexthops(fi) { 20016174a30dSXin Long if (net->ipv4.sysctl_fib_multipath_use_neigh) { 2002eba618abSDavid Ahern if (!fib_good_nh(nexthop_nh)) 20030eeb075fSAndy Gospodarek continue; 2004a6db4494SDavid Ahern if (!first) { 2005a6db4494SDavid Ahern res->nh_sel = nhsel; 2006eba618abSDavid Ahern res->nhc = &nexthop_nh->nh_common; 2007a6db4494SDavid Ahern first = true; 2008a6db4494SDavid Ahern } 20096174a30dSXin Long } 20106174a30dSXin Long 2011eba618abSDavid Ahern if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound)) 20126174a30dSXin Long continue; 20136174a30dSXin Long 20146174a30dSXin Long res->nh_sel = nhsel; 2015eba618abSDavid Ahern res->nhc = &nexthop_nh->nh_common; 20166174a30dSXin Long return; 20171da177e4SLinus Torvalds } endfor_nexthops(fi); 20181da177e4SLinus Torvalds } 20191da177e4SLinus Torvalds #endif 20203ce58d84SDavid Ahern 20213ce58d84SDavid Ahern void fib_select_path(struct net *net, struct fib_result *res, 2022bf4e0a3dSNikolay Aleksandrov struct flowi4 *fl4, const struct sk_buff *skb) 20233ce58d84SDavid Ahern { 20240d876f2cSDavid Ahern if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) 20250d876f2cSDavid Ahern goto check_saddr; 20267a18c5b9SDavid Ahern 20273ce58d84SDavid Ahern #ifdef CONFIG_IP_ROUTE_MULTIPATH 20280d876f2cSDavid Ahern if (res->fi->fib_nhs > 1) { 20297efc0b6bSDavid Ahern int h = fib_multipath_hash(net, fl4, skb, NULL); 20309920e48bSPaolo Abeni 2031bf4e0a3dSNikolay Aleksandrov fib_select_multipath(res, h); 20323ce58d84SDavid Ahern } 20333ce58d84SDavid Ahern else 20343ce58d84SDavid Ahern #endif 20353ce58d84SDavid Ahern if (!res->prefixlen && 20363ce58d84SDavid Ahern res->table->tb_num_default > 1 && 20370d876f2cSDavid Ahern res->type == RTN_UNICAST) 20383ce58d84SDavid Ahern fib_select_default(fl4, res); 20393ce58d84SDavid Ahern 20400d876f2cSDavid Ahern check_saddr: 20413ce58d84SDavid Ahern if (!fl4->saddr) 2042eba618abSDavid Ahern fl4->saddr = fib_result_prefsrc(net, res); 20433ce58d84SDavid Ahern } 2044