11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * IPv4 Forwarding Information Base: semantics. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 111da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 121da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 131da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 167c0f6ba6SLinus Torvalds #include <linux/uaccess.h> 171da177e4SLinus Torvalds #include <linux/bitops.h> 181da177e4SLinus Torvalds #include <linux/types.h> 191da177e4SLinus Torvalds #include <linux/kernel.h> 201da177e4SLinus Torvalds #include <linux/jiffies.h> 211da177e4SLinus Torvalds #include <linux/mm.h> 221da177e4SLinus Torvalds #include <linux/string.h> 231da177e4SLinus Torvalds #include <linux/socket.h> 241da177e4SLinus Torvalds #include <linux/sockios.h> 251da177e4SLinus Torvalds #include <linux/errno.h> 261da177e4SLinus Torvalds #include <linux/in.h> 271da177e4SLinus Torvalds #include <linux/inet.h> 2814c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h> 291da177e4SLinus Torvalds #include <linux/netdevice.h> 301da177e4SLinus Torvalds #include <linux/if_arp.h> 311da177e4SLinus Torvalds #include <linux/proc_fs.h> 321da177e4SLinus Torvalds #include <linux/skbuff.h> 331da177e4SLinus Torvalds #include <linux/init.h> 345a0e3ad6STejun Heo #include <linux/slab.h> 35c3ab2b4eSDavid Ahern #include <linux/netlink.h> 361da177e4SLinus Torvalds 
3714c85021SArnaldo Carvalho de Melo #include <net/arp.h> 381da177e4SLinus Torvalds #include <net/ip.h> 391da177e4SLinus Torvalds #include <net/protocol.h> 401da177e4SLinus Torvalds #include <net/route.h> 411da177e4SLinus Torvalds #include <net/tcp.h> 421da177e4SLinus Torvalds #include <net/sock.h> 431da177e4SLinus Torvalds #include <net/ip_fib.h> 44f21c7bc5SThomas Graf #include <net/netlink.h> 454e902c57SThomas Graf #include <net/nexthop.h> 46571e7226SRoopa Prabhu #include <net/lwtunnel.h> 4704b1d4e5SIdo Schimmel #include <net/fib_notifier.h> 481da177e4SLinus Torvalds 491da177e4SLinus Torvalds #include "fib_lookup.h" 501da177e4SLinus Torvalds 51832b4c5eSStephen Hemminger static DEFINE_SPINLOCK(fib_info_lock); 521da177e4SLinus Torvalds static struct hlist_head *fib_info_hash; 531da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash; 54123b9731SDavid S. Miller static unsigned int fib_info_hash_size; 551da177e4SLinus Torvalds static unsigned int fib_info_cnt; 561da177e4SLinus Torvalds 571da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8 581da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 591da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 601da177e4SLinus Torvalds 611da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 621da177e4SLinus Torvalds 636a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 646a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh; \ 656a31d2a9SEric Dumazet for (nhsel = 0, nh = (fi)->fib_nh; \ 666a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 676a31d2a9SEric Dumazet nh++, nhsel++) 681da177e4SLinus Torvalds 696a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 706a31d2a9SEric Dumazet int nhsel; struct fib_nh *nexthop_nh; \ 716a31d2a9SEric Dumazet for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 726a31d2a9SEric Dumazet nhsel < (fi)->fib_nhs; \ 736a31d2a9SEric Dumazet nexthop_nh++, nhsel++) 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds #else /* 
CONFIG_IP_ROUTE_MULTIPATH */ 761da177e4SLinus Torvalds 771da177e4SLinus Torvalds /* Hope, that gcc will optimize it to get rid of dummy loop */ 781da177e4SLinus Torvalds 796a31d2a9SEric Dumazet #define for_nexthops(fi) { \ 806a31d2a9SEric Dumazet int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \ 811da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 821da177e4SLinus Torvalds 836a31d2a9SEric Dumazet #define change_nexthops(fi) { \ 846a31d2a9SEric Dumazet int nhsel; \ 856a31d2a9SEric Dumazet struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 861da177e4SLinus Torvalds for (nhsel = 0; nhsel < 1; nhsel++) 871da177e4SLinus Torvalds 881da177e4SLinus Torvalds #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 891da177e4SLinus Torvalds 901da177e4SLinus Torvalds #define endfor_nexthops(fi) } 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds 933be0686bSDavid S. Miller const struct fib_prop fib_props[RTN_MAX + 1] = { 946a31d2a9SEric Dumazet [RTN_UNSPEC] = { 951da177e4SLinus Torvalds .error = 0, 961da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 976a31d2a9SEric Dumazet }, 986a31d2a9SEric Dumazet [RTN_UNICAST] = { 991da177e4SLinus Torvalds .error = 0, 1001da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1016a31d2a9SEric Dumazet }, 1026a31d2a9SEric Dumazet [RTN_LOCAL] = { 1031da177e4SLinus Torvalds .error = 0, 1041da177e4SLinus Torvalds .scope = RT_SCOPE_HOST, 1056a31d2a9SEric Dumazet }, 1066a31d2a9SEric Dumazet [RTN_BROADCAST] = { 1071da177e4SLinus Torvalds .error = 0, 1081da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1096a31d2a9SEric Dumazet }, 1106a31d2a9SEric Dumazet [RTN_ANYCAST] = { 1111da177e4SLinus Torvalds .error = 0, 1121da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1136a31d2a9SEric Dumazet }, 1146a31d2a9SEric Dumazet [RTN_MULTICAST] = { 1151da177e4SLinus Torvalds .error = 0, 1161da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1176a31d2a9SEric Dumazet }, 1186a31d2a9SEric Dumazet [RTN_BLACKHOLE] = { 1191da177e4SLinus Torvalds .error = -EINVAL, 
1201da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1216a31d2a9SEric Dumazet }, 1226a31d2a9SEric Dumazet [RTN_UNREACHABLE] = { 1231da177e4SLinus Torvalds .error = -EHOSTUNREACH, 1241da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1256a31d2a9SEric Dumazet }, 1266a31d2a9SEric Dumazet [RTN_PROHIBIT] = { 1271da177e4SLinus Torvalds .error = -EACCES, 1281da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1296a31d2a9SEric Dumazet }, 1306a31d2a9SEric Dumazet [RTN_THROW] = { 1311da177e4SLinus Torvalds .error = -EAGAIN, 1321da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1336a31d2a9SEric Dumazet }, 1346a31d2a9SEric Dumazet [RTN_NAT] = { 1351da177e4SLinus Torvalds .error = -EINVAL, 1361da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1376a31d2a9SEric Dumazet }, 1386a31d2a9SEric Dumazet [RTN_XRESOLVE] = { 1391da177e4SLinus Torvalds .error = -EINVAL, 1401da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1416a31d2a9SEric Dumazet }, 1421da177e4SLinus Torvalds }; 1431da177e4SLinus Torvalds 144c5038a83SDavid S. Miller static void rt_fibinfo_free(struct rtable __rcu **rtp) 14554764bb6SEric Dumazet { 14654764bb6SEric Dumazet struct rtable *rt = rcu_dereference_protected(*rtp, 1); 14754764bb6SEric Dumazet 14854764bb6SEric Dumazet if (!rt) 14954764bb6SEric Dumazet return; 15054764bb6SEric Dumazet 15154764bb6SEric Dumazet /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); 15254764bb6SEric Dumazet * because we waited an RCU grace period before calling 15354764bb6SEric Dumazet * free_fib_info_rcu() 15454764bb6SEric Dumazet */ 15554764bb6SEric Dumazet 15695c47f9cSWei Wang dst_dev_put(&rt->dst); 157b838d5e1SWei Wang dst_release_immediate(&rt->dst); 15854764bb6SEric Dumazet } 15954764bb6SEric Dumazet 160c5038a83SDavid S. Miller static void free_nh_exceptions(struct fib_nh *nh) 161c5038a83SDavid S. Miller { 162caa41527SEric Dumazet struct fnhe_hash_bucket *hash; 163c5038a83SDavid S. Miller int i; 164c5038a83SDavid S. 
Miller 165caa41527SEric Dumazet hash = rcu_dereference_protected(nh->nh_exceptions, 1); 166caa41527SEric Dumazet if (!hash) 167caa41527SEric Dumazet return; 168c5038a83SDavid S. Miller for (i = 0; i < FNHE_HASH_SIZE; i++) { 169c5038a83SDavid S. Miller struct fib_nh_exception *fnhe; 170c5038a83SDavid S. Miller 171c5038a83SDavid S. Miller fnhe = rcu_dereference_protected(hash[i].chain, 1); 172c5038a83SDavid S. Miller while (fnhe) { 173c5038a83SDavid S. Miller struct fib_nh_exception *next; 174c5038a83SDavid S. Miller 175c5038a83SDavid S. Miller next = rcu_dereference_protected(fnhe->fnhe_next, 1); 176c5038a83SDavid S. Miller 1772ffae99dSTimo Teräs rt_fibinfo_free(&fnhe->fnhe_rth_input); 1782ffae99dSTimo Teräs rt_fibinfo_free(&fnhe->fnhe_rth_output); 179c5038a83SDavid S. Miller 180c5038a83SDavid S. Miller kfree(fnhe); 181c5038a83SDavid S. Miller 182c5038a83SDavid S. Miller fnhe = next; 183c5038a83SDavid S. Miller } 184c5038a83SDavid S. Miller } 185c5038a83SDavid S. Miller kfree(hash); 186c5038a83SDavid S. Miller } 187c5038a83SDavid S. Miller 188c5038a83SDavid S. 
Miller static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) 189d26b3a7cSEric Dumazet { 190d26b3a7cSEric Dumazet int cpu; 191d26b3a7cSEric Dumazet 192d26b3a7cSEric Dumazet if (!rtp) 193d26b3a7cSEric Dumazet return; 194d26b3a7cSEric Dumazet 195d26b3a7cSEric Dumazet for_each_possible_cpu(cpu) { 196d26b3a7cSEric Dumazet struct rtable *rt; 197d26b3a7cSEric Dumazet 198d26b3a7cSEric Dumazet rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); 1990830106cSWei Wang if (rt) { 20095c47f9cSWei Wang dst_dev_put(&rt->dst); 201b838d5e1SWei Wang dst_release_immediate(&rt->dst); 202d26b3a7cSEric Dumazet } 2030830106cSWei Wang } 204d26b3a7cSEric Dumazet free_percpu(rtp); 205d26b3a7cSEric Dumazet } 206d26b3a7cSEric Dumazet 2071da177e4SLinus Torvalds /* Release a nexthop info record */ 20819c1ea14SYan, Zheng static void free_fib_info_rcu(struct rcu_head *head) 20919c1ea14SYan, Zheng { 21019c1ea14SYan, Zheng struct fib_info *fi = container_of(head, struct fib_info, rcu); 2113fb07dafSEric Dumazet struct dst_metrics *m; 21219c1ea14SYan, Zheng 213e49cc0daSYanmin Zhang change_nexthops(fi) { 214e49cc0daSYanmin Zhang if (nexthop_nh->nh_dev) 215e49cc0daSYanmin Zhang dev_put(nexthop_nh->nh_dev); 2165a6228a0SNicolas Dichtel lwtstate_put(nexthop_nh->nh_lwtstate); 2174895c771SDavid S. Miller free_nh_exceptions(nexthop_nh); 218c5038a83SDavid S. Miller rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); 219c5038a83SDavid S. 
Miller rt_fibinfo_free(&nexthop_nh->nh_rth_input); 220e49cc0daSYanmin Zhang } endfor_nexthops(fi); 221e49cc0daSYanmin Zhang 2223fb07dafSEric Dumazet m = fi->fib_metrics; 2239620fef2SEric Dumazet if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) 2243fb07dafSEric Dumazet kfree(m); 22519c1ea14SYan, Zheng kfree(fi); 22619c1ea14SYan, Zheng } 2271da177e4SLinus Torvalds 2281da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi) 2291da177e4SLinus Torvalds { 2301da177e4SLinus Torvalds if (fi->fib_dead == 0) { 231058bd4d2SJoe Perches pr_warn("Freeing alive fib_info %p\n", fi); 2321da177e4SLinus Torvalds return; 2331da177e4SLinus Torvalds } 2341da177e4SLinus Torvalds fib_info_cnt--; 2357a9bc9b8SDavid S. Miller #ifdef CONFIG_IP_ROUTE_CLASSID 2367a9bc9b8SDavid S. Miller change_nexthops(fi) { 2377a9bc9b8SDavid S. Miller if (nexthop_nh->nh_tclassid) 238f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users--; 2397a9bc9b8SDavid S. Miller } endfor_nexthops(fi); 2407a9bc9b8SDavid S. Miller #endif 24119c1ea14SYan, Zheng call_rcu(&fi->rcu, free_fib_info_rcu); 2421da177e4SLinus Torvalds } 243b423cb10SIdo Schimmel EXPORT_SYMBOL_GPL(free_fib_info); 2441da177e4SLinus Torvalds 2451da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi) 2461da177e4SLinus Torvalds { 247832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 2481da177e4SLinus Torvalds if (fi && --fi->fib_treeref == 0) { 2491da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 2501da177e4SLinus Torvalds if (fi->fib_prefsrc) 2511da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 2521da177e4SLinus Torvalds change_nexthops(fi) { 25371fceff0SDavid S. Miller if (!nexthop_nh->nh_dev) 2541da177e4SLinus Torvalds continue; 25571fceff0SDavid S. 
Miller hlist_del(&nexthop_nh->nh_hash); 2561da177e4SLinus Torvalds } endfor_nexthops(fi) 2571da177e4SLinus Torvalds fi->fib_dead = 1; 2581da177e4SLinus Torvalds fib_info_put(fi); 2591da177e4SLinus Torvalds } 260832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 2611da177e4SLinus Torvalds } 2621da177e4SLinus Torvalds 2636a31d2a9SEric Dumazet static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 2641da177e4SLinus Torvalds { 2651da177e4SLinus Torvalds const struct fib_nh *onh = ofi->fib_nh; 2661da177e4SLinus Torvalds 2671da177e4SLinus Torvalds for_nexthops(fi) { 2681da177e4SLinus Torvalds if (nh->nh_oif != onh->nh_oif || 2691da177e4SLinus Torvalds nh->nh_gw != onh->nh_gw || 2701da177e4SLinus Torvalds nh->nh_scope != onh->nh_scope || 2711da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 2721da177e4SLinus Torvalds nh->nh_weight != onh->nh_weight || 2731da177e4SLinus Torvalds #endif 274c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 2751da177e4SLinus Torvalds nh->nh_tclassid != onh->nh_tclassid || 2761da177e4SLinus Torvalds #endif 277571e7226SRoopa Prabhu lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) || 2788a3d0316SAndy Gospodarek ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK)) 2791da177e4SLinus Torvalds return -1; 2801da177e4SLinus Torvalds onh++; 2811da177e4SLinus Torvalds } endfor_nexthops(fi); 2821da177e4SLinus Torvalds return 0; 2831da177e4SLinus Torvalds } 2841da177e4SLinus Torvalds 28588ebc72fSDavid S. Miller static inline unsigned int fib_devindex_hashfn(unsigned int val) 28688ebc72fSDavid S. Miller { 28788ebc72fSDavid S. Miller unsigned int mask = DEVINDEX_HASHSIZE - 1; 28888ebc72fSDavid S. Miller 28988ebc72fSDavid S. Miller return (val ^ 29088ebc72fSDavid S. Miller (val >> DEVINDEX_HASHBITS) ^ 29188ebc72fSDavid S. Miller (val >> (DEVINDEX_HASHBITS * 2))) & mask; 29288ebc72fSDavid S. Miller } 29388ebc72fSDavid S. 
Miller 2941da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 2951da177e4SLinus Torvalds { 296123b9731SDavid S. Miller unsigned int mask = (fib_info_hash_size - 1); 2971da177e4SLinus Torvalds unsigned int val = fi->fib_nhs; 2981da177e4SLinus Torvalds 29937e826c5SDavid S. Miller val ^= (fi->fib_protocol << 8) | fi->fib_scope; 30081f7bf6cSAl Viro val ^= (__force u32)fi->fib_prefsrc; 3011da177e4SLinus Torvalds val ^= fi->fib_priority; 30288ebc72fSDavid S. Miller for_nexthops(fi) { 30388ebc72fSDavid S. Miller val ^= fib_devindex_hashfn(nh->nh_oif); 30488ebc72fSDavid S. Miller } endfor_nexthops(fi) 3051da177e4SLinus Torvalds 3061da177e4SLinus Torvalds return (val ^ (val >> 7) ^ (val >> 12)) & mask; 3071da177e4SLinus Torvalds } 3081da177e4SLinus Torvalds 3091da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi) 3101da177e4SLinus Torvalds { 3111da177e4SLinus Torvalds struct hlist_head *head; 3121da177e4SLinus Torvalds struct fib_info *fi; 3131da177e4SLinus Torvalds unsigned int hash; 3141da177e4SLinus Torvalds 3151da177e4SLinus Torvalds hash = fib_info_hashfn(nfi); 3161da177e4SLinus Torvalds head = &fib_info_hash[hash]; 3171da177e4SLinus Torvalds 318b67bfe0dSSasha Levin hlist_for_each_entry(fi, head, fib_hash) { 31909ad9bc7SOctavian Purdila if (!net_eq(fi->fib_net, nfi->fib_net)) 3204814bdbdSDenis V. Lunev continue; 3211da177e4SLinus Torvalds if (fi->fib_nhs != nfi->fib_nhs) 3221da177e4SLinus Torvalds continue; 3231da177e4SLinus Torvalds if (nfi->fib_protocol == fi->fib_protocol && 32437e826c5SDavid S. 
Miller nfi->fib_scope == fi->fib_scope && 3251da177e4SLinus Torvalds nfi->fib_prefsrc == fi->fib_prefsrc && 3261da177e4SLinus Torvalds nfi->fib_priority == fi->fib_priority && 327f4ef85bbSEric Dumazet nfi->fib_type == fi->fib_type && 3281da177e4SLinus Torvalds memcmp(nfi->fib_metrics, fi->fib_metrics, 329fcd13f42SEric Dumazet sizeof(u32) * RTAX_MAX) == 0 && 3308a3d0316SAndy Gospodarek !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && 3311da177e4SLinus Torvalds (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 3321da177e4SLinus Torvalds return fi; 3331da177e4SLinus Torvalds } 3341da177e4SLinus Torvalds 3351da177e4SLinus Torvalds return NULL; 3361da177e4SLinus Torvalds } 3371da177e4SLinus Torvalds 3381da177e4SLinus Torvalds /* Check, that the gateway is already configured. 3396a31d2a9SEric Dumazet * Used only by redirect accept routine. 3401da177e4SLinus Torvalds */ 341d878e72eSAl Viro int ip_fib_check_default(__be32 gw, struct net_device *dev) 3421da177e4SLinus Torvalds { 3431da177e4SLinus Torvalds struct hlist_head *head; 3441da177e4SLinus Torvalds struct fib_nh *nh; 3451da177e4SLinus Torvalds unsigned int hash; 3461da177e4SLinus Torvalds 347832b4c5eSStephen Hemminger spin_lock(&fib_info_lock); 3481da177e4SLinus Torvalds 3491da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 3501da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 351b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 3521da177e4SLinus Torvalds if (nh->nh_dev == dev && 3531da177e4SLinus Torvalds nh->nh_gw == gw && 3541da177e4SLinus Torvalds !(nh->nh_flags & RTNH_F_DEAD)) { 355832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 3561da177e4SLinus Torvalds return 0; 3571da177e4SLinus Torvalds } 3581da177e4SLinus Torvalds } 3591da177e4SLinus Torvalds 360832b4c5eSStephen Hemminger spin_unlock(&fib_info_lock); 3611da177e4SLinus Torvalds 3621da177e4SLinus Torvalds return -1; 3631da177e4SLinus Torvalds } 3641da177e4SLinus Torvalds 365339bf98fSThomas Graf static inline 
size_t fib_nlmsg_size(struct fib_info *fi) 366339bf98fSThomas Graf { 367339bf98fSThomas Graf size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 368339bf98fSThomas Graf + nla_total_size(4) /* RTA_TABLE */ 369339bf98fSThomas Graf + nla_total_size(4) /* RTA_DST */ 370339bf98fSThomas Graf + nla_total_size(4) /* RTA_PRIORITY */ 371ea697639SDaniel Borkmann + nla_total_size(4) /* RTA_PREFSRC */ 372ea697639SDaniel Borkmann + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ 373339bf98fSThomas Graf 374339bf98fSThomas Graf /* space for nested metrics */ 375339bf98fSThomas Graf payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 376339bf98fSThomas Graf 377339bf98fSThomas Graf if (fi->fib_nhs) { 378571e7226SRoopa Prabhu size_t nh_encapsize = 0; 379339bf98fSThomas Graf /* Also handles the special case fib_nhs == 1 */ 380339bf98fSThomas Graf 381339bf98fSThomas Graf /* each nexthop is packed in an attribute */ 382339bf98fSThomas Graf size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 383339bf98fSThomas Graf 384339bf98fSThomas Graf /* may contain flow and gateway attribute */ 385339bf98fSThomas Graf nhsize += 2 * nla_total_size(4); 386339bf98fSThomas Graf 387571e7226SRoopa Prabhu /* grab encap info */ 388571e7226SRoopa Prabhu for_nexthops(fi) { 389571e7226SRoopa Prabhu if (nh->nh_lwtstate) { 390571e7226SRoopa Prabhu /* RTA_ENCAP_TYPE */ 391571e7226SRoopa Prabhu nh_encapsize += lwtunnel_get_encap_size( 392571e7226SRoopa Prabhu nh->nh_lwtstate); 393571e7226SRoopa Prabhu /* RTA_ENCAP */ 394571e7226SRoopa Prabhu nh_encapsize += nla_total_size(2); 395571e7226SRoopa Prabhu } 396571e7226SRoopa Prabhu } endfor_nexthops(fi); 397571e7226SRoopa Prabhu 398339bf98fSThomas Graf /* all nexthops are packed in a nested attribute */ 399571e7226SRoopa Prabhu payload += nla_total_size((fi->fib_nhs * nhsize) + 400571e7226SRoopa Prabhu nh_encapsize); 401571e7226SRoopa Prabhu 402339bf98fSThomas Graf } 403339bf98fSThomas Graf 404339bf98fSThomas Graf return payload; 405339bf98fSThomas Graf 
} 406339bf98fSThomas Graf 40781f7bf6cSAl Viro void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 4089877b253SJoe Perches int dst_len, u32 tb_id, const struct nl_info *info, 409b8f55831SMilan Kocian unsigned int nlm_flags) 4101da177e4SLinus Torvalds { 4111da177e4SLinus Torvalds struct sk_buff *skb; 4124e902c57SThomas Graf u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 413f21c7bc5SThomas Graf int err = -ENOBUFS; 4141da177e4SLinus Torvalds 415339bf98fSThomas Graf skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 41651456b29SIan Morris if (!skb) 417f21c7bc5SThomas Graf goto errout; 4181da177e4SLinus Torvalds 41915e47304SEric W. Biederman err = fib_dump_info(skb, info->portid, seq, event, tb_id, 42037e826c5SDavid S. Miller fa->fa_type, key, dst_len, 421b8f55831SMilan Kocian fa->fa_tos, fa->fa_info, nlm_flags); 42226932566SPatrick McHardy if (err < 0) { 42326932566SPatrick McHardy /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 42426932566SPatrick McHardy WARN_ON(err == -EMSGSIZE); 42526932566SPatrick McHardy kfree_skb(skb); 42626932566SPatrick McHardy goto errout; 42726932566SPatrick McHardy } 42815e47304SEric W. Biederman rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, 4294e902c57SThomas Graf info->nlh, GFP_KERNEL); 4301ce85fe4SPablo Neira Ayuso return; 431f21c7bc5SThomas Graf errout: 432f21c7bc5SThomas Graf if (err < 0) 4334d1169c1SDenis V. 
Lunev rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 4341da177e4SLinus Torvalds } 4351da177e4SLinus Torvalds 436c9cb6b6eSStephen Hemminger static int fib_detect_death(struct fib_info *fi, int order, 437c9cb6b6eSStephen Hemminger struct fib_info **last_resort, int *last_idx, 438c9cb6b6eSStephen Hemminger int dflt) 4391da177e4SLinus Torvalds { 4401da177e4SLinus Torvalds struct neighbour *n; 4411da177e4SLinus Torvalds int state = NUD_NONE; 4421da177e4SLinus Torvalds 4431da177e4SLinus Torvalds n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 4441da177e4SLinus Torvalds if (n) { 4451da177e4SLinus Torvalds state = n->nud_state; 4461da177e4SLinus Torvalds neigh_release(n); 44788f64320SJulian Anastasov } else { 44888f64320SJulian Anastasov return 0; 4491da177e4SLinus Torvalds } 4501da177e4SLinus Torvalds if (state == NUD_REACHABLE) 4511da177e4SLinus Torvalds return 0; 452c17860a0SDenis V. Lunev if ((state & NUD_VALID) && order != dflt) 4531da177e4SLinus Torvalds return 0; 4541da177e4SLinus Torvalds if ((state & NUD_VALID) || 45588f64320SJulian Anastasov (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) { 4561da177e4SLinus Torvalds *last_resort = fi; 4571da177e4SLinus Torvalds *last_idx = order; 4581da177e4SLinus Torvalds } 4591da177e4SLinus Torvalds return 1; 4601da177e4SLinus Torvalds } 4611da177e4SLinus Torvalds 4621da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4631da177e4SLinus Torvalds 4646d8422a1SDavid Ahern static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, 4656d8422a1SDavid Ahern struct netlink_ext_ack *extack) 4661da177e4SLinus Torvalds { 4671da177e4SLinus Torvalds int nhs = 0; 4681da177e4SLinus Torvalds 4694e902c57SThomas Graf while (rtnh_ok(rtnh, remaining)) { 4701da177e4SLinus Torvalds nhs++; 4714e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 4721da177e4SLinus Torvalds } 4731da177e4SLinus Torvalds 4744e902c57SThomas Graf /* leftover implies invalid nexthop configuration, discard it */ 
475c3ab2b4eSDavid Ahern if (remaining > 0) { 476c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 477c3ab2b4eSDavid Ahern "Invalid nexthop configuration - extra data after nexthops"); 478c3ab2b4eSDavid Ahern nhs = 0; 479c3ab2b4eSDavid Ahern } 480c3ab2b4eSDavid Ahern 481c3ab2b4eSDavid Ahern return nhs; 4824e902c57SThomas Graf } 4831da177e4SLinus Torvalds 4844e902c57SThomas Graf static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 4856d8422a1SDavid Ahern int remaining, struct fib_config *cfg, 4866d8422a1SDavid Ahern struct netlink_ext_ack *extack) 4874e902c57SThomas Graf { 488571e7226SRoopa Prabhu int ret; 489571e7226SRoopa Prabhu 4901da177e4SLinus Torvalds change_nexthops(fi) { 4914e902c57SThomas Graf int attrlen; 4924e902c57SThomas Graf 493c3ab2b4eSDavid Ahern if (!rtnh_ok(rtnh, remaining)) { 494c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 495c3ab2b4eSDavid Ahern "Invalid nexthop configuration - extra data after nexthop"); 4961da177e4SLinus Torvalds return -EINVAL; 497c3ab2b4eSDavid Ahern } 4984e902c57SThomas Graf 499c3ab2b4eSDavid Ahern if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { 500c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 501c3ab2b4eSDavid Ahern "Invalid flags for nexthop - can not contain DEAD or LINKDOWN"); 50280610229SJulian Anastasov return -EINVAL; 503c3ab2b4eSDavid Ahern } 50480610229SJulian Anastasov 50571fceff0SDavid S. Miller nexthop_nh->nh_flags = 50671fceff0SDavid S. Miller (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 50771fceff0SDavid S. Miller nexthop_nh->nh_oif = rtnh->rtnh_ifindex; 50871fceff0SDavid S. Miller nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; 5094e902c57SThomas Graf 5104e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 5114e902c57SThomas Graf if (attrlen > 0) { 5124e902c57SThomas Graf struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 5134e902c57SThomas Graf 5144e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 51567b61f6cSJiri Benc nexthop_nh->nh_gw = nla ? 
nla_get_in_addr(nla) : 0; 516c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 5174e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 51871fceff0SDavid S. Miller nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 5197a9bc9b8SDavid S. Miller if (nexthop_nh->nh_tclassid) 520f4530fa5SDavid S. Miller fi->fib_net->ipv4.fib_num_tclassid_users++; 5211da177e4SLinus Torvalds #endif 522571e7226SRoopa Prabhu nla = nla_find(attrs, attrlen, RTA_ENCAP); 523571e7226SRoopa Prabhu if (nla) { 524571e7226SRoopa Prabhu struct lwtunnel_state *lwtstate; 525571e7226SRoopa Prabhu struct nlattr *nla_entype; 526571e7226SRoopa Prabhu 527571e7226SRoopa Prabhu nla_entype = nla_find(attrs, attrlen, 528571e7226SRoopa Prabhu RTA_ENCAP_TYPE); 529c3ab2b4eSDavid Ahern if (!nla_entype) { 530c3ab2b4eSDavid Ahern NL_SET_BAD_ATTR(extack, nla); 531c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 532c3ab2b4eSDavid Ahern "Encap type is missing"); 533571e7226SRoopa Prabhu goto err_inval; 534c3ab2b4eSDavid Ahern } 53530357d7dSDavid Ahern 53630357d7dSDavid Ahern ret = lwtunnel_build_state(nla_get_u16( 537571e7226SRoopa Prabhu nla_entype), 538127eb7cdSTom Herbert nla, AF_INET, cfg, 5399ae28727SDavid Ahern &lwtstate, extack); 540571e7226SRoopa Prabhu if (ret) 541571e7226SRoopa Prabhu goto errout; 5425a6228a0SNicolas Dichtel nexthop_nh->nh_lwtstate = 5435a6228a0SNicolas Dichtel lwtstate_get(lwtstate); 544571e7226SRoopa Prabhu } 5451da177e4SLinus Torvalds } 5464e902c57SThomas Graf 5474e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 5481da177e4SLinus Torvalds } endfor_nexthops(fi); 5494e902c57SThomas Graf 5501da177e4SLinus Torvalds return 0; 551571e7226SRoopa Prabhu 552571e7226SRoopa Prabhu err_inval: 553571e7226SRoopa Prabhu ret = -EINVAL; 554571e7226SRoopa Prabhu 555571e7226SRoopa Prabhu errout: 556571e7226SRoopa Prabhu return ret; 5571da177e4SLinus Torvalds } 5581da177e4SLinus Torvalds 5590e884c78SPeter Nørlund static void fib_rebalance(struct fib_info *fi) 5600e884c78SPeter Nørlund { 
5610e884c78SPeter Nørlund int total; 5620e884c78SPeter Nørlund int w; 5630e884c78SPeter Nørlund struct in_device *in_dev; 5640e884c78SPeter Nørlund 5650e884c78SPeter Nørlund if (fi->fib_nhs < 2) 5660e884c78SPeter Nørlund return; 5670e884c78SPeter Nørlund 5680e884c78SPeter Nørlund total = 0; 5690e884c78SPeter Nørlund for_nexthops(fi) { 5700e884c78SPeter Nørlund if (nh->nh_flags & RTNH_F_DEAD) 5710e884c78SPeter Nørlund continue; 5720e884c78SPeter Nørlund 57351161aa9SDavid Ahern in_dev = __in_dev_get_rtnl(nh->nh_dev); 5740e884c78SPeter Nørlund 5750e884c78SPeter Nørlund if (in_dev && 5760e884c78SPeter Nørlund IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 5770e884c78SPeter Nørlund nh->nh_flags & RTNH_F_LINKDOWN) 5780e884c78SPeter Nørlund continue; 5790e884c78SPeter Nørlund 5800e884c78SPeter Nørlund total += nh->nh_weight; 5810e884c78SPeter Nørlund } endfor_nexthops(fi); 5820e884c78SPeter Nørlund 5830e884c78SPeter Nørlund w = 0; 5840e884c78SPeter Nørlund change_nexthops(fi) { 5850e884c78SPeter Nørlund int upper_bound; 5860e884c78SPeter Nørlund 58751161aa9SDavid Ahern in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev); 5880e884c78SPeter Nørlund 5890e884c78SPeter Nørlund if (nexthop_nh->nh_flags & RTNH_F_DEAD) { 5900e884c78SPeter Nørlund upper_bound = -1; 5910e884c78SPeter Nørlund } else if (in_dev && 5920e884c78SPeter Nørlund IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 5930e884c78SPeter Nørlund nexthop_nh->nh_flags & RTNH_F_LINKDOWN) { 5940e884c78SPeter Nørlund upper_bound = -1; 5950e884c78SPeter Nørlund } else { 5960e884c78SPeter Nørlund w += nexthop_nh->nh_weight; 5970a837fe4SPeter Nørlund upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, 5980e884c78SPeter Nørlund total) - 1; 5990e884c78SPeter Nørlund } 6000e884c78SPeter Nørlund 6010e884c78SPeter Nørlund atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); 6020e884c78SPeter Nørlund } endfor_nexthops(fi); 6030e884c78SPeter Nørlund } 6040e884c78SPeter Nørlund 6050e884c78SPeter Nørlund static inline void 
fib_add_weight(struct fib_info *fi, 6060e884c78SPeter Nørlund const struct fib_nh *nh) 6070e884c78SPeter Nørlund { 6080e884c78SPeter Nørlund fi->fib_weight += nh->nh_weight; 6090e884c78SPeter Nørlund } 6100e884c78SPeter Nørlund 6110e884c78SPeter Nørlund #else /* CONFIG_IP_ROUTE_MULTIPATH */ 6120e884c78SPeter Nørlund 6130e884c78SPeter Nørlund #define fib_rebalance(fi) do { } while (0) 6140e884c78SPeter Nørlund #define fib_add_weight(fi, nh) do { } while (0) 6150e884c78SPeter Nørlund 6160e884c78SPeter Nørlund #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 6171da177e4SLinus Torvalds 61830357d7dSDavid Ahern static int fib_encap_match(u16 encap_type, 619571e7226SRoopa Prabhu struct nlattr *encap, 62030357d7dSDavid Ahern const struct fib_nh *nh, 6219ae28727SDavid Ahern const struct fib_config *cfg, 6229ae28727SDavid Ahern struct netlink_ext_ack *extack) 623571e7226SRoopa Prabhu { 624571e7226SRoopa Prabhu struct lwtunnel_state *lwtstate; 625df383e62SJiri Benc int ret, result = 0; 626571e7226SRoopa Prabhu 627571e7226SRoopa Prabhu if (encap_type == LWTUNNEL_ENCAP_NONE) 628571e7226SRoopa Prabhu return 0; 629571e7226SRoopa Prabhu 6309ae28727SDavid Ahern ret = lwtunnel_build_state(encap_type, encap, AF_INET, 6319ae28727SDavid Ahern cfg, &lwtstate, extack); 632df383e62SJiri Benc if (!ret) { 633df383e62SJiri Benc result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); 634df383e62SJiri Benc lwtstate_free(lwtstate); 635df383e62SJiri Benc } 636571e7226SRoopa Prabhu 637df383e62SJiri Benc return result; 638571e7226SRoopa Prabhu } 639571e7226SRoopa Prabhu 6409ae28727SDavid Ahern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, 6419ae28727SDavid Ahern struct netlink_ext_ack *extack) 6421da177e4SLinus Torvalds { 6431da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 6444e902c57SThomas Graf struct rtnexthop *rtnh; 6454e902c57SThomas Graf int remaining; 6461da177e4SLinus Torvalds #endif 6471da177e4SLinus Torvalds 6484e902c57SThomas Graf if (cfg->fc_priority && cfg->fc_priority 
!= fi->fib_priority) 6491da177e4SLinus Torvalds return 1; 6501da177e4SLinus Torvalds 6514e902c57SThomas Graf if (cfg->fc_oif || cfg->fc_gw) { 652571e7226SRoopa Prabhu if (cfg->fc_encap) { 6539ae28727SDavid Ahern if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, 6549ae28727SDavid Ahern fi->fib_nh, cfg, extack)) 655571e7226SRoopa Prabhu return 1; 656571e7226SRoopa Prabhu } 6574e902c57SThomas Graf if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 6584e902c57SThomas Graf (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 6591da177e4SLinus Torvalds return 0; 6601da177e4SLinus Torvalds return 1; 6611da177e4SLinus Torvalds } 6621da177e4SLinus Torvalds 6631da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 66451456b29SIan Morris if (!cfg->fc_mp) 6651da177e4SLinus Torvalds return 0; 6664e902c57SThomas Graf 6674e902c57SThomas Graf rtnh = cfg->fc_mp; 6684e902c57SThomas Graf remaining = cfg->fc_mp_len; 6691da177e4SLinus Torvalds 6701da177e4SLinus Torvalds for_nexthops(fi) { 6714e902c57SThomas Graf int attrlen; 6721da177e4SLinus Torvalds 6734e902c57SThomas Graf if (!rtnh_ok(rtnh, remaining)) 6741da177e4SLinus Torvalds return -EINVAL; 6754e902c57SThomas Graf 6764e902c57SThomas Graf if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) 6771da177e4SLinus Torvalds return 1; 6784e902c57SThomas Graf 6794e902c57SThomas Graf attrlen = rtnh_attrlen(rtnh); 680f76936d0SJiri Pirko if (attrlen > 0) { 6814e902c57SThomas Graf struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 6824e902c57SThomas Graf 6834e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_GATEWAY); 68467b61f6cSJiri Benc if (nla && nla_get_in_addr(nla) != nh->nh_gw) 6851da177e4SLinus Torvalds return 1; 686c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 6874e902c57SThomas Graf nla = nla_find(attrs, attrlen, RTA_FLOW); 6884e902c57SThomas Graf if (nla && nla_get_u32(nla) != nh->nh_tclassid) 6891da177e4SLinus Torvalds return 1; 6901da177e4SLinus Torvalds #endif 6911da177e4SLinus Torvalds } 
6924e902c57SThomas Graf 6934e902c57SThomas Graf rtnh = rtnh_next(rtnh, &remaining); 6941da177e4SLinus Torvalds } endfor_nexthops(fi); 6951da177e4SLinus Torvalds #endif 6961da177e4SLinus Torvalds return 0; 6971da177e4SLinus Torvalds } 6981da177e4SLinus Torvalds 6995f9ae3d9SXin Long bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) 7005f9ae3d9SXin Long { 7015f9ae3d9SXin Long struct nlattr *nla; 7025f9ae3d9SXin Long int remaining; 7035f9ae3d9SXin Long 7045f9ae3d9SXin Long if (!cfg->fc_mx) 7055f9ae3d9SXin Long return true; 7065f9ae3d9SXin Long 7075f9ae3d9SXin Long nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 7085f9ae3d9SXin Long int type = nla_type(nla); 7095f9ae3d9SXin Long u32 val; 7105f9ae3d9SXin Long 7115f9ae3d9SXin Long if (!type) 7125f9ae3d9SXin Long continue; 7135f9ae3d9SXin Long if (type > RTAX_MAX) 7145f9ae3d9SXin Long return false; 7155f9ae3d9SXin Long 7165f9ae3d9SXin Long if (type == RTAX_CC_ALGO) { 7175f9ae3d9SXin Long char tmp[TCP_CA_NAME_MAX]; 7185f9ae3d9SXin Long bool ecn_ca = false; 7195f9ae3d9SXin Long 7205f9ae3d9SXin Long nla_strlcpy(tmp, nla, sizeof(tmp)); 7215f9ae3d9SXin Long val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 7225f9ae3d9SXin Long } else { 7235f9ae3d9SXin Long val = nla_get_u32(nla); 7245f9ae3d9SXin Long } 7255f9ae3d9SXin Long 7265f9ae3d9SXin Long if (fi->fib_metrics->metrics[type - 1] != val) 7275f9ae3d9SXin Long return false; 7285f9ae3d9SXin Long } 7295f9ae3d9SXin Long 7305f9ae3d9SXin Long return true; 7315f9ae3d9SXin Long } 7325f9ae3d9SXin Long 7331da177e4SLinus Torvalds 7341da177e4SLinus Torvalds /* 7356a31d2a9SEric Dumazet * Picture 7366a31d2a9SEric Dumazet * ------- 7376a31d2a9SEric Dumazet * 7386a31d2a9SEric Dumazet * Semantics of nexthop is very messy by historical reasons. 7396a31d2a9SEric Dumazet * We have to take into account, that: 7406a31d2a9SEric Dumazet * a) gateway can be actually local interface address, 7416a31d2a9SEric Dumazet * so that gatewayed route is direct. 
 * b) gateway must be on-link address, possibly
 *    described not by an ifaddr, but also by a direct route.
 * c) If both gateway and interface are specified, they should not
 *    contradict.
 * d) If we use tunnel routes, gateway could be not on-link.
 *
 * Attempt to reconcile all of these (alas, self-contradictory) conditions
 * results in pretty ugly and hairy code with obscure logic.
 *
 * I chose to generalize it instead, so that the size
 * of code does not increase practically, but it becomes
 * much more general.
 * Every prefix is assigned a "scope" value: "host" is local address,
 * "link" is direct route,
 * [ ... "site" ... "interior" ... ]
 * and "universe" is true gateway route with global meaning.
 *
 * Every prefix refers to a set of "nexthop"s (gw, oif),
 * where gw must have narrower scope. This recursion stops
 * when gw has LOCAL scope or if "nexthop" is declared ONLINK,
 * which means that gw is forced to be on link.
 *
 * Code is still hairy, but now it is apparently logically
 * consistent and very flexible. E.g. as a by-product it allows
 * independent exterior and interior routing processes
 * to co-exist in peace.
 *
 * Normally it looks as follows.
7706a31d2a9SEric Dumazet * 7716a31d2a9SEric Dumazet * {universe prefix} -> (gw, oif) [scope link] 7726a31d2a9SEric Dumazet * | 7736a31d2a9SEric Dumazet * |-> {link prefix} -> (gw, oif) [scope local] 7746a31d2a9SEric Dumazet * | 7756a31d2a9SEric Dumazet * |-> {local prefix} (terminal node) 7761da177e4SLinus Torvalds */ 7774e902c57SThomas Graf static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 7786d8422a1SDavid Ahern struct fib_nh *nh, struct netlink_ext_ack *extack) 7791da177e4SLinus Torvalds { 780127eb7cdSTom Herbert int err = 0; 78186167a37SDenis V. Lunev struct net *net; 7826a31d2a9SEric Dumazet struct net_device *dev; 7831da177e4SLinus Torvalds 78486167a37SDenis V. Lunev net = cfg->fc_nlinfo.nl_net; 7851da177e4SLinus Torvalds if (nh->nh_gw) { 7861da177e4SLinus Torvalds struct fib_result res; 7871da177e4SLinus Torvalds 7881da177e4SLinus Torvalds if (nh->nh_flags & RTNH_F_ONLINK) { 78930bbaa19SDavid Ahern unsigned int addr_type; 7901da177e4SLinus Torvalds 791c3ab2b4eSDavid Ahern if (cfg->fc_scope >= RT_SCOPE_LINK) { 792c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 793c3ab2b4eSDavid Ahern "Nexthop has invalid scope"); 7941da177e4SLinus Torvalds return -EINVAL; 795c3ab2b4eSDavid Ahern } 7966a31d2a9SEric Dumazet dev = __dev_get_by_index(net, nh->nh_oif); 7976a31d2a9SEric Dumazet if (!dev) 7981da177e4SLinus Torvalds return -ENODEV; 799c3ab2b4eSDavid Ahern if (!(dev->flags & IFF_UP)) { 800c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 801c3ab2b4eSDavid Ahern "Nexthop device is not up"); 8021da177e4SLinus Torvalds return -ENETDOWN; 803c3ab2b4eSDavid Ahern } 80430bbaa19SDavid Ahern addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw); 805c3ab2b4eSDavid Ahern if (addr_type != RTN_UNICAST) { 806c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 807c3ab2b4eSDavid Ahern "Nexthop has invalid gateway"); 80830bbaa19SDavid Ahern return -EINVAL; 809c3ab2b4eSDavid Ahern } 8108a3d0316SAndy Gospodarek if (!netif_carrier_ok(dev)) 8118a3d0316SAndy Gospodarek nh->nh_flags |= 
RTNH_F_LINKDOWN; 8121da177e4SLinus Torvalds nh->nh_dev = dev; 8131da177e4SLinus Torvalds dev_hold(dev); 8141da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_LINK; 8151da177e4SLinus Torvalds return 0; 8161da177e4SLinus Torvalds } 817ebc0ffaeSEric Dumazet rcu_read_lock(); 8181da177e4SLinus Torvalds { 8193bfd8472SDavid Ahern struct fib_table *tbl = NULL; 8209ade2286SDavid S. Miller struct flowi4 fl4 = { 8219ade2286SDavid S. Miller .daddr = nh->nh_gw, 8229ade2286SDavid S. Miller .flowi4_scope = cfg->fc_scope + 1, 8239ade2286SDavid S. Miller .flowi4_oif = nh->nh_oif, 8246a662719SCong Wang .flowi4_iif = LOOPBACK_IFINDEX, 8254e902c57SThomas Graf }; 8261da177e4SLinus Torvalds 8271da177e4SLinus Torvalds /* It is not necessary, but requires a bit of thinking */ 8289ade2286SDavid S. Miller if (fl4.flowi4_scope < RT_SCOPE_LINK) 8299ade2286SDavid S. Miller fl4.flowi4_scope = RT_SCOPE_LINK; 8303bfd8472SDavid Ahern 8313bfd8472SDavid Ahern if (cfg->fc_table) 8323bfd8472SDavid Ahern tbl = fib_get_table(net, cfg->fc_table); 8333bfd8472SDavid Ahern 8343bfd8472SDavid Ahern if (tbl) 8353bfd8472SDavid Ahern err = fib_table_lookup(tbl, &fl4, &res, 8361e313678SEric Dumazet FIB_LOOKUP_IGNORE_LINKSTATE | 8371e313678SEric Dumazet FIB_LOOKUP_NOREF); 8384c9bcd11SDavid Ahern 8394c9bcd11SDavid Ahern /* on error or if no table given do full lookup. 
This 8404c9bcd11SDavid Ahern * is needed for example when nexthops are in the local 8414c9bcd11SDavid Ahern * table rather than the given table 8424c9bcd11SDavid Ahern */ 8434c9bcd11SDavid Ahern if (!tbl || err) { 8440eeb075fSAndy Gospodarek err = fib_lookup(net, &fl4, &res, 8450eeb075fSAndy Gospodarek FIB_LOOKUP_IGNORE_LINKSTATE); 8464c9bcd11SDavid Ahern } 8474c9bcd11SDavid Ahern 848ebc0ffaeSEric Dumazet if (err) { 849c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 850c3ab2b4eSDavid Ahern "Nexthop has invalid gateway"); 851ebc0ffaeSEric Dumazet rcu_read_unlock(); 8521da177e4SLinus Torvalds return err; 8531da177e4SLinus Torvalds } 854ebc0ffaeSEric Dumazet } 8551da177e4SLinus Torvalds err = -EINVAL; 856c3ab2b4eSDavid Ahern if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) { 857c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); 8581da177e4SLinus Torvalds goto out; 859c3ab2b4eSDavid Ahern } 8601da177e4SLinus Torvalds nh->nh_scope = res.scope; 8611da177e4SLinus Torvalds nh->nh_oif = FIB_RES_OIF(res); 8626a31d2a9SEric Dumazet nh->nh_dev = dev = FIB_RES_DEV(res); 863c3ab2b4eSDavid Ahern if (!dev) { 864c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 865c3ab2b4eSDavid Ahern "No egress device for nexthop gateway"); 8661da177e4SLinus Torvalds goto out; 867c3ab2b4eSDavid Ahern } 8686a31d2a9SEric Dumazet dev_hold(dev); 8698a3d0316SAndy Gospodarek if (!netif_carrier_ok(dev)) 8708a3d0316SAndy Gospodarek nh->nh_flags |= RTNH_F_LINKDOWN; 8718723e1b4SEric Dumazet err = (dev->flags & IFF_UP) ? 
0 : -ENETDOWN; 8721da177e4SLinus Torvalds } else { 8731da177e4SLinus Torvalds struct in_device *in_dev; 8741da177e4SLinus Torvalds 875c3ab2b4eSDavid Ahern if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) { 876c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 877c3ab2b4eSDavid Ahern "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set"); 8781da177e4SLinus Torvalds return -EINVAL; 879c3ab2b4eSDavid Ahern } 8808723e1b4SEric Dumazet rcu_read_lock(); 8818723e1b4SEric Dumazet err = -ENODEV; 88286167a37SDenis V. Lunev in_dev = inetdev_by_index(net, nh->nh_oif); 88351456b29SIan Morris if (!in_dev) 8848723e1b4SEric Dumazet goto out; 8858723e1b4SEric Dumazet err = -ENETDOWN; 886c3ab2b4eSDavid Ahern if (!(in_dev->dev->flags & IFF_UP)) { 887c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Device for nexthop is not up"); 8888723e1b4SEric Dumazet goto out; 889c3ab2b4eSDavid Ahern } 8901da177e4SLinus Torvalds nh->nh_dev = in_dev->dev; 8911da177e4SLinus Torvalds dev_hold(nh->nh_dev); 8921da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_HOST; 8938a3d0316SAndy Gospodarek if (!netif_carrier_ok(nh->nh_dev)) 8948a3d0316SAndy Gospodarek nh->nh_flags |= RTNH_F_LINKDOWN; 8958723e1b4SEric Dumazet err = 0; 8961da177e4SLinus Torvalds } 8978723e1b4SEric Dumazet out: 8988723e1b4SEric Dumazet rcu_read_unlock(); 8998723e1b4SEric Dumazet return err; 9001da177e4SLinus Torvalds } 9011da177e4SLinus Torvalds 90281f7bf6cSAl Viro static inline unsigned int fib_laddr_hashfn(__be32 val) 9031da177e4SLinus Torvalds { 904123b9731SDavid S. Miller unsigned int mask = (fib_info_hash_size - 1); 9051da177e4SLinus Torvalds 9066a31d2a9SEric Dumazet return ((__force u32)val ^ 9076a31d2a9SEric Dumazet ((__force u32)val >> 7) ^ 9086a31d2a9SEric Dumazet ((__force u32)val >> 14)) & mask; 9091da177e4SLinus Torvalds } 9101da177e4SLinus Torvalds 911123b9731SDavid S. 
Miller static struct hlist_head *fib_info_hash_alloc(int bytes) 9121da177e4SLinus Torvalds { 9131da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 91488f83491SJoonwoo Park return kzalloc(bytes, GFP_KERNEL); 9151da177e4SLinus Torvalds else 9161da177e4SLinus Torvalds return (struct hlist_head *) 9176a31d2a9SEric Dumazet __get_free_pages(GFP_KERNEL | __GFP_ZERO, 9186a31d2a9SEric Dumazet get_order(bytes)); 9191da177e4SLinus Torvalds } 9201da177e4SLinus Torvalds 921123b9731SDavid S. Miller static void fib_info_hash_free(struct hlist_head *hash, int bytes) 9221da177e4SLinus Torvalds { 9231da177e4SLinus Torvalds if (!hash) 9241da177e4SLinus Torvalds return; 9251da177e4SLinus Torvalds 9261da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 9271da177e4SLinus Torvalds kfree(hash); 9281da177e4SLinus Torvalds else 9291da177e4SLinus Torvalds free_pages((unsigned long) hash, get_order(bytes)); 9301da177e4SLinus Torvalds } 9311da177e4SLinus Torvalds 932123b9731SDavid S. Miller static void fib_info_hash_move(struct hlist_head *new_info_hash, 9331da177e4SLinus Torvalds struct hlist_head *new_laddrhash, 9341da177e4SLinus Torvalds unsigned int new_size) 9351da177e4SLinus Torvalds { 936b7656e7fSDavid S. Miller struct hlist_head *old_info_hash, *old_laddrhash; 937123b9731SDavid S. Miller unsigned int old_size = fib_info_hash_size; 938b7656e7fSDavid S. Miller unsigned int i, bytes; 9391da177e4SLinus Torvalds 940832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 941b7656e7fSDavid S. Miller old_info_hash = fib_info_hash; 942b7656e7fSDavid S. Miller old_laddrhash = fib_info_laddrhash; 943123b9731SDavid S. 
Miller fib_info_hash_size = new_size; 9441da177e4SLinus Torvalds 9451da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 9461da177e4SLinus Torvalds struct hlist_head *head = &fib_info_hash[i]; 947b67bfe0dSSasha Levin struct hlist_node *n; 9481da177e4SLinus Torvalds struct fib_info *fi; 9491da177e4SLinus Torvalds 950b67bfe0dSSasha Levin hlist_for_each_entry_safe(fi, n, head, fib_hash) { 9511da177e4SLinus Torvalds struct hlist_head *dest; 9521da177e4SLinus Torvalds unsigned int new_hash; 9531da177e4SLinus Torvalds 9541da177e4SLinus Torvalds new_hash = fib_info_hashfn(fi); 9551da177e4SLinus Torvalds dest = &new_info_hash[new_hash]; 9561da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, dest); 9571da177e4SLinus Torvalds } 9581da177e4SLinus Torvalds } 9591da177e4SLinus Torvalds fib_info_hash = new_info_hash; 9601da177e4SLinus Torvalds 9611da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 9621da177e4SLinus Torvalds struct hlist_head *lhead = &fib_info_laddrhash[i]; 963b67bfe0dSSasha Levin struct hlist_node *n; 9641da177e4SLinus Torvalds struct fib_info *fi; 9651da177e4SLinus Torvalds 966b67bfe0dSSasha Levin hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { 9671da177e4SLinus Torvalds struct hlist_head *ldest; 9681da177e4SLinus Torvalds unsigned int new_hash; 9691da177e4SLinus Torvalds 9701da177e4SLinus Torvalds new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 9711da177e4SLinus Torvalds ldest = &new_laddrhash[new_hash]; 9721da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, ldest); 9731da177e4SLinus Torvalds } 9741da177e4SLinus Torvalds } 9751da177e4SLinus Torvalds fib_info_laddrhash = new_laddrhash; 9761da177e4SLinus Torvalds 977832b4c5eSStephen Hemminger spin_unlock_bh(&fib_info_lock); 978b7656e7fSDavid S. Miller 979b7656e7fSDavid S. Miller bytes = old_size * sizeof(struct hlist_head *); 980123b9731SDavid S. Miller fib_info_hash_free(old_info_hash, bytes); 981123b9731SDavid S. 
Miller fib_info_hash_free(old_laddrhash, bytes); 9821da177e4SLinus Torvalds } 9831da177e4SLinus Torvalds 984436c3b66SDavid S. Miller __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) 985436c3b66SDavid S. Miller { 986436c3b66SDavid S. Miller nh->nh_saddr = inet_select_addr(nh->nh_dev, 987436c3b66SDavid S. Miller nh->nh_gw, 98837e826c5SDavid S. Miller nh->nh_parent->fib_scope); 989436c3b66SDavid S. Miller nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); 990436c3b66SDavid S. Miller 991436c3b66SDavid S. Miller return nh->nh_saddr; 992436c3b66SDavid S. Miller } 993436c3b66SDavid S. Miller 994021dd3b8SDavid Ahern static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) 995021dd3b8SDavid Ahern { 996021dd3b8SDavid Ahern if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 997021dd3b8SDavid Ahern fib_prefsrc != cfg->fc_dst) { 9989b8ff518SDavid Ahern u32 tb_id = cfg->fc_table; 999e1b8d903SDavid Ahern int rc; 1000021dd3b8SDavid Ahern 1001021dd3b8SDavid Ahern if (tb_id == RT_TABLE_MAIN) 1002021dd3b8SDavid Ahern tb_id = RT_TABLE_LOCAL; 1003021dd3b8SDavid Ahern 1004e1b8d903SDavid Ahern rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, 1005e1b8d903SDavid Ahern fib_prefsrc, tb_id); 1006e1b8d903SDavid Ahern 1007e1b8d903SDavid Ahern if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) { 1008e1b8d903SDavid Ahern rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, 1009e1b8d903SDavid Ahern fib_prefsrc, RT_TABLE_LOCAL); 1010021dd3b8SDavid Ahern } 1011e1b8d903SDavid Ahern 1012e1b8d903SDavid Ahern if (rc != RTN_LOCAL) 1013e1b8d903SDavid Ahern return false; 1014021dd3b8SDavid Ahern } 1015021dd3b8SDavid Ahern return true; 1016021dd3b8SDavid Ahern } 1017021dd3b8SDavid Ahern 10186cf9dfd3SFlorian Westphal static int 10196cf9dfd3SFlorian Westphal fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) 10206cf9dfd3SFlorian Westphal { 1021c3a8d947SDaniel Borkmann bool ecn_ca = false; 10226cf9dfd3SFlorian Westphal struct nlattr *nla; 
10236cf9dfd3SFlorian Westphal int remaining; 10246cf9dfd3SFlorian Westphal 10256cf9dfd3SFlorian Westphal if (!cfg->fc_mx) 10266cf9dfd3SFlorian Westphal return 0; 10276cf9dfd3SFlorian Westphal 10286cf9dfd3SFlorian Westphal nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 10296cf9dfd3SFlorian Westphal int type = nla_type(nla); 10306cf9dfd3SFlorian Westphal u32 val; 10316cf9dfd3SFlorian Westphal 10326cf9dfd3SFlorian Westphal if (!type) 10336cf9dfd3SFlorian Westphal continue; 10346cf9dfd3SFlorian Westphal if (type > RTAX_MAX) 10356cf9dfd3SFlorian Westphal return -EINVAL; 10366cf9dfd3SFlorian Westphal 10376cf9dfd3SFlorian Westphal if (type == RTAX_CC_ALGO) { 10386cf9dfd3SFlorian Westphal char tmp[TCP_CA_NAME_MAX]; 10396cf9dfd3SFlorian Westphal 10406cf9dfd3SFlorian Westphal nla_strlcpy(tmp, nla, sizeof(tmp)); 1041c3a8d947SDaniel Borkmann val = tcp_ca_get_key_by_name(tmp, &ecn_ca); 10426cf9dfd3SFlorian Westphal if (val == TCP_CA_UNSPEC) 10436cf9dfd3SFlorian Westphal return -EINVAL; 10446cf9dfd3SFlorian Westphal } else { 10456cf9dfd3SFlorian Westphal val = nla_get_u32(nla); 10466cf9dfd3SFlorian Westphal } 10476cf9dfd3SFlorian Westphal if (type == RTAX_ADVMSS && val > 65535 - 40) 10486cf9dfd3SFlorian Westphal val = 65535 - 40; 10496cf9dfd3SFlorian Westphal if (type == RTAX_MTU && val > 65535 - 15) 10506cf9dfd3SFlorian Westphal val = 65535 - 15; 1051626abd59SPaolo Abeni if (type == RTAX_HOPLIMIT && val > 255) 1052626abd59SPaolo Abeni val = 255; 1053b8d3e416SDaniel Borkmann if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) 1054b8d3e416SDaniel Borkmann return -EINVAL; 10553fb07dafSEric Dumazet fi->fib_metrics->metrics[type - 1] = val; 10566cf9dfd3SFlorian Westphal } 10576cf9dfd3SFlorian Westphal 1058c3a8d947SDaniel Borkmann if (ecn_ca) 10593fb07dafSEric Dumazet fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; 1060c3a8d947SDaniel Borkmann 10616cf9dfd3SFlorian Westphal return 0; 10626cf9dfd3SFlorian Westphal } 10636cf9dfd3SFlorian Westphal 
10646d8422a1SDavid Ahern struct fib_info *fib_create_info(struct fib_config *cfg, 10656d8422a1SDavid Ahern struct netlink_ext_ack *extack) 10661da177e4SLinus Torvalds { 10671da177e4SLinus Torvalds int err; 10681da177e4SLinus Torvalds struct fib_info *fi = NULL; 10691da177e4SLinus Torvalds struct fib_info *ofi; 10701da177e4SLinus Torvalds int nhs = 1; 10717462bd74SDenis V. Lunev struct net *net = cfg->fc_nlinfo.nl_net; 10721da177e4SLinus Torvalds 10734c8237cdSDavid S. Miller if (cfg->fc_type > RTN_MAX) 10744c8237cdSDavid S. Miller goto err_inval; 10754c8237cdSDavid S. Miller 10761da177e4SLinus Torvalds /* Fast check to catch the most weird cases */ 1077c3ab2b4eSDavid Ahern if (fib_props[cfg->fc_type].scope > cfg->fc_scope) { 1078c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid scope"); 10791da177e4SLinus Torvalds goto err_inval; 1080c3ab2b4eSDavid Ahern } 10811da177e4SLinus Torvalds 1082c3ab2b4eSDavid Ahern if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { 1083c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1084c3ab2b4eSDavid Ahern "Invalid rtm_flags - can not contain DEAD or LINKDOWN"); 108580610229SJulian Anastasov goto err_inval; 1086c3ab2b4eSDavid Ahern } 108780610229SJulian Anastasov 10881da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 10894e902c57SThomas Graf if (cfg->fc_mp) { 10906d8422a1SDavid Ahern nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); 10911da177e4SLinus Torvalds if (nhs == 0) 10921da177e4SLinus Torvalds goto err_inval; 10931da177e4SLinus Torvalds } 10941da177e4SLinus Torvalds #endif 10951da177e4SLinus Torvalds 10961da177e4SLinus Torvalds err = -ENOBUFS; 1097123b9731SDavid S. Miller if (fib_info_cnt >= fib_info_hash_size) { 1098123b9731SDavid S. 
Miller unsigned int new_size = fib_info_hash_size << 1; 10991da177e4SLinus Torvalds struct hlist_head *new_info_hash; 11001da177e4SLinus Torvalds struct hlist_head *new_laddrhash; 11011da177e4SLinus Torvalds unsigned int bytes; 11021da177e4SLinus Torvalds 11031da177e4SLinus Torvalds if (!new_size) 1104d94ce9b2SEric Dumazet new_size = 16; 11051da177e4SLinus Torvalds bytes = new_size * sizeof(struct hlist_head *); 1106123b9731SDavid S. Miller new_info_hash = fib_info_hash_alloc(bytes); 1107123b9731SDavid S. Miller new_laddrhash = fib_info_hash_alloc(bytes); 11081da177e4SLinus Torvalds if (!new_info_hash || !new_laddrhash) { 1109123b9731SDavid S. Miller fib_info_hash_free(new_info_hash, bytes); 1110123b9731SDavid S. Miller fib_info_hash_free(new_laddrhash, bytes); 111188f83491SJoonwoo Park } else 1112123b9731SDavid S. Miller fib_info_hash_move(new_info_hash, new_laddrhash, new_size); 11131da177e4SLinus Torvalds 1114123b9731SDavid S. Miller if (!fib_info_hash_size) 11151da177e4SLinus Torvalds goto failure; 11161da177e4SLinus Torvalds } 11171da177e4SLinus Torvalds 11180da974f4SPanagiotis Issaris fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 111951456b29SIan Morris if (!fi) 11201da177e4SLinus Torvalds goto failure; 1121725d1e1bSDavid S. Miller if (cfg->fc_mx) { 11223fb07dafSEric Dumazet fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); 1123187e5b3aSEric Dumazet if (unlikely(!fi->fib_metrics)) { 1124187e5b3aSEric Dumazet kfree(fi); 1125187e5b3aSEric Dumazet return ERR_PTR(err); 1126187e5b3aSEric Dumazet } 11279620fef2SEric Dumazet refcount_set(&fi->fib_metrics->refcnt, 1); 1128187e5b3aSEric Dumazet } else { 11293fb07dafSEric Dumazet fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; 1130187e5b3aSEric Dumazet } 1131187e5b3aSEric Dumazet fib_info_cnt++; 1132efd7ef1cSEric W. Biederman fi->fib_net = net; 11334e902c57SThomas Graf fi->fib_protocol = cfg->fc_protocol; 113437e826c5SDavid S. 
Miller fi->fib_scope = cfg->fc_scope; 11354e902c57SThomas Graf fi->fib_flags = cfg->fc_flags; 11364e902c57SThomas Graf fi->fib_priority = cfg->fc_priority; 11374e902c57SThomas Graf fi->fib_prefsrc = cfg->fc_prefsrc; 1138f4ef85bbSEric Dumazet fi->fib_type = cfg->fc_type; 11395a56a0b3SMark Tomlinson fi->fib_tb_id = cfg->fc_table; 11401da177e4SLinus Torvalds 11411da177e4SLinus Torvalds fi->fib_nhs = nhs; 11421da177e4SLinus Torvalds change_nexthops(fi) { 114371fceff0SDavid S. Miller nexthop_nh->nh_parent = fi; 1144d26b3a7cSEric Dumazet nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); 1145f8a17175SJulian Anastasov if (!nexthop_nh->nh_pcpu_rth_output) 1146f8a17175SJulian Anastasov goto failure; 11471da177e4SLinus Torvalds } endfor_nexthops(fi) 11481da177e4SLinus Torvalds 11496cf9dfd3SFlorian Westphal err = fib_convert_metrics(fi, cfg); 11506cf9dfd3SFlorian Westphal if (err) 11516cf9dfd3SFlorian Westphal goto failure; 11521da177e4SLinus Torvalds 11534e902c57SThomas Graf if (cfg->fc_mp) { 11541da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 11556d8422a1SDavid Ahern err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); 11564e902c57SThomas Graf if (err != 0) 11571da177e4SLinus Torvalds goto failure; 1158c3ab2b4eSDavid Ahern if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) { 1159c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1160c3ab2b4eSDavid Ahern "Nexthop device index does not match RTA_OIF"); 11611da177e4SLinus Torvalds goto err_inval; 1162c3ab2b4eSDavid Ahern } 1163c3ab2b4eSDavid Ahern if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) { 1164c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1165c3ab2b4eSDavid Ahern "Nexthop gateway does not match RTA_GATEWAY"); 11661da177e4SLinus Torvalds goto err_inval; 1167c3ab2b4eSDavid Ahern } 1168c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1169c3ab2b4eSDavid Ahern if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) { 1170c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 
1171c3ab2b4eSDavid Ahern "Nexthop class id does not match RTA_FLOW"); 11721da177e4SLinus Torvalds goto err_inval; 1173c3ab2b4eSDavid Ahern } 11741da177e4SLinus Torvalds #endif 11751da177e4SLinus Torvalds #else 1176c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1177c3ab2b4eSDavid Ahern "Multipath support not enabled in kernel"); 11781da177e4SLinus Torvalds goto err_inval; 11791da177e4SLinus Torvalds #endif 11801da177e4SLinus Torvalds } else { 11811da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 11824e902c57SThomas Graf 1183571e7226SRoopa Prabhu if (cfg->fc_encap) { 1184571e7226SRoopa Prabhu struct lwtunnel_state *lwtstate; 1185571e7226SRoopa Prabhu 1186c3ab2b4eSDavid Ahern if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) { 1187c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1188c3ab2b4eSDavid Ahern "LWT encap type not specified"); 1189571e7226SRoopa Prabhu goto err_inval; 1190c3ab2b4eSDavid Ahern } 119130357d7dSDavid Ahern err = lwtunnel_build_state(cfg->fc_encap_type, 1192127eb7cdSTom Herbert cfg->fc_encap, AF_INET, cfg, 11939ae28727SDavid Ahern &lwtstate, extack); 1194571e7226SRoopa Prabhu if (err) 1195571e7226SRoopa Prabhu goto failure; 1196571e7226SRoopa Prabhu 11975a6228a0SNicolas Dichtel nh->nh_lwtstate = lwtstate_get(lwtstate); 1198571e7226SRoopa Prabhu } 11994e902c57SThomas Graf nh->nh_oif = cfg->fc_oif; 12004e902c57SThomas Graf nh->nh_gw = cfg->fc_gw; 12014e902c57SThomas Graf nh->nh_flags = cfg->fc_flags; 1202c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 12034e902c57SThomas Graf nh->nh_tclassid = cfg->fc_flow; 12047a9bc9b8SDavid S. Miller if (nh->nh_tclassid) 1205f4530fa5SDavid S. 
Miller fi->fib_net->ipv4.fib_num_tclassid_users++; 12061da177e4SLinus Torvalds #endif 12071da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 12081da177e4SLinus Torvalds nh->nh_weight = 1; 12091da177e4SLinus Torvalds #endif 12101da177e4SLinus Torvalds } 12111da177e4SLinus Torvalds 12124e902c57SThomas Graf if (fib_props[cfg->fc_type].error) { 1213c3ab2b4eSDavid Ahern if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) { 1214c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1215c3ab2b4eSDavid Ahern "Gateway, device and multipath can not be specified for this route type"); 12161da177e4SLinus Torvalds goto err_inval; 1217c3ab2b4eSDavid Ahern } 12181da177e4SLinus Torvalds goto link_it; 12194c8237cdSDavid S. Miller } else { 12204c8237cdSDavid S. Miller switch (cfg->fc_type) { 12214c8237cdSDavid S. Miller case RTN_UNICAST: 12224c8237cdSDavid S. Miller case RTN_LOCAL: 12234c8237cdSDavid S. Miller case RTN_BROADCAST: 12244c8237cdSDavid S. Miller case RTN_ANYCAST: 12254c8237cdSDavid S. Miller case RTN_MULTICAST: 12264c8237cdSDavid S. Miller break; 12274c8237cdSDavid S. Miller default: 1228c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid route type"); 12294c8237cdSDavid S. Miller goto err_inval; 12304c8237cdSDavid S. Miller } 12311da177e4SLinus Torvalds } 12321da177e4SLinus Torvalds 1233c3ab2b4eSDavid Ahern if (cfg->fc_scope > RT_SCOPE_HOST) { 1234c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid scope"); 12351da177e4SLinus Torvalds goto err_inval; 1236c3ab2b4eSDavid Ahern } 12371da177e4SLinus Torvalds 12384e902c57SThomas Graf if (cfg->fc_scope == RT_SCOPE_HOST) { 12391da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 12401da177e4SLinus Torvalds 12411da177e4SLinus Torvalds /* Local address is added. 
*/ 1242c3ab2b4eSDavid Ahern if (nhs != 1) { 1243c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1244c3ab2b4eSDavid Ahern "Route with host scope can not have multiple nexthops"); 12456d8422a1SDavid Ahern goto err_inval; 1246c3ab2b4eSDavid Ahern } 1247c3ab2b4eSDavid Ahern if (nh->nh_gw) { 1248c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, 1249c3ab2b4eSDavid Ahern "Route with host scope can not have a gateway"); 12501da177e4SLinus Torvalds goto err_inval; 1251c3ab2b4eSDavid Ahern } 12521da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_NOWHERE; 12537462bd74SDenis V. Lunev nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); 12541da177e4SLinus Torvalds err = -ENODEV; 125551456b29SIan Morris if (!nh->nh_dev) 12561da177e4SLinus Torvalds goto failure; 12571da177e4SLinus Torvalds } else { 12588a3d0316SAndy Gospodarek int linkdown = 0; 12598a3d0316SAndy Gospodarek 12601da177e4SLinus Torvalds change_nexthops(fi) { 12616d8422a1SDavid Ahern err = fib_check_nh(cfg, fi, nexthop_nh, extack); 12626a31d2a9SEric Dumazet if (err != 0) 12631da177e4SLinus Torvalds goto failure; 12648a3d0316SAndy Gospodarek if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) 12658a3d0316SAndy Gospodarek linkdown++; 12661da177e4SLinus Torvalds } endfor_nexthops(fi) 12678a3d0316SAndy Gospodarek if (linkdown == fi->fib_nhs) 12688a3d0316SAndy Gospodarek fi->fib_flags |= RTNH_F_LINKDOWN; 12691da177e4SLinus Torvalds } 12701da177e4SLinus Torvalds 1271c3ab2b4eSDavid Ahern if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) { 1272c3ab2b4eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid prefsrc address"); 12731da177e4SLinus Torvalds goto err_inval; 1274c3ab2b4eSDavid Ahern } 12751da177e4SLinus Torvalds 12761fc050a1SDavid S. Miller change_nexthops(fi) { 1277436c3b66SDavid S. Miller fib_info_update_nh_saddr(net, nexthop_nh); 12780e884c78SPeter Nørlund fib_add_weight(fi, nexthop_nh); 12791fc050a1SDavid S. Miller } endfor_nexthops(fi) 12801fc050a1SDavid S. 
Miller 12810e884c78SPeter Nørlund fib_rebalance(fi); 12820e884c78SPeter Nørlund 12831da177e4SLinus Torvalds link_it: 12846a31d2a9SEric Dumazet ofi = fib_find_info(fi); 12856a31d2a9SEric Dumazet if (ofi) { 12861da177e4SLinus Torvalds fi->fib_dead = 1; 12871da177e4SLinus Torvalds free_fib_info(fi); 12881da177e4SLinus Torvalds ofi->fib_treeref++; 12891da177e4SLinus Torvalds return ofi; 12901da177e4SLinus Torvalds } 12911da177e4SLinus Torvalds 12921da177e4SLinus Torvalds fi->fib_treeref++; 12930029c0deSReshetova, Elena refcount_set(&fi->fib_clntref, 1); 1294832b4c5eSStephen Hemminger spin_lock_bh(&fib_info_lock); 12951da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, 12961da177e4SLinus Torvalds &fib_info_hash[fib_info_hashfn(fi)]); 12971da177e4SLinus Torvalds if (fi->fib_prefsrc) { 12981da177e4SLinus Torvalds struct hlist_head *head; 12991da177e4SLinus Torvalds 13001da177e4SLinus Torvalds head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 13011da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, head); 13021da177e4SLinus Torvalds } 13031da177e4SLinus Torvalds change_nexthops(fi) { 13041da177e4SLinus Torvalds struct hlist_head *head; 13051da177e4SLinus Torvalds unsigned int hash; 13061da177e4SLinus Torvalds 130771fceff0SDavid S. Miller if (!nexthop_nh->nh_dev) 13081da177e4SLinus Torvalds continue; 130971fceff0SDavid S. Miller hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); 13101da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 131171fceff0SDavid S. 
		hlist_add_head(&nexthop_nh->nh_hash, head);
	} endfor_nexthops(fi)
	spin_unlock_bh(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
	if (fi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
	}

	return ERR_PTR(err);
}

/*
 * fib_dump_info - append one AF_INET route message describing @fi to @skb.
 *
 * @portid, @seq, @event and @flags are handed to nlmsg_put() for the
 * netlink header.  @tb_id, @type, @dst, @dst_len and @tos fill the rtmsg
 * header and the RTA_TABLE / RTA_DST attributes; everything else is read
 * from @fi.
 *
 * Returns 0 once the message has been finalised with nlmsg_end().
 * Returns -EMSGSIZE if the header could not be placed, or if any attribute
 * could not be added; in the latter case the partial message is removed
 * with nlmsg_cancel() first.
 */
int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
		  u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
		  struct fib_info *fi, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	/* rtm_table only receives ids below 256; larger ids are reported as
	 * RT_TABLE_COMPAT.  RTA_TABLE always carries the full 32-bit id.
	 */
	if (tb_id < 256)
		rtm->rtm_table = tb_id;
	else
		rtm->rtm_table = RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, tb_id))
		goto nla_put_failure;
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = fi->fib_scope;
	rtm->rtm_protocol = fi->fib_protocol;

	/* Optional attributes: each is emitted only when its value is set. */
	if (rtm->rtm_dst_len &&
	    nla_put_in_addr(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (fi->fib_priority &&
	    nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
		goto nla_put_failure;
	if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
		goto nla_put_failure;

	if (fi->fib_prefsrc &&
	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
		goto nla_put_failure;
	/* Single nexthop: its attributes go directly into the route message. */
	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw &&
		    nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
			goto nla_put_failure;
		if (fi->fib_nh->nh_oif &&
		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
			goto nla_put_failure;
		if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
			struct in_device *in_dev;

			/* The in_device is looked up and inspected inside an
			 * RCU read-side section.  A link-down nexthop is
			 * additionally reported as dead when the device is
			 * set to ignore routes with linkdown.
			 */
			rcu_read_lock();
			in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
			if (in_dev &&
			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
				rtm->rtm_flags |= RTNH_F_DEAD;
			rcu_read_unlock();
		}
		if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
			rtm->rtm_flags |= RTNH_F_OFFLOAD;
#ifdef CONFIG_IP_ROUTE_CLASSID
		if (fi->fib_nh[0].nh_tclassid &&
		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
			goto nla_put_failure;
#endif
		if (fi->fib_nh->nh_lwtstate &&
		    lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0)
			goto nla_put_failure;
	}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* Several nexthops: emit one struct rtnexthop (followed by its own
	 * attributes) per nexthop, all nested inside RTA_MULTIPATH.
	 */
	if (fi->fib_nhs > 1) {
		struct rtnexthop *rtnh;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		for_nexthops(fi) {
			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
			if (!rtnh)
				goto nla_put_failure;

			/* Only the low 8 bits of nh_flags are copied out. */
			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
			if (nh->nh_flags & RTNH_F_LINKDOWN) {
				struct in_device *in_dev;

				/* Same linkdown-implies-dead reporting as in
				 * the single nexthop case above.
				 */
				rcu_read_lock();
				in_dev = __in_dev_get_rcu(nh->nh_dev);
				if (in_dev &&
				    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
					rtnh->rtnh_flags |= RTNH_F_DEAD;
				rcu_read_unlock();
			}
			/* rtnh_hops is reported as the nexthop weight minus one. */
			rtnh->rtnh_hops = nh->nh_weight - 1;
			rtnh->rtnh_ifindex = nh->nh_oif;

			if (nh->nh_gw &&
			    nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw))
				goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
			if (nh->nh_tclassid &&
			    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
				goto nla_put_failure;
#endif
			if (nh->nh_lwtstate &&
			    lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0)
				goto nla_put_failure;

			/* length of rtnetlink header + attributes */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
		} endfor_nexthops(fi);

		nla_nest_end(skb, mp);
	}
#endif
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	/* Drop everything added since nlmsg_put(). */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/*
 * Update FIB if:
 *  - local address disappeared
-> we must delete all the entries 14536a31d2a9SEric Dumazet * referring to it. 14546a31d2a9SEric Dumazet * - device went down -> we must shutdown all nexthops going via it. 14551da177e4SLinus Torvalds */ 14565a56a0b3SMark Tomlinson int fib_sync_down_addr(struct net_device *dev, __be32 local) 14571da177e4SLinus Torvalds { 14581da177e4SLinus Torvalds int ret = 0; 14591da177e4SLinus Torvalds unsigned int hash = fib_laddr_hashfn(local); 14601da177e4SLinus Torvalds struct hlist_head *head = &fib_info_laddrhash[hash]; 14615a56a0b3SMark Tomlinson struct net *net = dev_net(dev); 14625a56a0b3SMark Tomlinson int tb_id = l3mdev_fib_table(dev); 14631da177e4SLinus Torvalds struct fib_info *fi; 14641da177e4SLinus Torvalds 146551456b29SIan Morris if (!fib_info_laddrhash || local == 0) 146685326fa5SDenis V. Lunev return 0; 146785326fa5SDenis V. Lunev 1468b67bfe0dSSasha Levin hlist_for_each_entry(fi, head, fib_lhash) { 14695a56a0b3SMark Tomlinson if (!net_eq(fi->fib_net, net) || 14705a56a0b3SMark Tomlinson fi->fib_tb_id != tb_id) 14714814bdbdSDenis V. Lunev continue; 14721da177e4SLinus Torvalds if (fi->fib_prefsrc == local) { 14731da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 14741da177e4SLinus Torvalds ret++; 14751da177e4SLinus Torvalds } 14761da177e4SLinus Torvalds } 147785326fa5SDenis V. 
Lunev return ret; 14781da177e4SLinus Torvalds } 14791da177e4SLinus Torvalds 1480982acb97SIdo Schimmel static int call_fib_nh_notifiers(struct fib_nh *fib_nh, 1481982acb97SIdo Schimmel enum fib_event_type event_type) 1482982acb97SIdo Schimmel { 1483982acb97SIdo Schimmel struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev); 1484982acb97SIdo Schimmel struct fib_nh_notifier_info info = { 1485982acb97SIdo Schimmel .fib_nh = fib_nh, 1486982acb97SIdo Schimmel }; 1487982acb97SIdo Schimmel 1488982acb97SIdo Schimmel switch (event_type) { 1489982acb97SIdo Schimmel case FIB_EVENT_NH_ADD: 1490982acb97SIdo Schimmel if (fib_nh->nh_flags & RTNH_F_DEAD) 1491982acb97SIdo Schimmel break; 1492982acb97SIdo Schimmel if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 1493982acb97SIdo Schimmel fib_nh->nh_flags & RTNH_F_LINKDOWN) 1494982acb97SIdo Schimmel break; 149504b1d4e5SIdo Schimmel return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type, 1496982acb97SIdo Schimmel &info.info); 1497982acb97SIdo Schimmel case FIB_EVENT_NH_DEL: 149871ed7ee3SIdo Schimmel if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 1499982acb97SIdo Schimmel fib_nh->nh_flags & RTNH_F_LINKDOWN) || 1500982acb97SIdo Schimmel (fib_nh->nh_flags & RTNH_F_DEAD)) 150104b1d4e5SIdo Schimmel return call_fib4_notifiers(dev_net(fib_nh->nh_dev), 1502982acb97SIdo Schimmel event_type, &info.info); 1503982acb97SIdo Schimmel default: 1504982acb97SIdo Schimmel break; 1505982acb97SIdo Schimmel } 1506982acb97SIdo Schimmel 1507982acb97SIdo Schimmel return NOTIFY_DONE; 1508982acb97SIdo Schimmel } 1509982acb97SIdo Schimmel 15104f823defSJulian Anastasov /* Event force Flags Description 15114f823defSJulian Anastasov * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host 15124f823defSJulian Anastasov * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host 15134f823defSJulian Anastasov * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed 15144f823defSJulian Anastasov * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device 
removed
 */
/*
 * fib_sync_down_dev - flag the nexthops that go through @dev after @event.
 *
 * Walks the fib_info_devhash bucket for dev->ifindex.  For every fib_info
 * reached through a nexthop on @dev, each live nexthop on @dev whose
 * nh_scope differs from the local "scope" value is flagged: RTNH_F_DEAD and
 * RTNH_F_LINKDOWN for NETDEV_DOWN / NETDEV_UNREGISTER, RTNH_F_LINKDOWN only
 * for NETDEV_CHANGE.  A FIB_EVENT_NH_DEL notification is offered for each
 * flagged nexthop.  When every nexthop of a fib_info ends up counted as
 * dead, the same flags are applied to the fib_info itself.
 *
 * Returns the number of fib_info entries flagged as a whole.
 */
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
	int ret = 0;
	/* Without @force, nexthops stored with RT_SCOPE_NOWHERE are left alone.
	 * NOTE(review): the table above calls these "scope host" routes -
	 * presumably their nexthops get RT_SCOPE_NOWHERE in fib_create_info();
	 * confirm there.
	 */
	int scope = RT_SCOPE_NOWHERE;
	struct fib_info *prev_fi = NULL;
	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
	struct hlist_head *head = &fib_info_devhash[hash];
	struct fib_nh *nh;

	/* With @force, use a scope value intended to match no nexthop so
	 * that none is skipped (assumes nh_scope never equals -1 - TODO confirm).
	 */
	if (force)
		scope = -1;

	hlist_for_each_entry(nh, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int dead;

		BUG_ON(!fi->fib_nhs);
		/* Skip other devices sharing the bucket, and a fib_info that
		 * was already handled for the immediately preceding match.
		 */
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;
		prev_fi = fi;
		dead = 0;
		change_nexthops(fi) {
			if (nexthop_nh->nh_flags & RTNH_F_DEAD)
				dead++;
			else if (nexthop_nh->nh_dev == dev &&
				 nexthop_nh->nh_scope != scope) {
				switch (event) {
				case NETDEV_DOWN:
				case NETDEV_UNREGISTER:
					nexthop_nh->nh_flags |= RTNH_F_DEAD;
					/* fall through */
				case NETDEV_CHANGE:
					nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
					break;
				}
				call_fib_nh_notifiers(nexthop_nh,
						      FIB_EVENT_NH_DEL);
				/* Counted as dead even when only LINKDOWN
				 * was set (NETDEV_CHANGE).
				 */
				dead++;
			}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
			/* On unregister, one nexthop on @dev is enough to
			 * treat the whole fib_info as dead; stop scanning.
			 */
			if (event == NETDEV_UNREGISTER &&
			    nexthop_nh->nh_dev == dev) {
				dead = fi->fib_nhs;
				break;
			}
#endif
		} endfor_nexthops(fi)
		if (dead == fi->fib_nhs) {
			switch (event) {
			case NETDEV_DOWN:
			case NETDEV_UNREGISTER:
				fi->fib_flags |= RTNH_F_DEAD;
				/* fall through */
			case NETDEV_CHANGE:
				fi->fib_flags |= RTNH_F_LINKDOWN;
				break;
			}
			ret++;
		}

		fib_rebalance(fi);
	}

	return ret;
}

/* Must be invoked inside of an RCU protected region.
 *
 * fib_select_default - pick among the aliases in res->fa_head that match the
 * current result, and possibly replace res->fi.
 *
 * Only aliases with the same prefix length and table as @res, and with a TOS
 * of 0 or equal to flp->flowi4_tos, are considered.  Candidates must also
 * have the scope of @res, be RTN_UNICAST and have a first nexthop with a
 * gateway at RT_SCOPE_LINK.  The first candidate has to be res->fi itself,
 * otherwise nothing is changed.  The choice between candidates is delegated
 * to fib_detect_death(); the index of the chosen entry (or -1) is stored in
 * the fa_default field of the alias that carried res->fi.
 * NOTE(review): fib_detect_death() is not visible here; a zero return is
 * treated as "use this candidate" - confirm its exact contract.
 */
static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
{
	struct fib_info *fi = NULL, *last_resort = NULL;
	struct hlist_head *fa_head = res->fa_head;
	struct fib_table *tb = res->table;
	u8 slen = 32 - res->prefixlen;
	int order = -1, last_idx = -1;
	struct fib_alias *fa, *fa1 = NULL;
	u32 last_prio = res->fi->fib_priority;
	u8 last_tos = 0;

	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
		struct fib_info *next_fi = fa->fa_info;

		if (fa->fa_slen != slen)
			continue;
		if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
			continue;
		if (fa->tb_id != tb->tb_id)
			continue;
		/* A higher priority value than the last accepted entry with
		 * the same TOS ends the scan for TOS 0, and is merely
		 * skipped for a non-zero TOS.
		 */
		if (next_fi->fib_priority > last_prio &&
		    fa->fa_tos == last_tos) {
			if (last_tos)
				continue;
			break;
		}
		if (next_fi->fib_flags & RTNH_F_DEAD)
			continue;
		last_tos = fa->fa_tos;
		last_prio = next_fi->fib_priority;

		if (next_fi->fib_scope != res->scope ||
		    fa->fa_type != RTN_UNICAST)
			continue;
		if (!next_fi->fib_nh[0].nh_gw ||
		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
			continue;

		fib_alias_accessed(fa);

		if (!fi) {
			/* First candidate: must be the route already in @res. */
			if (next_fi != res->fi)
				break;
			fa1 = fa;
		} else if (!fib_detect_death(fi, order, &last_resort,
					     &last_idx, fa1->fa_default)) {
			/* The previous candidate is accepted. */
			fib_result_assign(res, fi);
			fa1->fa_default = order;
			goto out;
		}
		fi = next_fi;
		order++;
	}

	/* Fewer than two candidates: nothing to choose from. */
	if (order <= 0 || !fi) {
		if (fa1)
			fa1->fa_default = -1;
		goto out;
	}

	/* The last candidate has not been tested inside the loop yet. */
	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
			      fa1->fa_default)) {
		fib_result_assign(res, fi);
		fa1->fa_default = order;
		goto out;
	}

	/* No candidate was accepted: use the recorded last resort, if any. */
	if (last_idx >= 0)
		fib_result_assign(res, last_resort);
	fa1->fa_default = last_idx;
out:
	return;
}

/*
 * Dead device goes up. We wake up dead nexthops.
 * It makes sense only on multipath routes.
16601da177e4SLinus Torvalds */ 16618a3d0316SAndy Gospodarek int fib_sync_up(struct net_device *dev, unsigned int nh_flags) 16621da177e4SLinus Torvalds { 16631da177e4SLinus Torvalds struct fib_info *prev_fi; 16641da177e4SLinus Torvalds unsigned int hash; 16651da177e4SLinus Torvalds struct hlist_head *head; 16661da177e4SLinus Torvalds struct fib_nh *nh; 16671da177e4SLinus Torvalds int ret; 16681da177e4SLinus Torvalds 16691da177e4SLinus Torvalds if (!(dev->flags & IFF_UP)) 16701da177e4SLinus Torvalds return 0; 16711da177e4SLinus Torvalds 1672c9b3292eSJulian Anastasov if (nh_flags & RTNH_F_DEAD) { 1673c9b3292eSJulian Anastasov unsigned int flags = dev_get_flags(dev); 1674c9b3292eSJulian Anastasov 1675c9b3292eSJulian Anastasov if (flags & (IFF_RUNNING | IFF_LOWER_UP)) 1676c9b3292eSJulian Anastasov nh_flags |= RTNH_F_LINKDOWN; 1677c9b3292eSJulian Anastasov } 1678c9b3292eSJulian Anastasov 16791da177e4SLinus Torvalds prev_fi = NULL; 16801da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 16811da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 16821da177e4SLinus Torvalds ret = 0; 16831da177e4SLinus Torvalds 1684b67bfe0dSSasha Levin hlist_for_each_entry(nh, head, nh_hash) { 16851da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 16861da177e4SLinus Torvalds int alive; 16871da177e4SLinus Torvalds 16881da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 16891da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 16901da177e4SLinus Torvalds continue; 16911da177e4SLinus Torvalds 16921da177e4SLinus Torvalds prev_fi = fi; 16931da177e4SLinus Torvalds alive = 0; 16941da177e4SLinus Torvalds change_nexthops(fi) { 16958a3d0316SAndy Gospodarek if (!(nexthop_nh->nh_flags & nh_flags)) { 16961da177e4SLinus Torvalds alive++; 16971da177e4SLinus Torvalds continue; 16981da177e4SLinus Torvalds } 169951456b29SIan Morris if (!nexthop_nh->nh_dev || 170071fceff0SDavid S. 
Miller !(nexthop_nh->nh_dev->flags & IFF_UP)) 17011da177e4SLinus Torvalds continue; 170271fceff0SDavid S. Miller if (nexthop_nh->nh_dev != dev || 170371fceff0SDavid S. Miller !__in_dev_get_rtnl(dev)) 17041da177e4SLinus Torvalds continue; 17051da177e4SLinus Torvalds alive++; 17068a3d0316SAndy Gospodarek nexthop_nh->nh_flags &= ~nh_flags; 1707982acb97SIdo Schimmel call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); 17081da177e4SLinus Torvalds } endfor_nexthops(fi) 17091da177e4SLinus Torvalds 17101da177e4SLinus Torvalds if (alive > 0) { 17118a3d0316SAndy Gospodarek fi->fib_flags &= ~nh_flags; 17121da177e4SLinus Torvalds ret++; 17131da177e4SLinus Torvalds } 17140e884c78SPeter Nørlund 17150e884c78SPeter Nørlund fib_rebalance(fi); 17161da177e4SLinus Torvalds } 17171da177e4SLinus Torvalds 17181da177e4SLinus Torvalds return ret; 17191da177e4SLinus Torvalds } 17201da177e4SLinus Torvalds 17218a3d0316SAndy Gospodarek #ifdef CONFIG_IP_ROUTE_MULTIPATH 1722a6db4494SDavid Ahern static bool fib_good_nh(const struct fib_nh *nh) 1723a6db4494SDavid Ahern { 1724a6db4494SDavid Ahern int state = NUD_REACHABLE; 1725a6db4494SDavid Ahern 1726a6db4494SDavid Ahern if (nh->nh_scope == RT_SCOPE_LINK) { 1727a6db4494SDavid Ahern struct neighbour *n; 1728a6db4494SDavid Ahern 1729a6db4494SDavid Ahern rcu_read_lock_bh(); 1730a6db4494SDavid Ahern 1731d985d151SEric Dumazet n = __ipv4_neigh_lookup_noref(nh->nh_dev, 1732d985d151SEric Dumazet (__force u32)nh->nh_gw); 1733a6db4494SDavid Ahern if (n) 1734a6db4494SDavid Ahern state = n->nud_state; 1735a6db4494SDavid Ahern 1736a6db4494SDavid Ahern rcu_read_unlock_bh(); 1737a6db4494SDavid Ahern } 1738a6db4494SDavid Ahern 1739a6db4494SDavid Ahern return !!(state & NUD_VALID); 1740a6db4494SDavid Ahern } 17418a3d0316SAndy Gospodarek 17420e884c78SPeter Nørlund void fib_select_multipath(struct fib_result *res, int hash) 17431da177e4SLinus Torvalds { 17441da177e4SLinus Torvalds struct fib_info *fi = res->fi; 1745a6db4494SDavid Ahern struct net *net = fi->fib_net; 
1746a6db4494SDavid Ahern bool first = false; 17471da177e4SLinus Torvalds 17480e884c78SPeter Nørlund for_nexthops(fi) { 17490e884c78SPeter Nørlund if (hash > atomic_read(&nh->nh_upper_bound)) 17500eeb075fSAndy Gospodarek continue; 17511da177e4SLinus Torvalds 1752a6db4494SDavid Ahern if (!net->ipv4.sysctl_fib_multipath_use_neigh || 1753a6db4494SDavid Ahern fib_good_nh(nh)) { 17541da177e4SLinus Torvalds res->nh_sel = nhsel; 17551da177e4SLinus Torvalds return; 1756a6db4494SDavid Ahern } 1757a6db4494SDavid Ahern if (!first) { 1758a6db4494SDavid Ahern res->nh_sel = nhsel; 1759a6db4494SDavid Ahern first = true; 1760a6db4494SDavid Ahern } 17611da177e4SLinus Torvalds } endfor_nexthops(fi); 17621da177e4SLinus Torvalds } 17631da177e4SLinus Torvalds #endif 17643ce58d84SDavid Ahern 17653ce58d84SDavid Ahern void fib_select_path(struct net *net, struct fib_result *res, 1766bf4e0a3dSNikolay Aleksandrov struct flowi4 *fl4, const struct sk_buff *skb) 17673ce58d84SDavid Ahern { 17687a18c5b9SDavid Ahern bool oif_check; 17697a18c5b9SDavid Ahern 17707a18c5b9SDavid Ahern oif_check = (fl4->flowi4_oif == 0 || 17717a18c5b9SDavid Ahern fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF); 17727a18c5b9SDavid Ahern 17733ce58d84SDavid Ahern #ifdef CONFIG_IP_ROUTE_MULTIPATH 17747a18c5b9SDavid Ahern if (res->fi->fib_nhs > 1 && oif_check) { 1775bf4e0a3dSNikolay Aleksandrov int h = fib_multipath_hash(res->fi, fl4, skb); 17769920e48bSPaolo Abeni 1777bf4e0a3dSNikolay Aleksandrov fib_select_multipath(res, h); 17783ce58d84SDavid Ahern } 17793ce58d84SDavid Ahern else 17803ce58d84SDavid Ahern #endif 17813ce58d84SDavid Ahern if (!res->prefixlen && 17823ce58d84SDavid Ahern res->table->tb_num_default > 1 && 17837a18c5b9SDavid Ahern res->type == RTN_UNICAST && oif_check) 17843ce58d84SDavid Ahern fib_select_default(fl4, res); 17853ce58d84SDavid Ahern 17863ce58d84SDavid Ahern if (!fl4->saddr) 17873ce58d84SDavid Ahern fl4->saddr = FIB_RES_PREFSRC(net, *res); 17883ce58d84SDavid Ahern } 17893ce58d84SDavid Ahern 
EXPORT_SYMBOL_GPL(fib_select_path); 1790