/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
161da177e4SLinus Torvalds */ 171da177e4SLinus Torvalds 181da177e4SLinus Torvalds #include <asm/uaccess.h> 191da177e4SLinus Torvalds #include <asm/system.h> 201da177e4SLinus Torvalds #include <linux/bitops.h> 211da177e4SLinus Torvalds #include <linux/types.h> 221da177e4SLinus Torvalds #include <linux/kernel.h> 231da177e4SLinus Torvalds #include <linux/jiffies.h> 241da177e4SLinus Torvalds #include <linux/mm.h> 251da177e4SLinus Torvalds #include <linux/string.h> 261da177e4SLinus Torvalds #include <linux/socket.h> 271da177e4SLinus Torvalds #include <linux/sockios.h> 281da177e4SLinus Torvalds #include <linux/errno.h> 291da177e4SLinus Torvalds #include <linux/in.h> 301da177e4SLinus Torvalds #include <linux/inet.h> 3114c85021SArnaldo Carvalho de Melo #include <linux/inetdevice.h> 321da177e4SLinus Torvalds #include <linux/netdevice.h> 331da177e4SLinus Torvalds #include <linux/if_arp.h> 341da177e4SLinus Torvalds #include <linux/proc_fs.h> 351da177e4SLinus Torvalds #include <linux/skbuff.h> 361da177e4SLinus Torvalds #include <linux/netlink.h> 371da177e4SLinus Torvalds #include <linux/init.h> 381da177e4SLinus Torvalds 3914c85021SArnaldo Carvalho de Melo #include <net/arp.h> 401da177e4SLinus Torvalds #include <net/ip.h> 411da177e4SLinus Torvalds #include <net/protocol.h> 421da177e4SLinus Torvalds #include <net/route.h> 431da177e4SLinus Torvalds #include <net/tcp.h> 441da177e4SLinus Torvalds #include <net/sock.h> 451da177e4SLinus Torvalds #include <net/ip_fib.h> 461da177e4SLinus Torvalds #include <net/ip_mp_alg.h> 471da177e4SLinus Torvalds 481da177e4SLinus Torvalds #include "fib_lookup.h" 491da177e4SLinus Torvalds 501da177e4SLinus Torvalds #define FSprintk(a...) 
511da177e4SLinus Torvalds 521da177e4SLinus Torvalds static DEFINE_RWLOCK(fib_info_lock); 531da177e4SLinus Torvalds static struct hlist_head *fib_info_hash; 541da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash; 551da177e4SLinus Torvalds static unsigned int fib_hash_size; 561da177e4SLinus Torvalds static unsigned int fib_info_cnt; 571da177e4SLinus Torvalds 581da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8 591da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 601da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 631da177e4SLinus Torvalds 641da177e4SLinus Torvalds static DEFINE_SPINLOCK(fib_multipath_lock); 651da177e4SLinus Torvalds 661da177e4SLinus Torvalds #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 671da177e4SLinus Torvalds for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 681da177e4SLinus Torvalds 691da177e4SLinus Torvalds #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ 701da177e4SLinus Torvalds for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) 711da177e4SLinus Torvalds 721da177e4SLinus Torvalds #else /* CONFIG_IP_ROUTE_MULTIPATH */ 731da177e4SLinus Torvalds 741da177e4SLinus Torvalds /* Hope, that gcc will optimize it to get rid of dummy loop */ 751da177e4SLinus Torvalds 761da177e4SLinus Torvalds #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ 771da177e4SLinus Torvalds for (nhsel=0; nhsel < 1; nhsel++) 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ 801da177e4SLinus Torvalds for (nhsel=0; nhsel < 1; nhsel++) 811da177e4SLinus Torvalds 821da177e4SLinus Torvalds #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 831da177e4SLinus Torvalds 841da177e4SLinus Torvalds #define 
endfor_nexthops(fi) } 851da177e4SLinus Torvalds 861da177e4SLinus Torvalds 879b5b5cffSArjan van de Ven static const struct 881da177e4SLinus Torvalds { 891da177e4SLinus Torvalds int error; 901da177e4SLinus Torvalds u8 scope; 911da177e4SLinus Torvalds } fib_props[RTA_MAX + 1] = { 921da177e4SLinus Torvalds { 931da177e4SLinus Torvalds .error = 0, 941da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 951da177e4SLinus Torvalds }, /* RTN_UNSPEC */ 961da177e4SLinus Torvalds { 971da177e4SLinus Torvalds .error = 0, 981da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 991da177e4SLinus Torvalds }, /* RTN_UNICAST */ 1001da177e4SLinus Torvalds { 1011da177e4SLinus Torvalds .error = 0, 1021da177e4SLinus Torvalds .scope = RT_SCOPE_HOST, 1031da177e4SLinus Torvalds }, /* RTN_LOCAL */ 1041da177e4SLinus Torvalds { 1051da177e4SLinus Torvalds .error = 0, 1061da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1071da177e4SLinus Torvalds }, /* RTN_BROADCAST */ 1081da177e4SLinus Torvalds { 1091da177e4SLinus Torvalds .error = 0, 1101da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1111da177e4SLinus Torvalds }, /* RTN_ANYCAST */ 1121da177e4SLinus Torvalds { 1131da177e4SLinus Torvalds .error = 0, 1141da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1151da177e4SLinus Torvalds }, /* RTN_MULTICAST */ 1161da177e4SLinus Torvalds { 1171da177e4SLinus Torvalds .error = -EINVAL, 1181da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1191da177e4SLinus Torvalds }, /* RTN_BLACKHOLE */ 1201da177e4SLinus Torvalds { 1211da177e4SLinus Torvalds .error = -EHOSTUNREACH, 1221da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1231da177e4SLinus Torvalds }, /* RTN_UNREACHABLE */ 1241da177e4SLinus Torvalds { 1251da177e4SLinus Torvalds .error = -EACCES, 1261da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1271da177e4SLinus Torvalds }, /* RTN_PROHIBIT */ 1281da177e4SLinus Torvalds { 1291da177e4SLinus Torvalds .error = -EAGAIN, 1301da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1311da177e4SLinus Torvalds }, /* RTN_THROW 
*/ 1321da177e4SLinus Torvalds { 1331da177e4SLinus Torvalds .error = -EINVAL, 1341da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1351da177e4SLinus Torvalds }, /* RTN_NAT */ 1361da177e4SLinus Torvalds { 1371da177e4SLinus Torvalds .error = -EINVAL, 1381da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1391da177e4SLinus Torvalds }, /* RTN_XRESOLVE */ 1401da177e4SLinus Torvalds }; 1411da177e4SLinus Torvalds 1421da177e4SLinus Torvalds 1431da177e4SLinus Torvalds /* Release a nexthop info record */ 1441da177e4SLinus Torvalds 1451da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi) 1461da177e4SLinus Torvalds { 1471da177e4SLinus Torvalds if (fi->fib_dead == 0) { 1481da177e4SLinus Torvalds printk("Freeing alive fib_info %p\n", fi); 1491da177e4SLinus Torvalds return; 1501da177e4SLinus Torvalds } 1511da177e4SLinus Torvalds change_nexthops(fi) { 1521da177e4SLinus Torvalds if (nh->nh_dev) 1531da177e4SLinus Torvalds dev_put(nh->nh_dev); 1541da177e4SLinus Torvalds nh->nh_dev = NULL; 1551da177e4SLinus Torvalds } endfor_nexthops(fi); 1561da177e4SLinus Torvalds fib_info_cnt--; 1571da177e4SLinus Torvalds kfree(fi); 1581da177e4SLinus Torvalds } 1591da177e4SLinus Torvalds 1601da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi) 1611da177e4SLinus Torvalds { 1621da177e4SLinus Torvalds write_lock(&fib_info_lock); 1631da177e4SLinus Torvalds if (fi && --fi->fib_treeref == 0) { 1641da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 1651da177e4SLinus Torvalds if (fi->fib_prefsrc) 1661da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 1671da177e4SLinus Torvalds change_nexthops(fi) { 1681da177e4SLinus Torvalds if (!nh->nh_dev) 1691da177e4SLinus Torvalds continue; 1701da177e4SLinus Torvalds hlist_del(&nh->nh_hash); 1711da177e4SLinus Torvalds } endfor_nexthops(fi) 1721da177e4SLinus Torvalds fi->fib_dead = 1; 1731da177e4SLinus Torvalds fib_info_put(fi); 1741da177e4SLinus Torvalds } 1751da177e4SLinus Torvalds write_unlock(&fib_info_lock); 1761da177e4SLinus Torvalds } 
1771da177e4SLinus Torvalds 1781da177e4SLinus Torvalds static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 1791da177e4SLinus Torvalds { 1801da177e4SLinus Torvalds const struct fib_nh *onh = ofi->fib_nh; 1811da177e4SLinus Torvalds 1821da177e4SLinus Torvalds for_nexthops(fi) { 1831da177e4SLinus Torvalds if (nh->nh_oif != onh->nh_oif || 1841da177e4SLinus Torvalds nh->nh_gw != onh->nh_gw || 1851da177e4SLinus Torvalds nh->nh_scope != onh->nh_scope || 1861da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 1871da177e4SLinus Torvalds nh->nh_weight != onh->nh_weight || 1881da177e4SLinus Torvalds #endif 1891da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE 1901da177e4SLinus Torvalds nh->nh_tclassid != onh->nh_tclassid || 1911da177e4SLinus Torvalds #endif 1921da177e4SLinus Torvalds ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 1931da177e4SLinus Torvalds return -1; 1941da177e4SLinus Torvalds onh++; 1951da177e4SLinus Torvalds } endfor_nexthops(fi); 1961da177e4SLinus Torvalds return 0; 1971da177e4SLinus Torvalds } 1981da177e4SLinus Torvalds 1991da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 2001da177e4SLinus Torvalds { 2011da177e4SLinus Torvalds unsigned int mask = (fib_hash_size - 1); 2021da177e4SLinus Torvalds unsigned int val = fi->fib_nhs; 2031da177e4SLinus Torvalds 2041da177e4SLinus Torvalds val ^= fi->fib_protocol; 2051da177e4SLinus Torvalds val ^= fi->fib_prefsrc; 2061da177e4SLinus Torvalds val ^= fi->fib_priority; 2071da177e4SLinus Torvalds 2081da177e4SLinus Torvalds return (val ^ (val >> 7) ^ (val >> 12)) & mask; 2091da177e4SLinus Torvalds } 2101da177e4SLinus Torvalds 2111da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi) 2121da177e4SLinus Torvalds { 2131da177e4SLinus Torvalds struct hlist_head *head; 2141da177e4SLinus Torvalds struct hlist_node *node; 2151da177e4SLinus Torvalds struct fib_info *fi; 2161da177e4SLinus Torvalds unsigned int hash; 
2171da177e4SLinus Torvalds 2181da177e4SLinus Torvalds hash = fib_info_hashfn(nfi); 2191da177e4SLinus Torvalds head = &fib_info_hash[hash]; 2201da177e4SLinus Torvalds 2211da177e4SLinus Torvalds hlist_for_each_entry(fi, node, head, fib_hash) { 2221da177e4SLinus Torvalds if (fi->fib_nhs != nfi->fib_nhs) 2231da177e4SLinus Torvalds continue; 2241da177e4SLinus Torvalds if (nfi->fib_protocol == fi->fib_protocol && 2251da177e4SLinus Torvalds nfi->fib_prefsrc == fi->fib_prefsrc && 2261da177e4SLinus Torvalds nfi->fib_priority == fi->fib_priority && 2271da177e4SLinus Torvalds memcmp(nfi->fib_metrics, fi->fib_metrics, 2281da177e4SLinus Torvalds sizeof(fi->fib_metrics)) == 0 && 2291da177e4SLinus Torvalds ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 2301da177e4SLinus Torvalds (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 2311da177e4SLinus Torvalds return fi; 2321da177e4SLinus Torvalds } 2331da177e4SLinus Torvalds 2341da177e4SLinus Torvalds return NULL; 2351da177e4SLinus Torvalds } 2361da177e4SLinus Torvalds 2371da177e4SLinus Torvalds static inline unsigned int fib_devindex_hashfn(unsigned int val) 2381da177e4SLinus Torvalds { 2391da177e4SLinus Torvalds unsigned int mask = DEVINDEX_HASHSIZE - 1; 2401da177e4SLinus Torvalds 2411da177e4SLinus Torvalds return (val ^ 2421da177e4SLinus Torvalds (val >> DEVINDEX_HASHBITS) ^ 2431da177e4SLinus Torvalds (val >> (DEVINDEX_HASHBITS * 2))) & mask; 2441da177e4SLinus Torvalds } 2451da177e4SLinus Torvalds 2461da177e4SLinus Torvalds /* Check, that the gateway is already configured. 2471da177e4SLinus Torvalds Used only by redirect accept routine. 
2481da177e4SLinus Torvalds */ 2491da177e4SLinus Torvalds 2501da177e4SLinus Torvalds int ip_fib_check_default(u32 gw, struct net_device *dev) 2511da177e4SLinus Torvalds { 2521da177e4SLinus Torvalds struct hlist_head *head; 2531da177e4SLinus Torvalds struct hlist_node *node; 2541da177e4SLinus Torvalds struct fib_nh *nh; 2551da177e4SLinus Torvalds unsigned int hash; 2561da177e4SLinus Torvalds 2571da177e4SLinus Torvalds read_lock(&fib_info_lock); 2581da177e4SLinus Torvalds 2591da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 2601da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 2611da177e4SLinus Torvalds hlist_for_each_entry(nh, node, head, nh_hash) { 2621da177e4SLinus Torvalds if (nh->nh_dev == dev && 2631da177e4SLinus Torvalds nh->nh_gw == gw && 2641da177e4SLinus Torvalds !(nh->nh_flags&RTNH_F_DEAD)) { 2651da177e4SLinus Torvalds read_unlock(&fib_info_lock); 2661da177e4SLinus Torvalds return 0; 2671da177e4SLinus Torvalds } 2681da177e4SLinus Torvalds } 2691da177e4SLinus Torvalds 2701da177e4SLinus Torvalds read_unlock(&fib_info_lock); 2711da177e4SLinus Torvalds 2721da177e4SLinus Torvalds return -1; 2731da177e4SLinus Torvalds } 2741da177e4SLinus Torvalds 2751da177e4SLinus Torvalds void rtmsg_fib(int event, u32 key, struct fib_alias *fa, 2761da177e4SLinus Torvalds int z, int tb_id, 2771da177e4SLinus Torvalds struct nlmsghdr *n, struct netlink_skb_parms *req) 2781da177e4SLinus Torvalds { 2791da177e4SLinus Torvalds struct sk_buff *skb; 2809ed19f33SJamal Hadi Salim u32 pid = req ? 
req->pid : n->nlmsg_pid; 2811da177e4SLinus Torvalds int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); 2821da177e4SLinus Torvalds 2831da177e4SLinus Torvalds skb = alloc_skb(size, GFP_KERNEL); 2841da177e4SLinus Torvalds if (!skb) 2851da177e4SLinus Torvalds return; 2861da177e4SLinus Torvalds 2871da177e4SLinus Torvalds if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, 2881da177e4SLinus Torvalds fa->fa_type, fa->fa_scope, &key, z, 2891da177e4SLinus Torvalds fa->fa_tos, 290b6544c0bSJamal Hadi Salim fa->fa_info, 0) < 0) { 2911da177e4SLinus Torvalds kfree_skb(skb); 2921da177e4SLinus Torvalds return; 2931da177e4SLinus Torvalds } 294ac6d439dSPatrick McHardy NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; 2951da177e4SLinus Torvalds if (n->nlmsg_flags&NLM_F_ECHO) 2961da177e4SLinus Torvalds atomic_inc(&skb->users); 297ac6d439dSPatrick McHardy netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); 2981da177e4SLinus Torvalds if (n->nlmsg_flags&NLM_F_ECHO) 2991da177e4SLinus Torvalds netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); 3001da177e4SLinus Torvalds } 3011da177e4SLinus Torvalds 3021da177e4SLinus Torvalds /* Return the first fib alias matching TOS with 3031da177e4SLinus Torvalds * priority less than or equal to PRIO. 
3041da177e4SLinus Torvalds */ 3051da177e4SLinus Torvalds struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 3061da177e4SLinus Torvalds { 3071da177e4SLinus Torvalds if (fah) { 3081da177e4SLinus Torvalds struct fib_alias *fa; 3091da177e4SLinus Torvalds list_for_each_entry(fa, fah, fa_list) { 3101da177e4SLinus Torvalds if (fa->fa_tos > tos) 3111da177e4SLinus Torvalds continue; 3121da177e4SLinus Torvalds if (fa->fa_info->fib_priority >= prio || 3131da177e4SLinus Torvalds fa->fa_tos < tos) 3141da177e4SLinus Torvalds return fa; 3151da177e4SLinus Torvalds } 3161da177e4SLinus Torvalds } 3171da177e4SLinus Torvalds return NULL; 3181da177e4SLinus Torvalds } 3191da177e4SLinus Torvalds 3201da177e4SLinus Torvalds int fib_detect_death(struct fib_info *fi, int order, 3211da177e4SLinus Torvalds struct fib_info **last_resort, int *last_idx, int *dflt) 3221da177e4SLinus Torvalds { 3231da177e4SLinus Torvalds struct neighbour *n; 3241da177e4SLinus Torvalds int state = NUD_NONE; 3251da177e4SLinus Torvalds 3261da177e4SLinus Torvalds n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 3271da177e4SLinus Torvalds if (n) { 3281da177e4SLinus Torvalds state = n->nud_state; 3291da177e4SLinus Torvalds neigh_release(n); 3301da177e4SLinus Torvalds } 3311da177e4SLinus Torvalds if (state==NUD_REACHABLE) 3321da177e4SLinus Torvalds return 0; 3331da177e4SLinus Torvalds if ((state&NUD_VALID) && order != *dflt) 3341da177e4SLinus Torvalds return 0; 3351da177e4SLinus Torvalds if ((state&NUD_VALID) || 3361da177e4SLinus Torvalds (*last_idx<0 && order > *dflt)) { 3371da177e4SLinus Torvalds *last_resort = fi; 3381da177e4SLinus Torvalds *last_idx = order; 3391da177e4SLinus Torvalds } 3401da177e4SLinus Torvalds return 1; 3411da177e4SLinus Torvalds } 3421da177e4SLinus Torvalds 3431da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 3441da177e4SLinus Torvalds 3451da177e4SLinus Torvalds static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) 3461da177e4SLinus 
Torvalds { 3471da177e4SLinus Torvalds while (RTA_OK(attr,attrlen)) { 3481da177e4SLinus Torvalds if (attr->rta_type == type) 3491da177e4SLinus Torvalds return *(u32*)RTA_DATA(attr); 3501da177e4SLinus Torvalds attr = RTA_NEXT(attr, attrlen); 3511da177e4SLinus Torvalds } 3521da177e4SLinus Torvalds return 0; 3531da177e4SLinus Torvalds } 3541da177e4SLinus Torvalds 3551da177e4SLinus Torvalds static int 3561da177e4SLinus Torvalds fib_count_nexthops(struct rtattr *rta) 3571da177e4SLinus Torvalds { 3581da177e4SLinus Torvalds int nhs = 0; 3591da177e4SLinus Torvalds struct rtnexthop *nhp = RTA_DATA(rta); 3601da177e4SLinus Torvalds int nhlen = RTA_PAYLOAD(rta); 3611da177e4SLinus Torvalds 3621da177e4SLinus Torvalds while (nhlen >= (int)sizeof(struct rtnexthop)) { 3631da177e4SLinus Torvalds if ((nhlen -= nhp->rtnh_len) < 0) 3641da177e4SLinus Torvalds return 0; 3651da177e4SLinus Torvalds nhs++; 3661da177e4SLinus Torvalds nhp = RTNH_NEXT(nhp); 3671da177e4SLinus Torvalds }; 3681da177e4SLinus Torvalds return nhs; 3691da177e4SLinus Torvalds } 3701da177e4SLinus Torvalds 3711da177e4SLinus Torvalds static int 3721da177e4SLinus Torvalds fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) 3731da177e4SLinus Torvalds { 3741da177e4SLinus Torvalds struct rtnexthop *nhp = RTA_DATA(rta); 3751da177e4SLinus Torvalds int nhlen = RTA_PAYLOAD(rta); 3761da177e4SLinus Torvalds 3771da177e4SLinus Torvalds change_nexthops(fi) { 3781da177e4SLinus Torvalds int attrlen = nhlen - sizeof(struct rtnexthop); 3791da177e4SLinus Torvalds if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) 3801da177e4SLinus Torvalds return -EINVAL; 3811da177e4SLinus Torvalds nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; 3821da177e4SLinus Torvalds nh->nh_oif = nhp->rtnh_ifindex; 3831da177e4SLinus Torvalds nh->nh_weight = nhp->rtnh_hops + 1; 3841da177e4SLinus Torvalds if (attrlen) { 3851da177e4SLinus Torvalds nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 3861da177e4SLinus 
Torvalds #ifdef CONFIG_NET_CLS_ROUTE 3871da177e4SLinus Torvalds nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); 3881da177e4SLinus Torvalds #endif 3891da177e4SLinus Torvalds } 3901da177e4SLinus Torvalds nhp = RTNH_NEXT(nhp); 3911da177e4SLinus Torvalds } endfor_nexthops(fi); 3921da177e4SLinus Torvalds return 0; 3931da177e4SLinus Torvalds } 3941da177e4SLinus Torvalds 3951da177e4SLinus Torvalds #endif 3961da177e4SLinus Torvalds 3971da177e4SLinus Torvalds int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, 3981da177e4SLinus Torvalds struct fib_info *fi) 3991da177e4SLinus Torvalds { 4001da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4011da177e4SLinus Torvalds struct rtnexthop *nhp; 4021da177e4SLinus Torvalds int nhlen; 4031da177e4SLinus Torvalds #endif 4041da177e4SLinus Torvalds 4051da177e4SLinus Torvalds if (rta->rta_priority && 4061da177e4SLinus Torvalds *rta->rta_priority != fi->fib_priority) 4071da177e4SLinus Torvalds return 1; 4081da177e4SLinus Torvalds 4091da177e4SLinus Torvalds if (rta->rta_oif || rta->rta_gw) { 4101da177e4SLinus Torvalds if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && 4111da177e4SLinus Torvalds (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) 4121da177e4SLinus Torvalds return 0; 4131da177e4SLinus Torvalds return 1; 4141da177e4SLinus Torvalds } 4151da177e4SLinus Torvalds 4161da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4171da177e4SLinus Torvalds if (rta->rta_mp == NULL) 4181da177e4SLinus Torvalds return 0; 4191da177e4SLinus Torvalds nhp = RTA_DATA(rta->rta_mp); 4201da177e4SLinus Torvalds nhlen = RTA_PAYLOAD(rta->rta_mp); 4211da177e4SLinus Torvalds 4221da177e4SLinus Torvalds for_nexthops(fi) { 4231da177e4SLinus Torvalds int attrlen = nhlen - sizeof(struct rtnexthop); 4241da177e4SLinus Torvalds u32 gw; 4251da177e4SLinus Torvalds 4261da177e4SLinus Torvalds if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) 4271da177e4SLinus Torvalds return -EINVAL; 
4281da177e4SLinus Torvalds if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) 4291da177e4SLinus Torvalds return 1; 4301da177e4SLinus Torvalds if (attrlen) { 4311da177e4SLinus Torvalds gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 4321da177e4SLinus Torvalds if (gw && gw != nh->nh_gw) 4331da177e4SLinus Torvalds return 1; 4341da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE 4351da177e4SLinus Torvalds gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); 4361da177e4SLinus Torvalds if (gw && gw != nh->nh_tclassid) 4371da177e4SLinus Torvalds return 1; 4381da177e4SLinus Torvalds #endif 4391da177e4SLinus Torvalds } 4401da177e4SLinus Torvalds nhp = RTNH_NEXT(nhp); 4411da177e4SLinus Torvalds } endfor_nexthops(fi); 4421da177e4SLinus Torvalds #endif 4431da177e4SLinus Torvalds return 0; 4441da177e4SLinus Torvalds } 4451da177e4SLinus Torvalds 4461da177e4SLinus Torvalds 4471da177e4SLinus Torvalds /* 4481da177e4SLinus Torvalds Picture 4491da177e4SLinus Torvalds ------- 4501da177e4SLinus Torvalds 4511da177e4SLinus Torvalds Semantics of nexthop is very messy by historical reasons. 4521da177e4SLinus Torvalds We have to take into account, that: 4531da177e4SLinus Torvalds a) gateway can be actually local interface address, 4541da177e4SLinus Torvalds so that gatewayed route is direct. 4551da177e4SLinus Torvalds b) gateway must be on-link address, possibly 4561da177e4SLinus Torvalds described not by an ifaddr, but also by a direct route. 4571da177e4SLinus Torvalds c) If both gateway and interface are specified, they should not 4581da177e4SLinus Torvalds contradict. 4591da177e4SLinus Torvalds d) If we use tunnel routes, gateway could be not on-link. 4601da177e4SLinus Torvalds 4611da177e4SLinus Torvalds Attempt to reconcile all of these (alas, self-contradictory) conditions 4621da177e4SLinus Torvalds results in pretty ugly and hairy code with obscure logic. 

   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is local address,
   "link" is direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   Code is still hairy, but now it is apparently logically
   consistent and very flexible. F.e. as a by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.

   Normally it looks as follows.
4831da177e4SLinus Torvalds 4841da177e4SLinus Torvalds {universe prefix} -> (gw, oif) [scope link] 4851da177e4SLinus Torvalds | 4861da177e4SLinus Torvalds |-> {link prefix} -> (gw, oif) [scope local] 4871da177e4SLinus Torvalds | 4881da177e4SLinus Torvalds |-> {local prefix} (terminal node) 4891da177e4SLinus Torvalds */ 4901da177e4SLinus Torvalds 4911da177e4SLinus Torvalds static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) 4921da177e4SLinus Torvalds { 4931da177e4SLinus Torvalds int err; 4941da177e4SLinus Torvalds 4951da177e4SLinus Torvalds if (nh->nh_gw) { 4961da177e4SLinus Torvalds struct fib_result res; 4971da177e4SLinus Torvalds 4981da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_PERVASIVE 4991da177e4SLinus Torvalds if (nh->nh_flags&RTNH_F_PERVASIVE) 5001da177e4SLinus Torvalds return 0; 5011da177e4SLinus Torvalds #endif 5021da177e4SLinus Torvalds if (nh->nh_flags&RTNH_F_ONLINK) { 5031da177e4SLinus Torvalds struct net_device *dev; 5041da177e4SLinus Torvalds 5051da177e4SLinus Torvalds if (r->rtm_scope >= RT_SCOPE_LINK) 5061da177e4SLinus Torvalds return -EINVAL; 5071da177e4SLinus Torvalds if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) 5081da177e4SLinus Torvalds return -EINVAL; 5091da177e4SLinus Torvalds if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) 5101da177e4SLinus Torvalds return -ENODEV; 5111da177e4SLinus Torvalds if (!(dev->flags&IFF_UP)) 5121da177e4SLinus Torvalds return -ENETDOWN; 5131da177e4SLinus Torvalds nh->nh_dev = dev; 5141da177e4SLinus Torvalds dev_hold(dev); 5151da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_LINK; 5161da177e4SLinus Torvalds return 0; 5171da177e4SLinus Torvalds } 5181da177e4SLinus Torvalds { 5191da177e4SLinus Torvalds struct flowi fl = { .nl_u = { .ip4_u = 5201da177e4SLinus Torvalds { .daddr = nh->nh_gw, 5211da177e4SLinus Torvalds .scope = r->rtm_scope + 1 } }, 5221da177e4SLinus Torvalds .oif = nh->nh_oif }; 5231da177e4SLinus Torvalds 5241da177e4SLinus Torvalds /* It is not necessary, but requires a 
bit of thinking */ 5251da177e4SLinus Torvalds if (fl.fl4_scope < RT_SCOPE_LINK) 5261da177e4SLinus Torvalds fl.fl4_scope = RT_SCOPE_LINK; 5271da177e4SLinus Torvalds if ((err = fib_lookup(&fl, &res)) != 0) 5281da177e4SLinus Torvalds return err; 5291da177e4SLinus Torvalds } 5301da177e4SLinus Torvalds err = -EINVAL; 5311da177e4SLinus Torvalds if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 5321da177e4SLinus Torvalds goto out; 5331da177e4SLinus Torvalds nh->nh_scope = res.scope; 5341da177e4SLinus Torvalds nh->nh_oif = FIB_RES_OIF(res); 5351da177e4SLinus Torvalds if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 5361da177e4SLinus Torvalds goto out; 5371da177e4SLinus Torvalds dev_hold(nh->nh_dev); 5381da177e4SLinus Torvalds err = -ENETDOWN; 5391da177e4SLinus Torvalds if (!(nh->nh_dev->flags & IFF_UP)) 5401da177e4SLinus Torvalds goto out; 5411da177e4SLinus Torvalds err = 0; 5421da177e4SLinus Torvalds out: 5431da177e4SLinus Torvalds fib_res_put(&res); 5441da177e4SLinus Torvalds return err; 5451da177e4SLinus Torvalds } else { 5461da177e4SLinus Torvalds struct in_device *in_dev; 5471da177e4SLinus Torvalds 5481da177e4SLinus Torvalds if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 5491da177e4SLinus Torvalds return -EINVAL; 5501da177e4SLinus Torvalds 5511da177e4SLinus Torvalds in_dev = inetdev_by_index(nh->nh_oif); 5521da177e4SLinus Torvalds if (in_dev == NULL) 5531da177e4SLinus Torvalds return -ENODEV; 5541da177e4SLinus Torvalds if (!(in_dev->dev->flags&IFF_UP)) { 5551da177e4SLinus Torvalds in_dev_put(in_dev); 5561da177e4SLinus Torvalds return -ENETDOWN; 5571da177e4SLinus Torvalds } 5581da177e4SLinus Torvalds nh->nh_dev = in_dev->dev; 5591da177e4SLinus Torvalds dev_hold(nh->nh_dev); 5601da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_HOST; 5611da177e4SLinus Torvalds in_dev_put(in_dev); 5621da177e4SLinus Torvalds } 5631da177e4SLinus Torvalds return 0; 5641da177e4SLinus Torvalds } 5651da177e4SLinus Torvalds 5661da177e4SLinus Torvalds static inline unsigned int 
fib_laddr_hashfn(u32 val) 5671da177e4SLinus Torvalds { 5681da177e4SLinus Torvalds unsigned int mask = (fib_hash_size - 1); 5691da177e4SLinus Torvalds 5701da177e4SLinus Torvalds return (val ^ (val >> 7) ^ (val >> 14)) & mask; 5711da177e4SLinus Torvalds } 5721da177e4SLinus Torvalds 5731da177e4SLinus Torvalds static struct hlist_head *fib_hash_alloc(int bytes) 5741da177e4SLinus Torvalds { 5751da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 5761da177e4SLinus Torvalds return kmalloc(bytes, GFP_KERNEL); 5771da177e4SLinus Torvalds else 5781da177e4SLinus Torvalds return (struct hlist_head *) 5791da177e4SLinus Torvalds __get_free_pages(GFP_KERNEL, get_order(bytes)); 5801da177e4SLinus Torvalds } 5811da177e4SLinus Torvalds 5821da177e4SLinus Torvalds static void fib_hash_free(struct hlist_head *hash, int bytes) 5831da177e4SLinus Torvalds { 5841da177e4SLinus Torvalds if (!hash) 5851da177e4SLinus Torvalds return; 5861da177e4SLinus Torvalds 5871da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 5881da177e4SLinus Torvalds kfree(hash); 5891da177e4SLinus Torvalds else 5901da177e4SLinus Torvalds free_pages((unsigned long) hash, get_order(bytes)); 5911da177e4SLinus Torvalds } 5921da177e4SLinus Torvalds 5931da177e4SLinus Torvalds static void fib_hash_move(struct hlist_head *new_info_hash, 5941da177e4SLinus Torvalds struct hlist_head *new_laddrhash, 5951da177e4SLinus Torvalds unsigned int new_size) 5961da177e4SLinus Torvalds { 597b7656e7fSDavid S. Miller struct hlist_head *old_info_hash, *old_laddrhash; 5981da177e4SLinus Torvalds unsigned int old_size = fib_hash_size; 599b7656e7fSDavid S. Miller unsigned int i, bytes; 6001da177e4SLinus Torvalds 6011da177e4SLinus Torvalds write_lock(&fib_info_lock); 602b7656e7fSDavid S. Miller old_info_hash = fib_info_hash; 603b7656e7fSDavid S. 
Miller old_laddrhash = fib_info_laddrhash; 6041da177e4SLinus Torvalds fib_hash_size = new_size; 6051da177e4SLinus Torvalds 6061da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 6071da177e4SLinus Torvalds struct hlist_head *head = &fib_info_hash[i]; 6081da177e4SLinus Torvalds struct hlist_node *node, *n; 6091da177e4SLinus Torvalds struct fib_info *fi; 6101da177e4SLinus Torvalds 6111da177e4SLinus Torvalds hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { 6121da177e4SLinus Torvalds struct hlist_head *dest; 6131da177e4SLinus Torvalds unsigned int new_hash; 6141da177e4SLinus Torvalds 6151da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 6161da177e4SLinus Torvalds 6171da177e4SLinus Torvalds new_hash = fib_info_hashfn(fi); 6181da177e4SLinus Torvalds dest = &new_info_hash[new_hash]; 6191da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, dest); 6201da177e4SLinus Torvalds } 6211da177e4SLinus Torvalds } 6221da177e4SLinus Torvalds fib_info_hash = new_info_hash; 6231da177e4SLinus Torvalds 6241da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 6251da177e4SLinus Torvalds struct hlist_head *lhead = &fib_info_laddrhash[i]; 6261da177e4SLinus Torvalds struct hlist_node *node, *n; 6271da177e4SLinus Torvalds struct fib_info *fi; 6281da177e4SLinus Torvalds 6291da177e4SLinus Torvalds hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { 6301da177e4SLinus Torvalds struct hlist_head *ldest; 6311da177e4SLinus Torvalds unsigned int new_hash; 6321da177e4SLinus Torvalds 6331da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 6341da177e4SLinus Torvalds 6351da177e4SLinus Torvalds new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 6361da177e4SLinus Torvalds ldest = &new_laddrhash[new_hash]; 6371da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, ldest); 6381da177e4SLinus Torvalds } 6391da177e4SLinus Torvalds } 6401da177e4SLinus Torvalds fib_info_laddrhash = new_laddrhash; 6411da177e4SLinus Torvalds 6421da177e4SLinus Torvalds write_unlock(&fib_info_lock); 643b7656e7fSDavid S. 
Miller 644b7656e7fSDavid S. Miller bytes = old_size * sizeof(struct hlist_head *); 645b7656e7fSDavid S. Miller fib_hash_free(old_info_hash, bytes); 646b7656e7fSDavid S. Miller fib_hash_free(old_laddrhash, bytes); 6471da177e4SLinus Torvalds } 6481da177e4SLinus Torvalds 6491da177e4SLinus Torvalds struct fib_info * 6501da177e4SLinus Torvalds fib_create_info(const struct rtmsg *r, struct kern_rta *rta, 6511da177e4SLinus Torvalds const struct nlmsghdr *nlh, int *errp) 6521da177e4SLinus Torvalds { 6531da177e4SLinus Torvalds int err; 6541da177e4SLinus Torvalds struct fib_info *fi = NULL; 6551da177e4SLinus Torvalds struct fib_info *ofi; 6561da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 6571da177e4SLinus Torvalds int nhs = 1; 6581da177e4SLinus Torvalds #else 6591da177e4SLinus Torvalds const int nhs = 1; 6601da177e4SLinus Torvalds #endif 6611da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 6621da177e4SLinus Torvalds u32 mp_alg = IP_MP_ALG_NONE; 6631da177e4SLinus Torvalds #endif 6641da177e4SLinus Torvalds 6651da177e4SLinus Torvalds /* Fast check to catch the most weird cases */ 6661da177e4SLinus Torvalds if (fib_props[r->rtm_type].scope > r->rtm_scope) 6671da177e4SLinus Torvalds goto err_inval; 6681da177e4SLinus Torvalds 6691da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 6701da177e4SLinus Torvalds if (rta->rta_mp) { 6711da177e4SLinus Torvalds nhs = fib_count_nexthops(rta->rta_mp); 6721da177e4SLinus Torvalds if (nhs == 0) 6731da177e4SLinus Torvalds goto err_inval; 6741da177e4SLinus Torvalds } 6751da177e4SLinus Torvalds #endif 6761da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 6771da177e4SLinus Torvalds if (rta->rta_mp_alg) { 6781da177e4SLinus Torvalds mp_alg = *rta->rta_mp_alg; 6791da177e4SLinus Torvalds 6801da177e4SLinus Torvalds if (mp_alg < IP_MP_ALG_NONE || 6811da177e4SLinus Torvalds mp_alg > IP_MP_ALG_MAX) 6821da177e4SLinus Torvalds goto err_inval; 6831da177e4SLinus Torvalds } 6841da177e4SLinus Torvalds #endif 
6851da177e4SLinus Torvalds 6861da177e4SLinus Torvalds err = -ENOBUFS; 6871da177e4SLinus Torvalds if (fib_info_cnt >= fib_hash_size) { 6881da177e4SLinus Torvalds unsigned int new_size = fib_hash_size << 1; 6891da177e4SLinus Torvalds struct hlist_head *new_info_hash; 6901da177e4SLinus Torvalds struct hlist_head *new_laddrhash; 6911da177e4SLinus Torvalds unsigned int bytes; 6921da177e4SLinus Torvalds 6931da177e4SLinus Torvalds if (!new_size) 6941da177e4SLinus Torvalds new_size = 1; 6951da177e4SLinus Torvalds bytes = new_size * sizeof(struct hlist_head *); 6961da177e4SLinus Torvalds new_info_hash = fib_hash_alloc(bytes); 6971da177e4SLinus Torvalds new_laddrhash = fib_hash_alloc(bytes); 6981da177e4SLinus Torvalds if (!new_info_hash || !new_laddrhash) { 6991da177e4SLinus Torvalds fib_hash_free(new_info_hash, bytes); 7001da177e4SLinus Torvalds fib_hash_free(new_laddrhash, bytes); 7011da177e4SLinus Torvalds } else { 7021da177e4SLinus Torvalds memset(new_info_hash, 0, bytes); 7031da177e4SLinus Torvalds memset(new_laddrhash, 0, bytes); 7041da177e4SLinus Torvalds 7051da177e4SLinus Torvalds fib_hash_move(new_info_hash, new_laddrhash, new_size); 7061da177e4SLinus Torvalds } 7071da177e4SLinus Torvalds 7081da177e4SLinus Torvalds if (!fib_hash_size) 7091da177e4SLinus Torvalds goto failure; 7101da177e4SLinus Torvalds } 7111da177e4SLinus Torvalds 7120da974f4SPanagiotis Issaris fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 7131da177e4SLinus Torvalds if (fi == NULL) 7141da177e4SLinus Torvalds goto failure; 7151da177e4SLinus Torvalds fib_info_cnt++; 7161da177e4SLinus Torvalds 7171da177e4SLinus Torvalds fi->fib_protocol = r->rtm_protocol; 7181da177e4SLinus Torvalds 7191da177e4SLinus Torvalds fi->fib_nhs = nhs; 7201da177e4SLinus Torvalds change_nexthops(fi) { 7211da177e4SLinus Torvalds nh->nh_parent = fi; 7221da177e4SLinus Torvalds } endfor_nexthops(fi) 7231da177e4SLinus Torvalds 7241da177e4SLinus Torvalds fi->fib_flags = r->rtm_flags; 7251da177e4SLinus Torvalds if 
(rta->rta_priority) 7261da177e4SLinus Torvalds fi->fib_priority = *rta->rta_priority; 7271da177e4SLinus Torvalds if (rta->rta_mx) { 7281da177e4SLinus Torvalds int attrlen = RTA_PAYLOAD(rta->rta_mx); 7291da177e4SLinus Torvalds struct rtattr *attr = RTA_DATA(rta->rta_mx); 7301da177e4SLinus Torvalds 7311da177e4SLinus Torvalds while (RTA_OK(attr, attrlen)) { 7321da177e4SLinus Torvalds unsigned flavor = attr->rta_type; 7331da177e4SLinus Torvalds if (flavor) { 7341da177e4SLinus Torvalds if (flavor > RTAX_MAX) 7351da177e4SLinus Torvalds goto err_inval; 7361da177e4SLinus Torvalds fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); 7371da177e4SLinus Torvalds } 7381da177e4SLinus Torvalds attr = RTA_NEXT(attr, attrlen); 7391da177e4SLinus Torvalds } 7401da177e4SLinus Torvalds } 7411da177e4SLinus Torvalds if (rta->rta_prefsrc) 7421da177e4SLinus Torvalds memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); 7431da177e4SLinus Torvalds 7441da177e4SLinus Torvalds if (rta->rta_mp) { 7451da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 7461da177e4SLinus Torvalds if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) 7471da177e4SLinus Torvalds goto failure; 7481da177e4SLinus Torvalds if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) 7491da177e4SLinus Torvalds goto err_inval; 7501da177e4SLinus Torvalds if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) 7511da177e4SLinus Torvalds goto err_inval; 7521da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE 7531da177e4SLinus Torvalds if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) 7541da177e4SLinus Torvalds goto err_inval; 7551da177e4SLinus Torvalds #endif 7561da177e4SLinus Torvalds #else 7571da177e4SLinus Torvalds goto err_inval; 7581da177e4SLinus Torvalds #endif 7591da177e4SLinus Torvalds } else { 7601da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 7611da177e4SLinus Torvalds if (rta->rta_oif) 7621da177e4SLinus Torvalds nh->nh_oif = *rta->rta_oif; 7631da177e4SLinus Torvalds if (rta->rta_gw) 
7641da177e4SLinus Torvalds memcpy(&nh->nh_gw, rta->rta_gw, 4); 7651da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE 7661da177e4SLinus Torvalds if (rta->rta_flow) 7671da177e4SLinus Torvalds memcpy(&nh->nh_tclassid, rta->rta_flow, 4); 7681da177e4SLinus Torvalds #endif 7691da177e4SLinus Torvalds nh->nh_flags = r->rtm_flags; 7701da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 7711da177e4SLinus Torvalds nh->nh_weight = 1; 7721da177e4SLinus Torvalds #endif 7731da177e4SLinus Torvalds } 7741da177e4SLinus Torvalds 7751da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 7761da177e4SLinus Torvalds fi->fib_mp_alg = mp_alg; 7771da177e4SLinus Torvalds #endif 7781da177e4SLinus Torvalds 7791da177e4SLinus Torvalds if (fib_props[r->rtm_type].error) { 7801da177e4SLinus Torvalds if (rta->rta_gw || rta->rta_oif || rta->rta_mp) 7811da177e4SLinus Torvalds goto err_inval; 7821da177e4SLinus Torvalds goto link_it; 7831da177e4SLinus Torvalds } 7841da177e4SLinus Torvalds 7851da177e4SLinus Torvalds if (r->rtm_scope > RT_SCOPE_HOST) 7861da177e4SLinus Torvalds goto err_inval; 7871da177e4SLinus Torvalds 7881da177e4SLinus Torvalds if (r->rtm_scope == RT_SCOPE_HOST) { 7891da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 7901da177e4SLinus Torvalds 7911da177e4SLinus Torvalds /* Local address is added. 
*/ 7921da177e4SLinus Torvalds if (nhs != 1 || nh->nh_gw) 7931da177e4SLinus Torvalds goto err_inval; 7941da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_NOWHERE; 7951da177e4SLinus Torvalds nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); 7961da177e4SLinus Torvalds err = -ENODEV; 7971da177e4SLinus Torvalds if (nh->nh_dev == NULL) 7981da177e4SLinus Torvalds goto failure; 7991da177e4SLinus Torvalds } else { 8001da177e4SLinus Torvalds change_nexthops(fi) { 8011da177e4SLinus Torvalds if ((err = fib_check_nh(r, fi, nh)) != 0) 8021da177e4SLinus Torvalds goto failure; 8031da177e4SLinus Torvalds } endfor_nexthops(fi) 8041da177e4SLinus Torvalds } 8051da177e4SLinus Torvalds 8061da177e4SLinus Torvalds if (fi->fib_prefsrc) { 8071da177e4SLinus Torvalds if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || 8081da177e4SLinus Torvalds memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) 8091da177e4SLinus Torvalds if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) 8101da177e4SLinus Torvalds goto err_inval; 8111da177e4SLinus Torvalds } 8121da177e4SLinus Torvalds 8131da177e4SLinus Torvalds link_it: 8141da177e4SLinus Torvalds if ((ofi = fib_find_info(fi)) != NULL) { 8151da177e4SLinus Torvalds fi->fib_dead = 1; 8161da177e4SLinus Torvalds free_fib_info(fi); 8171da177e4SLinus Torvalds ofi->fib_treeref++; 8181da177e4SLinus Torvalds return ofi; 8191da177e4SLinus Torvalds } 8201da177e4SLinus Torvalds 8211da177e4SLinus Torvalds fi->fib_treeref++; 8221da177e4SLinus Torvalds atomic_inc(&fi->fib_clntref); 8231da177e4SLinus Torvalds write_lock(&fib_info_lock); 8241da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, 8251da177e4SLinus Torvalds &fib_info_hash[fib_info_hashfn(fi)]); 8261da177e4SLinus Torvalds if (fi->fib_prefsrc) { 8271da177e4SLinus Torvalds struct hlist_head *head; 8281da177e4SLinus Torvalds 8291da177e4SLinus Torvalds head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 8301da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, head); 8311da177e4SLinus Torvalds } 8321da177e4SLinus 
Torvalds change_nexthops(fi) { 8331da177e4SLinus Torvalds struct hlist_head *head; 8341da177e4SLinus Torvalds unsigned int hash; 8351da177e4SLinus Torvalds 8361da177e4SLinus Torvalds if (!nh->nh_dev) 8371da177e4SLinus Torvalds continue; 8381da177e4SLinus Torvalds hash = fib_devindex_hashfn(nh->nh_dev->ifindex); 8391da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 8401da177e4SLinus Torvalds hlist_add_head(&nh->nh_hash, head); 8411da177e4SLinus Torvalds } endfor_nexthops(fi) 8421da177e4SLinus Torvalds write_unlock(&fib_info_lock); 8431da177e4SLinus Torvalds return fi; 8441da177e4SLinus Torvalds 8451da177e4SLinus Torvalds err_inval: 8461da177e4SLinus Torvalds err = -EINVAL; 8471da177e4SLinus Torvalds 8481da177e4SLinus Torvalds failure: 8491da177e4SLinus Torvalds *errp = err; 8501da177e4SLinus Torvalds if (fi) { 8511da177e4SLinus Torvalds fi->fib_dead = 1; 8521da177e4SLinus Torvalds free_fib_info(fi); 8531da177e4SLinus Torvalds } 8541da177e4SLinus Torvalds return NULL; 8551da177e4SLinus Torvalds } 8561da177e4SLinus Torvalds 857e5b43760SRobert Olsson /* Note! fib_semantic_match intentionally uses RCU list functions. 
*/ 8581da177e4SLinus Torvalds int fib_semantic_match(struct list_head *head, const struct flowi *flp, 8591da177e4SLinus Torvalds struct fib_result *res, __u32 zone, __u32 mask, 8601da177e4SLinus Torvalds int prefixlen) 8611da177e4SLinus Torvalds { 8621da177e4SLinus Torvalds struct fib_alias *fa; 8631da177e4SLinus Torvalds int nh_sel = 0; 8641da177e4SLinus Torvalds 865e5b43760SRobert Olsson list_for_each_entry_rcu(fa, head, fa_list) { 8661da177e4SLinus Torvalds int err; 8671da177e4SLinus Torvalds 8681da177e4SLinus Torvalds if (fa->fa_tos && 8691da177e4SLinus Torvalds fa->fa_tos != flp->fl4_tos) 8701da177e4SLinus Torvalds continue; 8711da177e4SLinus Torvalds 8721da177e4SLinus Torvalds if (fa->fa_scope < flp->fl4_scope) 8731da177e4SLinus Torvalds continue; 8741da177e4SLinus Torvalds 8751da177e4SLinus Torvalds fa->fa_state |= FA_S_ACCESSED; 8761da177e4SLinus Torvalds 8771da177e4SLinus Torvalds err = fib_props[fa->fa_type].error; 8781da177e4SLinus Torvalds if (err == 0) { 8791da177e4SLinus Torvalds struct fib_info *fi = fa->fa_info; 8801da177e4SLinus Torvalds 8811da177e4SLinus Torvalds if (fi->fib_flags & RTNH_F_DEAD) 8821da177e4SLinus Torvalds continue; 8831da177e4SLinus Torvalds 8841da177e4SLinus Torvalds switch (fa->fa_type) { 8851da177e4SLinus Torvalds case RTN_UNICAST: 8861da177e4SLinus Torvalds case RTN_LOCAL: 8871da177e4SLinus Torvalds case RTN_BROADCAST: 8881da177e4SLinus Torvalds case RTN_ANYCAST: 8891da177e4SLinus Torvalds case RTN_MULTICAST: 8901da177e4SLinus Torvalds for_nexthops(fi) { 8911da177e4SLinus Torvalds if (nh->nh_flags&RTNH_F_DEAD) 8921da177e4SLinus Torvalds continue; 8931da177e4SLinus Torvalds if (!flp->oif || flp->oif == nh->nh_oif) 8941da177e4SLinus Torvalds break; 8951da177e4SLinus Torvalds } 8961da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 8971da177e4SLinus Torvalds if (nhsel < fi->fib_nhs) { 8981da177e4SLinus Torvalds nh_sel = nhsel; 8991da177e4SLinus Torvalds goto out_fill_res; 9001da177e4SLinus Torvalds } 9011da177e4SLinus 
Torvalds #else 9021da177e4SLinus Torvalds if (nhsel < 1) { 9031da177e4SLinus Torvalds goto out_fill_res; 9041da177e4SLinus Torvalds } 9051da177e4SLinus Torvalds #endif 9061da177e4SLinus Torvalds endfor_nexthops(fi); 9071da177e4SLinus Torvalds continue; 9081da177e4SLinus Torvalds 9091da177e4SLinus Torvalds default: 9101da177e4SLinus Torvalds printk(KERN_DEBUG "impossible 102\n"); 9111da177e4SLinus Torvalds return -EINVAL; 9121da177e4SLinus Torvalds }; 9131da177e4SLinus Torvalds } 9141da177e4SLinus Torvalds return err; 9151da177e4SLinus Torvalds } 9161da177e4SLinus Torvalds return 1; 9171da177e4SLinus Torvalds 9181da177e4SLinus Torvalds out_fill_res: 9191da177e4SLinus Torvalds res->prefixlen = prefixlen; 9201da177e4SLinus Torvalds res->nh_sel = nh_sel; 9211da177e4SLinus Torvalds res->type = fa->fa_type; 9221da177e4SLinus Torvalds res->scope = fa->fa_scope; 9231da177e4SLinus Torvalds res->fi = fa->fa_info; 9241da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 9251da177e4SLinus Torvalds res->netmask = mask; 9261da177e4SLinus Torvalds res->network = zone & 9271da177e4SLinus Torvalds (0xFFFFFFFF >> (32 - prefixlen)); 9281da177e4SLinus Torvalds #endif 9291da177e4SLinus Torvalds atomic_inc(&res->fi->fib_clntref); 9301da177e4SLinus Torvalds return 0; 9311da177e4SLinus Torvalds } 9321da177e4SLinus Torvalds 9331da177e4SLinus Torvalds /* Find appropriate source address to this destination */ 9341da177e4SLinus Torvalds 9351da177e4SLinus Torvalds u32 __fib_res_prefsrc(struct fib_result *res) 9361da177e4SLinus Torvalds { 9371da177e4SLinus Torvalds return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); 9381da177e4SLinus Torvalds } 9391da177e4SLinus Torvalds 9401da177e4SLinus Torvalds int 9411da177e4SLinus Torvalds fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 9421da177e4SLinus Torvalds u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, 943b6544c0bSJamal Hadi Salim struct fib_info *fi, unsigned int flags) 9441da177e4SLinus 
Torvalds { 9451da177e4SLinus Torvalds struct rtmsg *rtm; 9461da177e4SLinus Torvalds struct nlmsghdr *nlh; 9471da177e4SLinus Torvalds unsigned char *b = skb->tail; 9481da177e4SLinus Torvalds 949b6544c0bSJamal Hadi Salim nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); 9501da177e4SLinus Torvalds rtm = NLMSG_DATA(nlh); 9511da177e4SLinus Torvalds rtm->rtm_family = AF_INET; 9521da177e4SLinus Torvalds rtm->rtm_dst_len = dst_len; 9531da177e4SLinus Torvalds rtm->rtm_src_len = 0; 9541da177e4SLinus Torvalds rtm->rtm_tos = tos; 9551da177e4SLinus Torvalds rtm->rtm_table = tb_id; 9561da177e4SLinus Torvalds rtm->rtm_type = type; 9571da177e4SLinus Torvalds rtm->rtm_flags = fi->fib_flags; 9581da177e4SLinus Torvalds rtm->rtm_scope = scope; 9591da177e4SLinus Torvalds if (rtm->rtm_dst_len) 9601da177e4SLinus Torvalds RTA_PUT(skb, RTA_DST, 4, dst); 9611da177e4SLinus Torvalds rtm->rtm_protocol = fi->fib_protocol; 9621da177e4SLinus Torvalds if (fi->fib_priority) 9631da177e4SLinus Torvalds RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); 9641da177e4SLinus Torvalds if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 9651da177e4SLinus Torvalds goto rtattr_failure; 9661da177e4SLinus Torvalds if (fi->fib_prefsrc) 9671da177e4SLinus Torvalds RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); 9681da177e4SLinus Torvalds if (fi->fib_nhs == 1) { 9691da177e4SLinus Torvalds if (fi->fib_nh->nh_gw) 9701da177e4SLinus Torvalds RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); 9711da177e4SLinus Torvalds if (fi->fib_nh->nh_oif) 9721da177e4SLinus Torvalds RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); 973*8265abc0SPatrick McHardy #ifdef CONFIG_NET_CLS_ROUTE 974*8265abc0SPatrick McHardy if (fi->fib_nh[0].nh_tclassid) 975*8265abc0SPatrick McHardy RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); 976*8265abc0SPatrick McHardy #endif 9771da177e4SLinus Torvalds } 9781da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 9791da177e4SLinus Torvalds if (fi->fib_nhs > 1) { 9801da177e4SLinus 
Torvalds struct rtnexthop *nhp; 9811da177e4SLinus Torvalds struct rtattr *mp_head; 9821da177e4SLinus Torvalds if (skb_tailroom(skb) <= RTA_SPACE(0)) 9831da177e4SLinus Torvalds goto rtattr_failure; 9841da177e4SLinus Torvalds mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0)); 9851da177e4SLinus Torvalds 9861da177e4SLinus Torvalds for_nexthops(fi) { 9871da177e4SLinus Torvalds if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 9881da177e4SLinus Torvalds goto rtattr_failure; 9891da177e4SLinus Torvalds nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 9901da177e4SLinus Torvalds nhp->rtnh_flags = nh->nh_flags & 0xFF; 9911da177e4SLinus Torvalds nhp->rtnh_hops = nh->nh_weight-1; 9921da177e4SLinus Torvalds nhp->rtnh_ifindex = nh->nh_oif; 9931da177e4SLinus Torvalds if (nh->nh_gw) 9941da177e4SLinus Torvalds RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); 995*8265abc0SPatrick McHardy #ifdef CONFIG_NET_CLS_ROUTE 996*8265abc0SPatrick McHardy if (nh->nh_tclassid) 997*8265abc0SPatrick McHardy RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid); 998*8265abc0SPatrick McHardy #endif 9991da177e4SLinus Torvalds nhp->rtnh_len = skb->tail - (unsigned char*)nhp; 10001da177e4SLinus Torvalds } endfor_nexthops(fi); 10011da177e4SLinus Torvalds mp_head->rta_type = RTA_MULTIPATH; 10021da177e4SLinus Torvalds mp_head->rta_len = skb->tail - (u8*)mp_head; 10031da177e4SLinus Torvalds } 10041da177e4SLinus Torvalds #endif 10051da177e4SLinus Torvalds nlh->nlmsg_len = skb->tail - b; 10061da177e4SLinus Torvalds return skb->len; 10071da177e4SLinus Torvalds 10081da177e4SLinus Torvalds nlmsg_failure: 10091da177e4SLinus Torvalds rtattr_failure: 10101da177e4SLinus Torvalds skb_trim(skb, b - skb->data); 10111da177e4SLinus Torvalds return -1; 10121da177e4SLinus Torvalds } 10131da177e4SLinus Torvalds 10141da177e4SLinus Torvalds #ifndef CONFIG_IP_NOSIOCRT 10151da177e4SLinus Torvalds 10161da177e4SLinus Torvalds int 10171da177e4SLinus Torvalds fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct 
rtmsg *rtm, 10181da177e4SLinus Torvalds struct kern_rta *rta, struct rtentry *r) 10191da177e4SLinus Torvalds { 10201da177e4SLinus Torvalds int plen; 10211da177e4SLinus Torvalds u32 *ptr; 10221da177e4SLinus Torvalds 10231da177e4SLinus Torvalds memset(rtm, 0, sizeof(*rtm)); 10241da177e4SLinus Torvalds memset(rta, 0, sizeof(*rta)); 10251da177e4SLinus Torvalds 10261da177e4SLinus Torvalds if (r->rt_dst.sa_family != AF_INET) 10271da177e4SLinus Torvalds return -EAFNOSUPPORT; 10281da177e4SLinus Torvalds 10291da177e4SLinus Torvalds /* Check mask for validity: 10301da177e4SLinus Torvalds a) it must be contiguous. 10311da177e4SLinus Torvalds b) destination must have all host bits clear. 10321da177e4SLinus Torvalds c) if application forgot to set correct family (AF_INET), 10331da177e4SLinus Torvalds reject request unless it is absolutely clear i.e. 10341da177e4SLinus Torvalds both family and mask are zero. 10351da177e4SLinus Torvalds */ 10361da177e4SLinus Torvalds plen = 32; 10371da177e4SLinus Torvalds ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; 10381da177e4SLinus Torvalds if (!(r->rt_flags&RTF_HOST)) { 10391da177e4SLinus Torvalds u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; 10401da177e4SLinus Torvalds if (r->rt_genmask.sa_family != AF_INET) { 10411da177e4SLinus Torvalds if (mask || r->rt_genmask.sa_family) 10421da177e4SLinus Torvalds return -EAFNOSUPPORT; 10431da177e4SLinus Torvalds } 10441da177e4SLinus Torvalds if (bad_mask(mask, *ptr)) 10451da177e4SLinus Torvalds return -EINVAL; 10461da177e4SLinus Torvalds plen = inet_mask_len(mask); 10471da177e4SLinus Torvalds } 10481da177e4SLinus Torvalds 10491da177e4SLinus Torvalds nl->nlmsg_flags = NLM_F_REQUEST; 105028633514SAlexey Kuznetsov nl->nlmsg_pid = 0; 10511da177e4SLinus Torvalds nl->nlmsg_seq = 0; 10521da177e4SLinus Torvalds nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); 10531da177e4SLinus Torvalds if (cmd == SIOCDELRT) { 10541da177e4SLinus Torvalds nl->nlmsg_type = RTM_DELROUTE; 
10551da177e4SLinus Torvalds nl->nlmsg_flags = 0; 10561da177e4SLinus Torvalds } else { 10571da177e4SLinus Torvalds nl->nlmsg_type = RTM_NEWROUTE; 10581da177e4SLinus Torvalds nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; 10591da177e4SLinus Torvalds rtm->rtm_protocol = RTPROT_BOOT; 10601da177e4SLinus Torvalds } 10611da177e4SLinus Torvalds 10621da177e4SLinus Torvalds rtm->rtm_dst_len = plen; 10631da177e4SLinus Torvalds rta->rta_dst = ptr; 10641da177e4SLinus Torvalds 10651da177e4SLinus Torvalds if (r->rt_metric) { 10661da177e4SLinus Torvalds *(u32*)&r->rt_pad3 = r->rt_metric - 1; 10671da177e4SLinus Torvalds rta->rta_priority = (u32*)&r->rt_pad3; 10681da177e4SLinus Torvalds } 10691da177e4SLinus Torvalds if (r->rt_flags&RTF_REJECT) { 10701da177e4SLinus Torvalds rtm->rtm_scope = RT_SCOPE_HOST; 10711da177e4SLinus Torvalds rtm->rtm_type = RTN_UNREACHABLE; 10721da177e4SLinus Torvalds return 0; 10731da177e4SLinus Torvalds } 10741da177e4SLinus Torvalds rtm->rtm_scope = RT_SCOPE_NOWHERE; 10751da177e4SLinus Torvalds rtm->rtm_type = RTN_UNICAST; 10761da177e4SLinus Torvalds 10771da177e4SLinus Torvalds if (r->rt_dev) { 10781da177e4SLinus Torvalds char *colon; 10791da177e4SLinus Torvalds struct net_device *dev; 10801da177e4SLinus Torvalds char devname[IFNAMSIZ]; 10811da177e4SLinus Torvalds 10821da177e4SLinus Torvalds if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) 10831da177e4SLinus Torvalds return -EFAULT; 10841da177e4SLinus Torvalds devname[IFNAMSIZ-1] = 0; 10851da177e4SLinus Torvalds colon = strchr(devname, ':'); 10861da177e4SLinus Torvalds if (colon) 10871da177e4SLinus Torvalds *colon = 0; 10881da177e4SLinus Torvalds dev = __dev_get_by_name(devname); 10891da177e4SLinus Torvalds if (!dev) 10901da177e4SLinus Torvalds return -ENODEV; 10911da177e4SLinus Torvalds rta->rta_oif = &dev->ifindex; 10921da177e4SLinus Torvalds if (colon) { 10931da177e4SLinus Torvalds struct in_ifaddr *ifa; 1094e5ed6399SHerbert Xu struct in_device *in_dev = __in_dev_get_rtnl(dev); 10951da177e4SLinus 
Torvalds if (!in_dev) 10961da177e4SLinus Torvalds return -ENODEV; 10971da177e4SLinus Torvalds *colon = ':'; 10981da177e4SLinus Torvalds for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 10991da177e4SLinus Torvalds if (strcmp(ifa->ifa_label, devname) == 0) 11001da177e4SLinus Torvalds break; 11011da177e4SLinus Torvalds if (ifa == NULL) 11021da177e4SLinus Torvalds return -ENODEV; 11031da177e4SLinus Torvalds rta->rta_prefsrc = &ifa->ifa_local; 11041da177e4SLinus Torvalds } 11051da177e4SLinus Torvalds } 11061da177e4SLinus Torvalds 11071da177e4SLinus Torvalds ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; 11081da177e4SLinus Torvalds if (r->rt_gateway.sa_family == AF_INET && *ptr) { 11091da177e4SLinus Torvalds rta->rta_gw = ptr; 11101da177e4SLinus Torvalds if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) 11111da177e4SLinus Torvalds rtm->rtm_scope = RT_SCOPE_UNIVERSE; 11121da177e4SLinus Torvalds } 11131da177e4SLinus Torvalds 11141da177e4SLinus Torvalds if (cmd == SIOCDELRT) 11151da177e4SLinus Torvalds return 0; 11161da177e4SLinus Torvalds 11171da177e4SLinus Torvalds if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) 11181da177e4SLinus Torvalds return -EINVAL; 11191da177e4SLinus Torvalds 11201da177e4SLinus Torvalds if (rtm->rtm_scope == RT_SCOPE_NOWHERE) 11211da177e4SLinus Torvalds rtm->rtm_scope = RT_SCOPE_LINK; 11221da177e4SLinus Torvalds 11231da177e4SLinus Torvalds if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { 11241da177e4SLinus Torvalds struct rtattr *rec; 11251da177e4SLinus Torvalds struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); 11261da177e4SLinus Torvalds if (mx == NULL) 11271da177e4SLinus Torvalds return -ENOMEM; 11281da177e4SLinus Torvalds rta->rta_mx = mx; 11291da177e4SLinus Torvalds mx->rta_type = RTA_METRICS; 11301da177e4SLinus Torvalds mx->rta_len = RTA_LENGTH(0); 11311da177e4SLinus Torvalds if (r->rt_flags&RTF_MTU) { 11321da177e4SLinus Torvalds rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); 
11331da177e4SLinus Torvalds rec->rta_type = RTAX_ADVMSS; 11341da177e4SLinus Torvalds rec->rta_len = RTA_LENGTH(4); 11351da177e4SLinus Torvalds mx->rta_len += RTA_LENGTH(4); 11361da177e4SLinus Torvalds *(u32*)RTA_DATA(rec) = r->rt_mtu - 40; 11371da177e4SLinus Torvalds } 11381da177e4SLinus Torvalds if (r->rt_flags&RTF_WINDOW) { 11391da177e4SLinus Torvalds rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); 11401da177e4SLinus Torvalds rec->rta_type = RTAX_WINDOW; 11411da177e4SLinus Torvalds rec->rta_len = RTA_LENGTH(4); 11421da177e4SLinus Torvalds mx->rta_len += RTA_LENGTH(4); 11431da177e4SLinus Torvalds *(u32*)RTA_DATA(rec) = r->rt_window; 11441da177e4SLinus Torvalds } 11451da177e4SLinus Torvalds if (r->rt_flags&RTF_IRTT) { 11461da177e4SLinus Torvalds rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); 11471da177e4SLinus Torvalds rec->rta_type = RTAX_RTT; 11481da177e4SLinus Torvalds rec->rta_len = RTA_LENGTH(4); 11491da177e4SLinus Torvalds mx->rta_len += RTA_LENGTH(4); 11501da177e4SLinus Torvalds *(u32*)RTA_DATA(rec) = r->rt_irtt<<3; 11511da177e4SLinus Torvalds } 11521da177e4SLinus Torvalds } 11531da177e4SLinus Torvalds return 0; 11541da177e4SLinus Torvalds } 11551da177e4SLinus Torvalds 11561da177e4SLinus Torvalds #endif 11571da177e4SLinus Torvalds 11581da177e4SLinus Torvalds /* 11591da177e4SLinus Torvalds Update FIB if: 11601da177e4SLinus Torvalds - local address disappeared -> we must delete all the entries 11611da177e4SLinus Torvalds referring to it. 11621da177e4SLinus Torvalds - device went down -> we must shutdown all nexthops going via it. 
11631da177e4SLinus Torvalds */ 11641da177e4SLinus Torvalds 11651da177e4SLinus Torvalds int fib_sync_down(u32 local, struct net_device *dev, int force) 11661da177e4SLinus Torvalds { 11671da177e4SLinus Torvalds int ret = 0; 11681da177e4SLinus Torvalds int scope = RT_SCOPE_NOWHERE; 11691da177e4SLinus Torvalds 11701da177e4SLinus Torvalds if (force) 11711da177e4SLinus Torvalds scope = -1; 11721da177e4SLinus Torvalds 11731da177e4SLinus Torvalds if (local && fib_info_laddrhash) { 11741da177e4SLinus Torvalds unsigned int hash = fib_laddr_hashfn(local); 11751da177e4SLinus Torvalds struct hlist_head *head = &fib_info_laddrhash[hash]; 11761da177e4SLinus Torvalds struct hlist_node *node; 11771da177e4SLinus Torvalds struct fib_info *fi; 11781da177e4SLinus Torvalds 11791da177e4SLinus Torvalds hlist_for_each_entry(fi, node, head, fib_lhash) { 11801da177e4SLinus Torvalds if (fi->fib_prefsrc == local) { 11811da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 11821da177e4SLinus Torvalds ret++; 11831da177e4SLinus Torvalds } 11841da177e4SLinus Torvalds } 11851da177e4SLinus Torvalds } 11861da177e4SLinus Torvalds 11871da177e4SLinus Torvalds if (dev) { 11881da177e4SLinus Torvalds struct fib_info *prev_fi = NULL; 11891da177e4SLinus Torvalds unsigned int hash = fib_devindex_hashfn(dev->ifindex); 11901da177e4SLinus Torvalds struct hlist_head *head = &fib_info_devhash[hash]; 11911da177e4SLinus Torvalds struct hlist_node *node; 11921da177e4SLinus Torvalds struct fib_nh *nh; 11931da177e4SLinus Torvalds 11941da177e4SLinus Torvalds hlist_for_each_entry(nh, node, head, nh_hash) { 11951da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 11961da177e4SLinus Torvalds int dead; 11971da177e4SLinus Torvalds 11981da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 11991da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 12001da177e4SLinus Torvalds continue; 12011da177e4SLinus Torvalds prev_fi = fi; 12021da177e4SLinus Torvalds dead = 0; 12031da177e4SLinus Torvalds change_nexthops(fi) { 
12041da177e4SLinus Torvalds if (nh->nh_flags&RTNH_F_DEAD) 12051da177e4SLinus Torvalds dead++; 12061da177e4SLinus Torvalds else if (nh->nh_dev == dev && 12071da177e4SLinus Torvalds nh->nh_scope != scope) { 12081da177e4SLinus Torvalds nh->nh_flags |= RTNH_F_DEAD; 12091da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 12101da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 12111da177e4SLinus Torvalds fi->fib_power -= nh->nh_power; 12121da177e4SLinus Torvalds nh->nh_power = 0; 12131da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 12141da177e4SLinus Torvalds #endif 12151da177e4SLinus Torvalds dead++; 12161da177e4SLinus Torvalds } 12171da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 12181da177e4SLinus Torvalds if (force > 1 && nh->nh_dev == dev) { 12191da177e4SLinus Torvalds dead = fi->fib_nhs; 12201da177e4SLinus Torvalds break; 12211da177e4SLinus Torvalds } 12221da177e4SLinus Torvalds #endif 12231da177e4SLinus Torvalds } endfor_nexthops(fi) 12241da177e4SLinus Torvalds if (dead == fi->fib_nhs) { 12251da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 12261da177e4SLinus Torvalds ret++; 12271da177e4SLinus Torvalds } 12281da177e4SLinus Torvalds } 12291da177e4SLinus Torvalds } 12301da177e4SLinus Torvalds 12311da177e4SLinus Torvalds return ret; 12321da177e4SLinus Torvalds } 12331da177e4SLinus Torvalds 12341da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 12351da177e4SLinus Torvalds 12361da177e4SLinus Torvalds /* 12371da177e4SLinus Torvalds Dead device goes up. We wake up dead nexthops. 12381da177e4SLinus Torvalds It takes sense only on multipath routes. 
12391da177e4SLinus Torvalds */ 12401da177e4SLinus Torvalds 12411da177e4SLinus Torvalds int fib_sync_up(struct net_device *dev) 12421da177e4SLinus Torvalds { 12431da177e4SLinus Torvalds struct fib_info *prev_fi; 12441da177e4SLinus Torvalds unsigned int hash; 12451da177e4SLinus Torvalds struct hlist_head *head; 12461da177e4SLinus Torvalds struct hlist_node *node; 12471da177e4SLinus Torvalds struct fib_nh *nh; 12481da177e4SLinus Torvalds int ret; 12491da177e4SLinus Torvalds 12501da177e4SLinus Torvalds if (!(dev->flags&IFF_UP)) 12511da177e4SLinus Torvalds return 0; 12521da177e4SLinus Torvalds 12531da177e4SLinus Torvalds prev_fi = NULL; 12541da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 12551da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 12561da177e4SLinus Torvalds ret = 0; 12571da177e4SLinus Torvalds 12581da177e4SLinus Torvalds hlist_for_each_entry(nh, node, head, nh_hash) { 12591da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 12601da177e4SLinus Torvalds int alive; 12611da177e4SLinus Torvalds 12621da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 12631da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 12641da177e4SLinus Torvalds continue; 12651da177e4SLinus Torvalds 12661da177e4SLinus Torvalds prev_fi = fi; 12671da177e4SLinus Torvalds alive = 0; 12681da177e4SLinus Torvalds change_nexthops(fi) { 12691da177e4SLinus Torvalds if (!(nh->nh_flags&RTNH_F_DEAD)) { 12701da177e4SLinus Torvalds alive++; 12711da177e4SLinus Torvalds continue; 12721da177e4SLinus Torvalds } 12731da177e4SLinus Torvalds if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) 12741da177e4SLinus Torvalds continue; 1275e5ed6399SHerbert Xu if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) 12761da177e4SLinus Torvalds continue; 12771da177e4SLinus Torvalds alive++; 12781da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 12791da177e4SLinus Torvalds nh->nh_power = 0; 12801da177e4SLinus Torvalds nh->nh_flags &= ~RTNH_F_DEAD; 12811da177e4SLinus Torvalds 
spin_unlock_bh(&fib_multipath_lock); 12821da177e4SLinus Torvalds } endfor_nexthops(fi) 12831da177e4SLinus Torvalds 12841da177e4SLinus Torvalds if (alive > 0) { 12851da177e4SLinus Torvalds fi->fib_flags &= ~RTNH_F_DEAD; 12861da177e4SLinus Torvalds ret++; 12871da177e4SLinus Torvalds } 12881da177e4SLinus Torvalds } 12891da177e4SLinus Torvalds 12901da177e4SLinus Torvalds return ret; 12911da177e4SLinus Torvalds } 12921da177e4SLinus Torvalds 12931da177e4SLinus Torvalds /* 12941da177e4SLinus Torvalds The algorithm is suboptimal, but it provides really 12951da177e4SLinus Torvalds fair weighted route distribution. 12961da177e4SLinus Torvalds */ 12971da177e4SLinus Torvalds 12981da177e4SLinus Torvalds void fib_select_multipath(const struct flowi *flp, struct fib_result *res) 12991da177e4SLinus Torvalds { 13001da177e4SLinus Torvalds struct fib_info *fi = res->fi; 13011da177e4SLinus Torvalds int w; 13021da177e4SLinus Torvalds 13031da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 13041da177e4SLinus Torvalds if (fi->fib_power <= 0) { 13051da177e4SLinus Torvalds int power = 0; 13061da177e4SLinus Torvalds change_nexthops(fi) { 13071da177e4SLinus Torvalds if (!(nh->nh_flags&RTNH_F_DEAD)) { 13081da177e4SLinus Torvalds power += nh->nh_weight; 13091da177e4SLinus Torvalds nh->nh_power = nh->nh_weight; 13101da177e4SLinus Torvalds } 13111da177e4SLinus Torvalds } endfor_nexthops(fi); 13121da177e4SLinus Torvalds fi->fib_power = power; 13131da177e4SLinus Torvalds if (power <= 0) { 13141da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 13151da177e4SLinus Torvalds /* Race condition: route has just become dead. */ 13161da177e4SLinus Torvalds res->nh_sel = 0; 13171da177e4SLinus Torvalds return; 13181da177e4SLinus Torvalds } 13191da177e4SLinus Torvalds } 13201da177e4SLinus Torvalds 13211da177e4SLinus Torvalds 13221da177e4SLinus Torvalds /* w should be random number [0..fi->fib_power-1], 13231da177e4SLinus Torvalds it is pretty bad approximation. 
13241da177e4SLinus Torvalds */ 13251da177e4SLinus Torvalds 13261da177e4SLinus Torvalds w = jiffies % fi->fib_power; 13271da177e4SLinus Torvalds 13281da177e4SLinus Torvalds change_nexthops(fi) { 13291da177e4SLinus Torvalds if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { 13301da177e4SLinus Torvalds if ((w -= nh->nh_power) <= 0) { 13311da177e4SLinus Torvalds nh->nh_power--; 13321da177e4SLinus Torvalds fi->fib_power--; 13331da177e4SLinus Torvalds res->nh_sel = nhsel; 13341da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 13351da177e4SLinus Torvalds return; 13361da177e4SLinus Torvalds } 13371da177e4SLinus Torvalds } 13381da177e4SLinus Torvalds } endfor_nexthops(fi); 13391da177e4SLinus Torvalds 13401da177e4SLinus Torvalds /* Race condition: route has just become dead. */ 13411da177e4SLinus Torvalds res->nh_sel = 0; 13421da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 13431da177e4SLinus Torvalds } 13441da177e4SLinus Torvalds #endif 1345