11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * IPv4 Forwarding Information Base: semantics. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $ 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 131da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 141da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 151da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 
161da177e4SLinus Torvalds */ 171da177e4SLinus Torvalds 181da177e4SLinus Torvalds #include <linux/config.h> 191da177e4SLinus Torvalds #include <asm/uaccess.h> 201da177e4SLinus Torvalds #include <asm/system.h> 211da177e4SLinus Torvalds #include <linux/bitops.h> 221da177e4SLinus Torvalds #include <linux/types.h> 231da177e4SLinus Torvalds #include <linux/kernel.h> 241da177e4SLinus Torvalds #include <linux/jiffies.h> 251da177e4SLinus Torvalds #include <linux/mm.h> 261da177e4SLinus Torvalds #include <linux/string.h> 271da177e4SLinus Torvalds #include <linux/socket.h> 281da177e4SLinus Torvalds #include <linux/sockios.h> 291da177e4SLinus Torvalds #include <linux/errno.h> 301da177e4SLinus Torvalds #include <linux/in.h> 311da177e4SLinus Torvalds #include <linux/inet.h> 321da177e4SLinus Torvalds #include <linux/netdevice.h> 331da177e4SLinus Torvalds #include <linux/if_arp.h> 341da177e4SLinus Torvalds #include <linux/proc_fs.h> 351da177e4SLinus Torvalds #include <linux/skbuff.h> 361da177e4SLinus Torvalds #include <linux/netlink.h> 371da177e4SLinus Torvalds #include <linux/init.h> 381da177e4SLinus Torvalds 391da177e4SLinus Torvalds #include <net/ip.h> 401da177e4SLinus Torvalds #include <net/protocol.h> 411da177e4SLinus Torvalds #include <net/route.h> 421da177e4SLinus Torvalds #include <net/tcp.h> 431da177e4SLinus Torvalds #include <net/sock.h> 441da177e4SLinus Torvalds #include <net/ip_fib.h> 451da177e4SLinus Torvalds #include <net/ip_mp_alg.h> 461da177e4SLinus Torvalds 471da177e4SLinus Torvalds #include "fib_lookup.h" 481da177e4SLinus Torvalds 491da177e4SLinus Torvalds #define FSprintk(a...) 
501da177e4SLinus Torvalds 511da177e4SLinus Torvalds static DEFINE_RWLOCK(fib_info_lock); 521da177e4SLinus Torvalds static struct hlist_head *fib_info_hash; 531da177e4SLinus Torvalds static struct hlist_head *fib_info_laddrhash; 541da177e4SLinus Torvalds static unsigned int fib_hash_size; 551da177e4SLinus Torvalds static unsigned int fib_info_cnt; 561da177e4SLinus Torvalds 571da177e4SLinus Torvalds #define DEVINDEX_HASHBITS 8 581da177e4SLinus Torvalds #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 591da177e4SLinus Torvalds static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 601da177e4SLinus Torvalds 611da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 621da177e4SLinus Torvalds 631da177e4SLinus Torvalds static DEFINE_SPINLOCK(fib_multipath_lock); 641da177e4SLinus Torvalds 651da177e4SLinus Torvalds #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 661da177e4SLinus Torvalds for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 671da177e4SLinus Torvalds 681da177e4SLinus Torvalds #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ 691da177e4SLinus Torvalds for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) 701da177e4SLinus Torvalds 711da177e4SLinus Torvalds #else /* CONFIG_IP_ROUTE_MULTIPATH */ 721da177e4SLinus Torvalds 731da177e4SLinus Torvalds /* Hope, that gcc will optimize it to get rid of dummy loop */ 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ 761da177e4SLinus Torvalds for (nhsel=0; nhsel < 1; nhsel++) 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ 791da177e4SLinus Torvalds for (nhsel=0; nhsel < 1; nhsel++) 801da177e4SLinus Torvalds 811da177e4SLinus Torvalds #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 821da177e4SLinus Torvalds 831da177e4SLinus Torvalds #define 
endfor_nexthops(fi) } 841da177e4SLinus Torvalds 851da177e4SLinus Torvalds 861da177e4SLinus Torvalds static struct 871da177e4SLinus Torvalds { 881da177e4SLinus Torvalds int error; 891da177e4SLinus Torvalds u8 scope; 901da177e4SLinus Torvalds } fib_props[RTA_MAX + 1] = { 911da177e4SLinus Torvalds { 921da177e4SLinus Torvalds .error = 0, 931da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 941da177e4SLinus Torvalds }, /* RTN_UNSPEC */ 951da177e4SLinus Torvalds { 961da177e4SLinus Torvalds .error = 0, 971da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 981da177e4SLinus Torvalds }, /* RTN_UNICAST */ 991da177e4SLinus Torvalds { 1001da177e4SLinus Torvalds .error = 0, 1011da177e4SLinus Torvalds .scope = RT_SCOPE_HOST, 1021da177e4SLinus Torvalds }, /* RTN_LOCAL */ 1031da177e4SLinus Torvalds { 1041da177e4SLinus Torvalds .error = 0, 1051da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1061da177e4SLinus Torvalds }, /* RTN_BROADCAST */ 1071da177e4SLinus Torvalds { 1081da177e4SLinus Torvalds .error = 0, 1091da177e4SLinus Torvalds .scope = RT_SCOPE_LINK, 1101da177e4SLinus Torvalds }, /* RTN_ANYCAST */ 1111da177e4SLinus Torvalds { 1121da177e4SLinus Torvalds .error = 0, 1131da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1141da177e4SLinus Torvalds }, /* RTN_MULTICAST */ 1151da177e4SLinus Torvalds { 1161da177e4SLinus Torvalds .error = -EINVAL, 1171da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1181da177e4SLinus Torvalds }, /* RTN_BLACKHOLE */ 1191da177e4SLinus Torvalds { 1201da177e4SLinus Torvalds .error = -EHOSTUNREACH, 1211da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1221da177e4SLinus Torvalds }, /* RTN_UNREACHABLE */ 1231da177e4SLinus Torvalds { 1241da177e4SLinus Torvalds .error = -EACCES, 1251da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1261da177e4SLinus Torvalds }, /* RTN_PROHIBIT */ 1271da177e4SLinus Torvalds { 1281da177e4SLinus Torvalds .error = -EAGAIN, 1291da177e4SLinus Torvalds .scope = RT_SCOPE_UNIVERSE, 1301da177e4SLinus Torvalds }, /* RTN_THROW */ 
1311da177e4SLinus Torvalds { 1321da177e4SLinus Torvalds .error = -EINVAL, 1331da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1341da177e4SLinus Torvalds }, /* RTN_NAT */ 1351da177e4SLinus Torvalds { 1361da177e4SLinus Torvalds .error = -EINVAL, 1371da177e4SLinus Torvalds .scope = RT_SCOPE_NOWHERE, 1381da177e4SLinus Torvalds }, /* RTN_XRESOLVE */ 1391da177e4SLinus Torvalds }; 1401da177e4SLinus Torvalds 1411da177e4SLinus Torvalds 1421da177e4SLinus Torvalds /* Release a nexthop info record */ 1431da177e4SLinus Torvalds 1441da177e4SLinus Torvalds void free_fib_info(struct fib_info *fi) 1451da177e4SLinus Torvalds { 1461da177e4SLinus Torvalds if (fi->fib_dead == 0) { 1471da177e4SLinus Torvalds printk("Freeing alive fib_info %p\n", fi); 1481da177e4SLinus Torvalds return; 1491da177e4SLinus Torvalds } 1501da177e4SLinus Torvalds change_nexthops(fi) { 1511da177e4SLinus Torvalds if (nh->nh_dev) 1521da177e4SLinus Torvalds dev_put(nh->nh_dev); 1531da177e4SLinus Torvalds nh->nh_dev = NULL; 1541da177e4SLinus Torvalds } endfor_nexthops(fi); 1551da177e4SLinus Torvalds fib_info_cnt--; 1561da177e4SLinus Torvalds kfree(fi); 1571da177e4SLinus Torvalds } 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds void fib_release_info(struct fib_info *fi) 1601da177e4SLinus Torvalds { 1611da177e4SLinus Torvalds write_lock(&fib_info_lock); 1621da177e4SLinus Torvalds if (fi && --fi->fib_treeref == 0) { 1631da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 1641da177e4SLinus Torvalds if (fi->fib_prefsrc) 1651da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 1661da177e4SLinus Torvalds change_nexthops(fi) { 1671da177e4SLinus Torvalds if (!nh->nh_dev) 1681da177e4SLinus Torvalds continue; 1691da177e4SLinus Torvalds hlist_del(&nh->nh_hash); 1701da177e4SLinus Torvalds } endfor_nexthops(fi) 1711da177e4SLinus Torvalds fi->fib_dead = 1; 1721da177e4SLinus Torvalds fib_info_put(fi); 1731da177e4SLinus Torvalds } 1741da177e4SLinus Torvalds write_unlock(&fib_info_lock); 1751da177e4SLinus Torvalds } 
1761da177e4SLinus Torvalds 1771da177e4SLinus Torvalds static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 1781da177e4SLinus Torvalds { 1791da177e4SLinus Torvalds const struct fib_nh *onh = ofi->fib_nh; 1801da177e4SLinus Torvalds 1811da177e4SLinus Torvalds for_nexthops(fi) { 1821da177e4SLinus Torvalds if (nh->nh_oif != onh->nh_oif || 1831da177e4SLinus Torvalds nh->nh_gw != onh->nh_gw || 1841da177e4SLinus Torvalds nh->nh_scope != onh->nh_scope || 1851da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 1861da177e4SLinus Torvalds nh->nh_weight != onh->nh_weight || 1871da177e4SLinus Torvalds #endif 1881da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE 1891da177e4SLinus Torvalds nh->nh_tclassid != onh->nh_tclassid || 1901da177e4SLinus Torvalds #endif 1911da177e4SLinus Torvalds ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 1921da177e4SLinus Torvalds return -1; 1931da177e4SLinus Torvalds onh++; 1941da177e4SLinus Torvalds } endfor_nexthops(fi); 1951da177e4SLinus Torvalds return 0; 1961da177e4SLinus Torvalds } 1971da177e4SLinus Torvalds 1981da177e4SLinus Torvalds static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 1991da177e4SLinus Torvalds { 2001da177e4SLinus Torvalds unsigned int mask = (fib_hash_size - 1); 2011da177e4SLinus Torvalds unsigned int val = fi->fib_nhs; 2021da177e4SLinus Torvalds 2031da177e4SLinus Torvalds val ^= fi->fib_protocol; 2041da177e4SLinus Torvalds val ^= fi->fib_prefsrc; 2051da177e4SLinus Torvalds val ^= fi->fib_priority; 2061da177e4SLinus Torvalds 2071da177e4SLinus Torvalds return (val ^ (val >> 7) ^ (val >> 12)) & mask; 2081da177e4SLinus Torvalds } 2091da177e4SLinus Torvalds 2101da177e4SLinus Torvalds static struct fib_info *fib_find_info(const struct fib_info *nfi) 2111da177e4SLinus Torvalds { 2121da177e4SLinus Torvalds struct hlist_head *head; 2131da177e4SLinus Torvalds struct hlist_node *node; 2141da177e4SLinus Torvalds struct fib_info *fi; 2151da177e4SLinus Torvalds unsigned int hash; 
2161da177e4SLinus Torvalds 2171da177e4SLinus Torvalds hash = fib_info_hashfn(nfi); 2181da177e4SLinus Torvalds head = &fib_info_hash[hash]; 2191da177e4SLinus Torvalds 2201da177e4SLinus Torvalds hlist_for_each_entry(fi, node, head, fib_hash) { 2211da177e4SLinus Torvalds if (fi->fib_nhs != nfi->fib_nhs) 2221da177e4SLinus Torvalds continue; 2231da177e4SLinus Torvalds if (nfi->fib_protocol == fi->fib_protocol && 2241da177e4SLinus Torvalds nfi->fib_prefsrc == fi->fib_prefsrc && 2251da177e4SLinus Torvalds nfi->fib_priority == fi->fib_priority && 2261da177e4SLinus Torvalds memcmp(nfi->fib_metrics, fi->fib_metrics, 2271da177e4SLinus Torvalds sizeof(fi->fib_metrics)) == 0 && 2281da177e4SLinus Torvalds ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 2291da177e4SLinus Torvalds (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 2301da177e4SLinus Torvalds return fi; 2311da177e4SLinus Torvalds } 2321da177e4SLinus Torvalds 2331da177e4SLinus Torvalds return NULL; 2341da177e4SLinus Torvalds } 2351da177e4SLinus Torvalds 2361da177e4SLinus Torvalds static inline unsigned int fib_devindex_hashfn(unsigned int val) 2371da177e4SLinus Torvalds { 2381da177e4SLinus Torvalds unsigned int mask = DEVINDEX_HASHSIZE - 1; 2391da177e4SLinus Torvalds 2401da177e4SLinus Torvalds return (val ^ 2411da177e4SLinus Torvalds (val >> DEVINDEX_HASHBITS) ^ 2421da177e4SLinus Torvalds (val >> (DEVINDEX_HASHBITS * 2))) & mask; 2431da177e4SLinus Torvalds } 2441da177e4SLinus Torvalds 2451da177e4SLinus Torvalds /* Check, that the gateway is already configured. 2461da177e4SLinus Torvalds Used only by redirect accept routine. 
2471da177e4SLinus Torvalds */ 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds int ip_fib_check_default(u32 gw, struct net_device *dev) 2501da177e4SLinus Torvalds { 2511da177e4SLinus Torvalds struct hlist_head *head; 2521da177e4SLinus Torvalds struct hlist_node *node; 2531da177e4SLinus Torvalds struct fib_nh *nh; 2541da177e4SLinus Torvalds unsigned int hash; 2551da177e4SLinus Torvalds 2561da177e4SLinus Torvalds read_lock(&fib_info_lock); 2571da177e4SLinus Torvalds 2581da177e4SLinus Torvalds hash = fib_devindex_hashfn(dev->ifindex); 2591da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 2601da177e4SLinus Torvalds hlist_for_each_entry(nh, node, head, nh_hash) { 2611da177e4SLinus Torvalds if (nh->nh_dev == dev && 2621da177e4SLinus Torvalds nh->nh_gw == gw && 2631da177e4SLinus Torvalds !(nh->nh_flags&RTNH_F_DEAD)) { 2641da177e4SLinus Torvalds read_unlock(&fib_info_lock); 2651da177e4SLinus Torvalds return 0; 2661da177e4SLinus Torvalds } 2671da177e4SLinus Torvalds } 2681da177e4SLinus Torvalds 2691da177e4SLinus Torvalds read_unlock(&fib_info_lock); 2701da177e4SLinus Torvalds 2711da177e4SLinus Torvalds return -1; 2721da177e4SLinus Torvalds } 2731da177e4SLinus Torvalds 2741da177e4SLinus Torvalds void rtmsg_fib(int event, u32 key, struct fib_alias *fa, 2751da177e4SLinus Torvalds int z, int tb_id, 2761da177e4SLinus Torvalds struct nlmsghdr *n, struct netlink_skb_parms *req) 2771da177e4SLinus Torvalds { 2781da177e4SLinus Torvalds struct sk_buff *skb; 2799ed19f33SJamal Hadi Salim u32 pid = req ? 
req->pid : n->nlmsg_pid; 2801da177e4SLinus Torvalds int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); 2811da177e4SLinus Torvalds 2821da177e4SLinus Torvalds skb = alloc_skb(size, GFP_KERNEL); 2831da177e4SLinus Torvalds if (!skb) 2841da177e4SLinus Torvalds return; 2851da177e4SLinus Torvalds 2861da177e4SLinus Torvalds if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, 2871da177e4SLinus Torvalds fa->fa_type, fa->fa_scope, &key, z, 2881da177e4SLinus Torvalds fa->fa_tos, 289b6544c0bSJamal Hadi Salim fa->fa_info, 0) < 0) { 2901da177e4SLinus Torvalds kfree_skb(skb); 2911da177e4SLinus Torvalds return; 2921da177e4SLinus Torvalds } 2931da177e4SLinus Torvalds NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; 2941da177e4SLinus Torvalds if (n->nlmsg_flags&NLM_F_ECHO) 2951da177e4SLinus Torvalds atomic_inc(&skb->users); 2961da177e4SLinus Torvalds netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); 2971da177e4SLinus Torvalds if (n->nlmsg_flags&NLM_F_ECHO) 2981da177e4SLinus Torvalds netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); 2991da177e4SLinus Torvalds } 3001da177e4SLinus Torvalds 3011da177e4SLinus Torvalds /* Return the first fib alias matching TOS with 3021da177e4SLinus Torvalds * priority less than or equal to PRIO. 
3031da177e4SLinus Torvalds */ 3041da177e4SLinus Torvalds struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 3051da177e4SLinus Torvalds { 3061da177e4SLinus Torvalds if (fah) { 3071da177e4SLinus Torvalds struct fib_alias *fa; 3081da177e4SLinus Torvalds list_for_each_entry(fa, fah, fa_list) { 3091da177e4SLinus Torvalds if (fa->fa_tos > tos) 3101da177e4SLinus Torvalds continue; 3111da177e4SLinus Torvalds if (fa->fa_info->fib_priority >= prio || 3121da177e4SLinus Torvalds fa->fa_tos < tos) 3131da177e4SLinus Torvalds return fa; 3141da177e4SLinus Torvalds } 3151da177e4SLinus Torvalds } 3161da177e4SLinus Torvalds return NULL; 3171da177e4SLinus Torvalds } 3181da177e4SLinus Torvalds 3191da177e4SLinus Torvalds int fib_detect_death(struct fib_info *fi, int order, 3201da177e4SLinus Torvalds struct fib_info **last_resort, int *last_idx, int *dflt) 3211da177e4SLinus Torvalds { 3221da177e4SLinus Torvalds struct neighbour *n; 3231da177e4SLinus Torvalds int state = NUD_NONE; 3241da177e4SLinus Torvalds 3251da177e4SLinus Torvalds n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 3261da177e4SLinus Torvalds if (n) { 3271da177e4SLinus Torvalds state = n->nud_state; 3281da177e4SLinus Torvalds neigh_release(n); 3291da177e4SLinus Torvalds } 3301da177e4SLinus Torvalds if (state==NUD_REACHABLE) 3311da177e4SLinus Torvalds return 0; 3321da177e4SLinus Torvalds if ((state&NUD_VALID) && order != *dflt) 3331da177e4SLinus Torvalds return 0; 3341da177e4SLinus Torvalds if ((state&NUD_VALID) || 3351da177e4SLinus Torvalds (*last_idx<0 && order > *dflt)) { 3361da177e4SLinus Torvalds *last_resort = fi; 3371da177e4SLinus Torvalds *last_idx = order; 3381da177e4SLinus Torvalds } 3391da177e4SLinus Torvalds return 1; 3401da177e4SLinus Torvalds } 3411da177e4SLinus Torvalds 3421da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 3431da177e4SLinus Torvalds 3441da177e4SLinus Torvalds static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) 3451da177e4SLinus 
Torvalds { 3461da177e4SLinus Torvalds while (RTA_OK(attr,attrlen)) { 3471da177e4SLinus Torvalds if (attr->rta_type == type) 3481da177e4SLinus Torvalds return *(u32*)RTA_DATA(attr); 3491da177e4SLinus Torvalds attr = RTA_NEXT(attr, attrlen); 3501da177e4SLinus Torvalds } 3511da177e4SLinus Torvalds return 0; 3521da177e4SLinus Torvalds } 3531da177e4SLinus Torvalds 3541da177e4SLinus Torvalds static int 3551da177e4SLinus Torvalds fib_count_nexthops(struct rtattr *rta) 3561da177e4SLinus Torvalds { 3571da177e4SLinus Torvalds int nhs = 0; 3581da177e4SLinus Torvalds struct rtnexthop *nhp = RTA_DATA(rta); 3591da177e4SLinus Torvalds int nhlen = RTA_PAYLOAD(rta); 3601da177e4SLinus Torvalds 3611da177e4SLinus Torvalds while (nhlen >= (int)sizeof(struct rtnexthop)) { 3621da177e4SLinus Torvalds if ((nhlen -= nhp->rtnh_len) < 0) 3631da177e4SLinus Torvalds return 0; 3641da177e4SLinus Torvalds nhs++; 3651da177e4SLinus Torvalds nhp = RTNH_NEXT(nhp); 3661da177e4SLinus Torvalds }; 3671da177e4SLinus Torvalds return nhs; 3681da177e4SLinus Torvalds } 3691da177e4SLinus Torvalds 3701da177e4SLinus Torvalds static int 3711da177e4SLinus Torvalds fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) 3721da177e4SLinus Torvalds { 3731da177e4SLinus Torvalds struct rtnexthop *nhp = RTA_DATA(rta); 3741da177e4SLinus Torvalds int nhlen = RTA_PAYLOAD(rta); 3751da177e4SLinus Torvalds 3761da177e4SLinus Torvalds change_nexthops(fi) { 3771da177e4SLinus Torvalds int attrlen = nhlen - sizeof(struct rtnexthop); 3781da177e4SLinus Torvalds if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) 3791da177e4SLinus Torvalds return -EINVAL; 3801da177e4SLinus Torvalds nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; 3811da177e4SLinus Torvalds nh->nh_oif = nhp->rtnh_ifindex; 3821da177e4SLinus Torvalds nh->nh_weight = nhp->rtnh_hops + 1; 3831da177e4SLinus Torvalds if (attrlen) { 3841da177e4SLinus Torvalds nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 3851da177e4SLinus 
Torvalds #ifdef CONFIG_NET_CLS_ROUTE 3861da177e4SLinus Torvalds nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); 3871da177e4SLinus Torvalds #endif 3881da177e4SLinus Torvalds } 3891da177e4SLinus Torvalds nhp = RTNH_NEXT(nhp); 3901da177e4SLinus Torvalds } endfor_nexthops(fi); 3911da177e4SLinus Torvalds return 0; 3921da177e4SLinus Torvalds } 3931da177e4SLinus Torvalds 3941da177e4SLinus Torvalds #endif 3951da177e4SLinus Torvalds 3961da177e4SLinus Torvalds int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, 3971da177e4SLinus Torvalds struct fib_info *fi) 3981da177e4SLinus Torvalds { 3991da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4001da177e4SLinus Torvalds struct rtnexthop *nhp; 4011da177e4SLinus Torvalds int nhlen; 4021da177e4SLinus Torvalds #endif 4031da177e4SLinus Torvalds 4041da177e4SLinus Torvalds if (rta->rta_priority && 4051da177e4SLinus Torvalds *rta->rta_priority != fi->fib_priority) 4061da177e4SLinus Torvalds return 1; 4071da177e4SLinus Torvalds 4081da177e4SLinus Torvalds if (rta->rta_oif || rta->rta_gw) { 4091da177e4SLinus Torvalds if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && 4101da177e4SLinus Torvalds (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) 4111da177e4SLinus Torvalds return 0; 4121da177e4SLinus Torvalds return 1; 4131da177e4SLinus Torvalds } 4141da177e4SLinus Torvalds 4151da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 4161da177e4SLinus Torvalds if (rta->rta_mp == NULL) 4171da177e4SLinus Torvalds return 0; 4181da177e4SLinus Torvalds nhp = RTA_DATA(rta->rta_mp); 4191da177e4SLinus Torvalds nhlen = RTA_PAYLOAD(rta->rta_mp); 4201da177e4SLinus Torvalds 4211da177e4SLinus Torvalds for_nexthops(fi) { 4221da177e4SLinus Torvalds int attrlen = nhlen - sizeof(struct rtnexthop); 4231da177e4SLinus Torvalds u32 gw; 4241da177e4SLinus Torvalds 4251da177e4SLinus Torvalds if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) 4261da177e4SLinus Torvalds return -EINVAL; 
4271da177e4SLinus Torvalds if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) 4281da177e4SLinus Torvalds return 1; 4291da177e4SLinus Torvalds if (attrlen) { 4301da177e4SLinus Torvalds gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 4311da177e4SLinus Torvalds if (gw && gw != nh->nh_gw) 4321da177e4SLinus Torvalds return 1; 4331da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE 4341da177e4SLinus Torvalds gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); 4351da177e4SLinus Torvalds if (gw && gw != nh->nh_tclassid) 4361da177e4SLinus Torvalds return 1; 4371da177e4SLinus Torvalds #endif 4381da177e4SLinus Torvalds } 4391da177e4SLinus Torvalds nhp = RTNH_NEXT(nhp); 4401da177e4SLinus Torvalds } endfor_nexthops(fi); 4411da177e4SLinus Torvalds #endif 4421da177e4SLinus Torvalds return 0; 4431da177e4SLinus Torvalds } 4441da177e4SLinus Torvalds 4451da177e4SLinus Torvalds 4461da177e4SLinus Torvalds /* 4471da177e4SLinus Torvalds Picture 4481da177e4SLinus Torvalds ------- 4491da177e4SLinus Torvalds 4501da177e4SLinus Torvalds Semantics of nexthop is very messy by historical reasons. 4511da177e4SLinus Torvalds We have to take into account, that: 4521da177e4SLinus Torvalds a) gateway can be actually local interface address, 4531da177e4SLinus Torvalds so that gatewayed route is direct. 4541da177e4SLinus Torvalds b) gateway must be on-link address, possibly 4551da177e4SLinus Torvalds described not by an ifaddr, but also by a direct route. 4561da177e4SLinus Torvalds c) If both gateway and interface are specified, they should not 4571da177e4SLinus Torvalds contradict. 4581da177e4SLinus Torvalds d) If we use tunnel routes, gateway could be not on-link. 4591da177e4SLinus Torvalds 4601da177e4SLinus Torvalds Attempt to reconcile all of these (alas, self-contradictory) conditions 4611da177e4SLinus Torvalds results in pretty ugly and hairy code with obscure logic. 
4621da177e4SLinus Torvalds 4631da177e4SLinus Torvalds I chose to generalize it instead, so that the size 4641da177e4SLinus Torvalds of code does not increase practically, but it becomes 4651da177e4SLinus Torvalds much more general. 4661da177e4SLinus Torvalds Every prefix is assigned a "scope" value: "host" is local address, 4671da177e4SLinus Torvalds "link" is direct route, 4681da177e4SLinus Torvalds [ ... "site" ... "interior" ... ] 4691da177e4SLinus Torvalds and "universe" is true gateway route with global meaning. 4701da177e4SLinus Torvalds 4711da177e4SLinus Torvalds Every prefix refers to a set of "nexthop"s (gw, oif), 4721da177e4SLinus Torvalds where gw must have narrower scope. This recursion stops 4731da177e4SLinus Torvalds when gw has LOCAL scope or if "nexthop" is declared ONLINK, 4741da177e4SLinus Torvalds which means that gw is forced to be on link. 4751da177e4SLinus Torvalds 4761da177e4SLinus Torvalds Code is still hairy, but now it is apparently logically 4771da177e4SLinus Torvalds consistent and very flexible. E.g. as a by-product it allows 4781da177e4SLinus Torvalds independent exterior and interior routing processes 4791da177e4SLinus Torvalds to co-exist in peace. 4801da177e4SLinus Torvalds 4811da177e4SLinus Torvalds Normally it looks as follows. 
4821da177e4SLinus Torvalds 4831da177e4SLinus Torvalds {universe prefix} -> (gw, oif) [scope link] 4841da177e4SLinus Torvalds | 4851da177e4SLinus Torvalds |-> {link prefix} -> (gw, oif) [scope local] 4861da177e4SLinus Torvalds | 4871da177e4SLinus Torvalds |-> {local prefix} (terminal node) 4881da177e4SLinus Torvalds */ 4891da177e4SLinus Torvalds 4901da177e4SLinus Torvalds static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) 4911da177e4SLinus Torvalds { 4921da177e4SLinus Torvalds int err; 4931da177e4SLinus Torvalds 4941da177e4SLinus Torvalds if (nh->nh_gw) { 4951da177e4SLinus Torvalds struct fib_result res; 4961da177e4SLinus Torvalds 4971da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_PERVASIVE 4981da177e4SLinus Torvalds if (nh->nh_flags&RTNH_F_PERVASIVE) 4991da177e4SLinus Torvalds return 0; 5001da177e4SLinus Torvalds #endif 5011da177e4SLinus Torvalds if (nh->nh_flags&RTNH_F_ONLINK) { 5021da177e4SLinus Torvalds struct net_device *dev; 5031da177e4SLinus Torvalds 5041da177e4SLinus Torvalds if (r->rtm_scope >= RT_SCOPE_LINK) 5051da177e4SLinus Torvalds return -EINVAL; 5061da177e4SLinus Torvalds if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) 5071da177e4SLinus Torvalds return -EINVAL; 5081da177e4SLinus Torvalds if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) 5091da177e4SLinus Torvalds return -ENODEV; 5101da177e4SLinus Torvalds if (!(dev->flags&IFF_UP)) 5111da177e4SLinus Torvalds return -ENETDOWN; 5121da177e4SLinus Torvalds nh->nh_dev = dev; 5131da177e4SLinus Torvalds dev_hold(dev); 5141da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_LINK; 5151da177e4SLinus Torvalds return 0; 5161da177e4SLinus Torvalds } 5171da177e4SLinus Torvalds { 5181da177e4SLinus Torvalds struct flowi fl = { .nl_u = { .ip4_u = 5191da177e4SLinus Torvalds { .daddr = nh->nh_gw, 5201da177e4SLinus Torvalds .scope = r->rtm_scope + 1 } }, 5211da177e4SLinus Torvalds .oif = nh->nh_oif }; 5221da177e4SLinus Torvalds 5231da177e4SLinus Torvalds /* It is not necessary, but requires a 
bit of thinking */ 5241da177e4SLinus Torvalds if (fl.fl4_scope < RT_SCOPE_LINK) 5251da177e4SLinus Torvalds fl.fl4_scope = RT_SCOPE_LINK; 5261da177e4SLinus Torvalds if ((err = fib_lookup(&fl, &res)) != 0) 5271da177e4SLinus Torvalds return err; 5281da177e4SLinus Torvalds } 5291da177e4SLinus Torvalds err = -EINVAL; 5301da177e4SLinus Torvalds if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 5311da177e4SLinus Torvalds goto out; 5321da177e4SLinus Torvalds nh->nh_scope = res.scope; 5331da177e4SLinus Torvalds nh->nh_oif = FIB_RES_OIF(res); 5341da177e4SLinus Torvalds if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 5351da177e4SLinus Torvalds goto out; 5361da177e4SLinus Torvalds dev_hold(nh->nh_dev); 5371da177e4SLinus Torvalds err = -ENETDOWN; 5381da177e4SLinus Torvalds if (!(nh->nh_dev->flags & IFF_UP)) 5391da177e4SLinus Torvalds goto out; 5401da177e4SLinus Torvalds err = 0; 5411da177e4SLinus Torvalds out: 5421da177e4SLinus Torvalds fib_res_put(&res); 5431da177e4SLinus Torvalds return err; 5441da177e4SLinus Torvalds } else { 5451da177e4SLinus Torvalds struct in_device *in_dev; 5461da177e4SLinus Torvalds 5471da177e4SLinus Torvalds if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 5481da177e4SLinus Torvalds return -EINVAL; 5491da177e4SLinus Torvalds 5501da177e4SLinus Torvalds in_dev = inetdev_by_index(nh->nh_oif); 5511da177e4SLinus Torvalds if (in_dev == NULL) 5521da177e4SLinus Torvalds return -ENODEV; 5531da177e4SLinus Torvalds if (!(in_dev->dev->flags&IFF_UP)) { 5541da177e4SLinus Torvalds in_dev_put(in_dev); 5551da177e4SLinus Torvalds return -ENETDOWN; 5561da177e4SLinus Torvalds } 5571da177e4SLinus Torvalds nh->nh_dev = in_dev->dev; 5581da177e4SLinus Torvalds dev_hold(nh->nh_dev); 5591da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_HOST; 5601da177e4SLinus Torvalds in_dev_put(in_dev); 5611da177e4SLinus Torvalds } 5621da177e4SLinus Torvalds return 0; 5631da177e4SLinus Torvalds } 5641da177e4SLinus Torvalds 5651da177e4SLinus Torvalds static inline unsigned int 
fib_laddr_hashfn(u32 val) 5661da177e4SLinus Torvalds { 5671da177e4SLinus Torvalds unsigned int mask = (fib_hash_size - 1); 5681da177e4SLinus Torvalds 5691da177e4SLinus Torvalds return (val ^ (val >> 7) ^ (val >> 14)) & mask; 5701da177e4SLinus Torvalds } 5711da177e4SLinus Torvalds 5721da177e4SLinus Torvalds static struct hlist_head *fib_hash_alloc(int bytes) 5731da177e4SLinus Torvalds { 5741da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 5751da177e4SLinus Torvalds return kmalloc(bytes, GFP_KERNEL); 5761da177e4SLinus Torvalds else 5771da177e4SLinus Torvalds return (struct hlist_head *) 5781da177e4SLinus Torvalds __get_free_pages(GFP_KERNEL, get_order(bytes)); 5791da177e4SLinus Torvalds } 5801da177e4SLinus Torvalds 5811da177e4SLinus Torvalds static void fib_hash_free(struct hlist_head *hash, int bytes) 5821da177e4SLinus Torvalds { 5831da177e4SLinus Torvalds if (!hash) 5841da177e4SLinus Torvalds return; 5851da177e4SLinus Torvalds 5861da177e4SLinus Torvalds if (bytes <= PAGE_SIZE) 5871da177e4SLinus Torvalds kfree(hash); 5881da177e4SLinus Torvalds else 5891da177e4SLinus Torvalds free_pages((unsigned long) hash, get_order(bytes)); 5901da177e4SLinus Torvalds } 5911da177e4SLinus Torvalds 5921da177e4SLinus Torvalds static void fib_hash_move(struct hlist_head *new_info_hash, 5931da177e4SLinus Torvalds struct hlist_head *new_laddrhash, 5941da177e4SLinus Torvalds unsigned int new_size) 5951da177e4SLinus Torvalds { 596*b7656e7fSDavid S. Miller struct hlist_head *old_info_hash, *old_laddrhash; 5971da177e4SLinus Torvalds unsigned int old_size = fib_hash_size; 598*b7656e7fSDavid S. Miller unsigned int i, bytes; 5991da177e4SLinus Torvalds 6001da177e4SLinus Torvalds write_lock(&fib_info_lock); 601*b7656e7fSDavid S. Miller old_info_hash = fib_info_hash; 602*b7656e7fSDavid S. 
Miller old_laddrhash = fib_info_laddrhash; 6031da177e4SLinus Torvalds fib_hash_size = new_size; 6041da177e4SLinus Torvalds 6051da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 6061da177e4SLinus Torvalds struct hlist_head *head = &fib_info_hash[i]; 6071da177e4SLinus Torvalds struct hlist_node *node, *n; 6081da177e4SLinus Torvalds struct fib_info *fi; 6091da177e4SLinus Torvalds 6101da177e4SLinus Torvalds hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { 6111da177e4SLinus Torvalds struct hlist_head *dest; 6121da177e4SLinus Torvalds unsigned int new_hash; 6131da177e4SLinus Torvalds 6141da177e4SLinus Torvalds hlist_del(&fi->fib_hash); 6151da177e4SLinus Torvalds 6161da177e4SLinus Torvalds new_hash = fib_info_hashfn(fi); 6171da177e4SLinus Torvalds dest = &new_info_hash[new_hash]; 6181da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, dest); 6191da177e4SLinus Torvalds } 6201da177e4SLinus Torvalds } 6211da177e4SLinus Torvalds fib_info_hash = new_info_hash; 6221da177e4SLinus Torvalds 6231da177e4SLinus Torvalds for (i = 0; i < old_size; i++) { 6241da177e4SLinus Torvalds struct hlist_head *lhead = &fib_info_laddrhash[i]; 6251da177e4SLinus Torvalds struct hlist_node *node, *n; 6261da177e4SLinus Torvalds struct fib_info *fi; 6271da177e4SLinus Torvalds 6281da177e4SLinus Torvalds hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { 6291da177e4SLinus Torvalds struct hlist_head *ldest; 6301da177e4SLinus Torvalds unsigned int new_hash; 6311da177e4SLinus Torvalds 6321da177e4SLinus Torvalds hlist_del(&fi->fib_lhash); 6331da177e4SLinus Torvalds 6341da177e4SLinus Torvalds new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 6351da177e4SLinus Torvalds ldest = &new_laddrhash[new_hash]; 6361da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, ldest); 6371da177e4SLinus Torvalds } 6381da177e4SLinus Torvalds } 6391da177e4SLinus Torvalds fib_info_laddrhash = new_laddrhash; 6401da177e4SLinus Torvalds 6411da177e4SLinus Torvalds write_unlock(&fib_info_lock); 642*b7656e7fSDavid S. 
Miller 643*b7656e7fSDavid S. Miller bytes = old_size * sizeof(struct hlist_head *); 644*b7656e7fSDavid S. Miller fib_hash_free(old_info_hash, bytes); 645*b7656e7fSDavid S. Miller fib_hash_free(old_laddrhash, bytes); 6461da177e4SLinus Torvalds } 6471da177e4SLinus Torvalds 6481da177e4SLinus Torvalds struct fib_info * 6491da177e4SLinus Torvalds fib_create_info(const struct rtmsg *r, struct kern_rta *rta, 6501da177e4SLinus Torvalds const struct nlmsghdr *nlh, int *errp) 6511da177e4SLinus Torvalds { 6521da177e4SLinus Torvalds int err; 6531da177e4SLinus Torvalds struct fib_info *fi = NULL; 6541da177e4SLinus Torvalds struct fib_info *ofi; 6551da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 6561da177e4SLinus Torvalds int nhs = 1; 6571da177e4SLinus Torvalds #else 6581da177e4SLinus Torvalds const int nhs = 1; 6591da177e4SLinus Torvalds #endif 6601da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 6611da177e4SLinus Torvalds u32 mp_alg = IP_MP_ALG_NONE; 6621da177e4SLinus Torvalds #endif 6631da177e4SLinus Torvalds 6641da177e4SLinus Torvalds /* Fast check to catch the most weird cases */ 6651da177e4SLinus Torvalds if (fib_props[r->rtm_type].scope > r->rtm_scope) 6661da177e4SLinus Torvalds goto err_inval; 6671da177e4SLinus Torvalds 6681da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 6691da177e4SLinus Torvalds if (rta->rta_mp) { 6701da177e4SLinus Torvalds nhs = fib_count_nexthops(rta->rta_mp); 6711da177e4SLinus Torvalds if (nhs == 0) 6721da177e4SLinus Torvalds goto err_inval; 6731da177e4SLinus Torvalds } 6741da177e4SLinus Torvalds #endif 6751da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 6761da177e4SLinus Torvalds if (rta->rta_mp_alg) { 6771da177e4SLinus Torvalds mp_alg = *rta->rta_mp_alg; 6781da177e4SLinus Torvalds 6791da177e4SLinus Torvalds if (mp_alg < IP_MP_ALG_NONE || 6801da177e4SLinus Torvalds mp_alg > IP_MP_ALG_MAX) 6811da177e4SLinus Torvalds goto err_inval; 6821da177e4SLinus Torvalds } 6831da177e4SLinus Torvalds #endif 
6841da177e4SLinus Torvalds 6851da177e4SLinus Torvalds err = -ENOBUFS; 6861da177e4SLinus Torvalds if (fib_info_cnt >= fib_hash_size) { 6871da177e4SLinus Torvalds unsigned int new_size = fib_hash_size << 1; 6881da177e4SLinus Torvalds struct hlist_head *new_info_hash; 6891da177e4SLinus Torvalds struct hlist_head *new_laddrhash; 6901da177e4SLinus Torvalds unsigned int bytes; 6911da177e4SLinus Torvalds 6921da177e4SLinus Torvalds if (!new_size) 6931da177e4SLinus Torvalds new_size = 1; 6941da177e4SLinus Torvalds bytes = new_size * sizeof(struct hlist_head *); 6951da177e4SLinus Torvalds new_info_hash = fib_hash_alloc(bytes); 6961da177e4SLinus Torvalds new_laddrhash = fib_hash_alloc(bytes); 6971da177e4SLinus Torvalds if (!new_info_hash || !new_laddrhash) { 6981da177e4SLinus Torvalds fib_hash_free(new_info_hash, bytes); 6991da177e4SLinus Torvalds fib_hash_free(new_laddrhash, bytes); 7001da177e4SLinus Torvalds } else { 7011da177e4SLinus Torvalds memset(new_info_hash, 0, bytes); 7021da177e4SLinus Torvalds memset(new_laddrhash, 0, bytes); 7031da177e4SLinus Torvalds 7041da177e4SLinus Torvalds fib_hash_move(new_info_hash, new_laddrhash, new_size); 7051da177e4SLinus Torvalds } 7061da177e4SLinus Torvalds 7071da177e4SLinus Torvalds if (!fib_hash_size) 7081da177e4SLinus Torvalds goto failure; 7091da177e4SLinus Torvalds } 7101da177e4SLinus Torvalds 7111da177e4SLinus Torvalds fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 7121da177e4SLinus Torvalds if (fi == NULL) 7131da177e4SLinus Torvalds goto failure; 7141da177e4SLinus Torvalds fib_info_cnt++; 7151da177e4SLinus Torvalds memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh)); 7161da177e4SLinus Torvalds 7171da177e4SLinus Torvalds fi->fib_protocol = r->rtm_protocol; 7181da177e4SLinus Torvalds 7191da177e4SLinus Torvalds fi->fib_nhs = nhs; 7201da177e4SLinus Torvalds change_nexthops(fi) { 7211da177e4SLinus Torvalds nh->nh_parent = fi; 7221da177e4SLinus Torvalds } endfor_nexthops(fi) 7231da177e4SLinus Torvalds 
7241da177e4SLinus Torvalds fi->fib_flags = r->rtm_flags; 7251da177e4SLinus Torvalds if (rta->rta_priority) 7261da177e4SLinus Torvalds fi->fib_priority = *rta->rta_priority; 7271da177e4SLinus Torvalds if (rta->rta_mx) { 7281da177e4SLinus Torvalds int attrlen = RTA_PAYLOAD(rta->rta_mx); 7291da177e4SLinus Torvalds struct rtattr *attr = RTA_DATA(rta->rta_mx); 7301da177e4SLinus Torvalds 7311da177e4SLinus Torvalds while (RTA_OK(attr, attrlen)) { 7321da177e4SLinus Torvalds unsigned flavor = attr->rta_type; 7331da177e4SLinus Torvalds if (flavor) { 7341da177e4SLinus Torvalds if (flavor > RTAX_MAX) 7351da177e4SLinus Torvalds goto err_inval; 7361da177e4SLinus Torvalds fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); 7371da177e4SLinus Torvalds } 7381da177e4SLinus Torvalds attr = RTA_NEXT(attr, attrlen); 7391da177e4SLinus Torvalds } 7401da177e4SLinus Torvalds } 7411da177e4SLinus Torvalds if (rta->rta_prefsrc) 7421da177e4SLinus Torvalds memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); 7431da177e4SLinus Torvalds 7441da177e4SLinus Torvalds if (rta->rta_mp) { 7451da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 7461da177e4SLinus Torvalds if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) 7471da177e4SLinus Torvalds goto failure; 7481da177e4SLinus Torvalds if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) 7491da177e4SLinus Torvalds goto err_inval; 7501da177e4SLinus Torvalds if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) 7511da177e4SLinus Torvalds goto err_inval; 7521da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE 7531da177e4SLinus Torvalds if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) 7541da177e4SLinus Torvalds goto err_inval; 7551da177e4SLinus Torvalds #endif 7561da177e4SLinus Torvalds #else 7571da177e4SLinus Torvalds goto err_inval; 7581da177e4SLinus Torvalds #endif 7591da177e4SLinus Torvalds } else { 7601da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 7611da177e4SLinus Torvalds if (rta->rta_oif) 
7621da177e4SLinus Torvalds nh->nh_oif = *rta->rta_oif; 7631da177e4SLinus Torvalds if (rta->rta_gw) 7641da177e4SLinus Torvalds memcpy(&nh->nh_gw, rta->rta_gw, 4); 7651da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE 7661da177e4SLinus Torvalds if (rta->rta_flow) 7671da177e4SLinus Torvalds memcpy(&nh->nh_tclassid, rta->rta_flow, 4); 7681da177e4SLinus Torvalds #endif 7691da177e4SLinus Torvalds nh->nh_flags = r->rtm_flags; 7701da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 7711da177e4SLinus Torvalds nh->nh_weight = 1; 7721da177e4SLinus Torvalds #endif 7731da177e4SLinus Torvalds } 7741da177e4SLinus Torvalds 7751da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 7761da177e4SLinus Torvalds fi->fib_mp_alg = mp_alg; 7771da177e4SLinus Torvalds #endif 7781da177e4SLinus Torvalds 7791da177e4SLinus Torvalds if (fib_props[r->rtm_type].error) { 7801da177e4SLinus Torvalds if (rta->rta_gw || rta->rta_oif || rta->rta_mp) 7811da177e4SLinus Torvalds goto err_inval; 7821da177e4SLinus Torvalds goto link_it; 7831da177e4SLinus Torvalds } 7841da177e4SLinus Torvalds 7851da177e4SLinus Torvalds if (r->rtm_scope > RT_SCOPE_HOST) 7861da177e4SLinus Torvalds goto err_inval; 7871da177e4SLinus Torvalds 7881da177e4SLinus Torvalds if (r->rtm_scope == RT_SCOPE_HOST) { 7891da177e4SLinus Torvalds struct fib_nh *nh = fi->fib_nh; 7901da177e4SLinus Torvalds 7911da177e4SLinus Torvalds /* Local address is added. 
*/ 7921da177e4SLinus Torvalds if (nhs != 1 || nh->nh_gw) 7931da177e4SLinus Torvalds goto err_inval; 7941da177e4SLinus Torvalds nh->nh_scope = RT_SCOPE_NOWHERE; 7951da177e4SLinus Torvalds nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); 7961da177e4SLinus Torvalds err = -ENODEV; 7971da177e4SLinus Torvalds if (nh->nh_dev == NULL) 7981da177e4SLinus Torvalds goto failure; 7991da177e4SLinus Torvalds } else { 8001da177e4SLinus Torvalds change_nexthops(fi) { 8011da177e4SLinus Torvalds if ((err = fib_check_nh(r, fi, nh)) != 0) 8021da177e4SLinus Torvalds goto failure; 8031da177e4SLinus Torvalds } endfor_nexthops(fi) 8041da177e4SLinus Torvalds } 8051da177e4SLinus Torvalds 8061da177e4SLinus Torvalds if (fi->fib_prefsrc) { 8071da177e4SLinus Torvalds if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || 8081da177e4SLinus Torvalds memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) 8091da177e4SLinus Torvalds if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) 8101da177e4SLinus Torvalds goto err_inval; 8111da177e4SLinus Torvalds } 8121da177e4SLinus Torvalds 8131da177e4SLinus Torvalds link_it: 8141da177e4SLinus Torvalds if ((ofi = fib_find_info(fi)) != NULL) { 8151da177e4SLinus Torvalds fi->fib_dead = 1; 8161da177e4SLinus Torvalds free_fib_info(fi); 8171da177e4SLinus Torvalds ofi->fib_treeref++; 8181da177e4SLinus Torvalds return ofi; 8191da177e4SLinus Torvalds } 8201da177e4SLinus Torvalds 8211da177e4SLinus Torvalds fi->fib_treeref++; 8221da177e4SLinus Torvalds atomic_inc(&fi->fib_clntref); 8231da177e4SLinus Torvalds write_lock(&fib_info_lock); 8241da177e4SLinus Torvalds hlist_add_head(&fi->fib_hash, 8251da177e4SLinus Torvalds &fib_info_hash[fib_info_hashfn(fi)]); 8261da177e4SLinus Torvalds if (fi->fib_prefsrc) { 8271da177e4SLinus Torvalds struct hlist_head *head; 8281da177e4SLinus Torvalds 8291da177e4SLinus Torvalds head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 8301da177e4SLinus Torvalds hlist_add_head(&fi->fib_lhash, head); 8311da177e4SLinus Torvalds } 8321da177e4SLinus 
Torvalds change_nexthops(fi) { 8331da177e4SLinus Torvalds struct hlist_head *head; 8341da177e4SLinus Torvalds unsigned int hash; 8351da177e4SLinus Torvalds 8361da177e4SLinus Torvalds if (!nh->nh_dev) 8371da177e4SLinus Torvalds continue; 8381da177e4SLinus Torvalds hash = fib_devindex_hashfn(nh->nh_dev->ifindex); 8391da177e4SLinus Torvalds head = &fib_info_devhash[hash]; 8401da177e4SLinus Torvalds hlist_add_head(&nh->nh_hash, head); 8411da177e4SLinus Torvalds } endfor_nexthops(fi) 8421da177e4SLinus Torvalds write_unlock(&fib_info_lock); 8431da177e4SLinus Torvalds return fi; 8441da177e4SLinus Torvalds 8451da177e4SLinus Torvalds err_inval: 8461da177e4SLinus Torvalds err = -EINVAL; 8471da177e4SLinus Torvalds 8481da177e4SLinus Torvalds failure: 8491da177e4SLinus Torvalds *errp = err; 8501da177e4SLinus Torvalds if (fi) { 8511da177e4SLinus Torvalds fi->fib_dead = 1; 8521da177e4SLinus Torvalds free_fib_info(fi); 8531da177e4SLinus Torvalds } 8541da177e4SLinus Torvalds return NULL; 8551da177e4SLinus Torvalds } 8561da177e4SLinus Torvalds 8571da177e4SLinus Torvalds int fib_semantic_match(struct list_head *head, const struct flowi *flp, 8581da177e4SLinus Torvalds struct fib_result *res, __u32 zone, __u32 mask, 8591da177e4SLinus Torvalds int prefixlen) 8601da177e4SLinus Torvalds { 8611da177e4SLinus Torvalds struct fib_alias *fa; 8621da177e4SLinus Torvalds int nh_sel = 0; 8631da177e4SLinus Torvalds 8641da177e4SLinus Torvalds list_for_each_entry(fa, head, fa_list) { 8651da177e4SLinus Torvalds int err; 8661da177e4SLinus Torvalds 8671da177e4SLinus Torvalds if (fa->fa_tos && 8681da177e4SLinus Torvalds fa->fa_tos != flp->fl4_tos) 8691da177e4SLinus Torvalds continue; 8701da177e4SLinus Torvalds 8711da177e4SLinus Torvalds if (fa->fa_scope < flp->fl4_scope) 8721da177e4SLinus Torvalds continue; 8731da177e4SLinus Torvalds 8741da177e4SLinus Torvalds fa->fa_state |= FA_S_ACCESSED; 8751da177e4SLinus Torvalds 8761da177e4SLinus Torvalds err = fib_props[fa->fa_type].error; 8771da177e4SLinus 
Torvalds if (err == 0) { 8781da177e4SLinus Torvalds struct fib_info *fi = fa->fa_info; 8791da177e4SLinus Torvalds 8801da177e4SLinus Torvalds if (fi->fib_flags & RTNH_F_DEAD) 8811da177e4SLinus Torvalds continue; 8821da177e4SLinus Torvalds 8831da177e4SLinus Torvalds switch (fa->fa_type) { 8841da177e4SLinus Torvalds case RTN_UNICAST: 8851da177e4SLinus Torvalds case RTN_LOCAL: 8861da177e4SLinus Torvalds case RTN_BROADCAST: 8871da177e4SLinus Torvalds case RTN_ANYCAST: 8881da177e4SLinus Torvalds case RTN_MULTICAST: 8891da177e4SLinus Torvalds for_nexthops(fi) { 8901da177e4SLinus Torvalds if (nh->nh_flags&RTNH_F_DEAD) 8911da177e4SLinus Torvalds continue; 8921da177e4SLinus Torvalds if (!flp->oif || flp->oif == nh->nh_oif) 8931da177e4SLinus Torvalds break; 8941da177e4SLinus Torvalds } 8951da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 8961da177e4SLinus Torvalds if (nhsel < fi->fib_nhs) { 8971da177e4SLinus Torvalds nh_sel = nhsel; 8981da177e4SLinus Torvalds goto out_fill_res; 8991da177e4SLinus Torvalds } 9001da177e4SLinus Torvalds #else 9011da177e4SLinus Torvalds if (nhsel < 1) { 9021da177e4SLinus Torvalds goto out_fill_res; 9031da177e4SLinus Torvalds } 9041da177e4SLinus Torvalds #endif 9051da177e4SLinus Torvalds endfor_nexthops(fi); 9061da177e4SLinus Torvalds continue; 9071da177e4SLinus Torvalds 9081da177e4SLinus Torvalds default: 9091da177e4SLinus Torvalds printk(KERN_DEBUG "impossible 102\n"); 9101da177e4SLinus Torvalds return -EINVAL; 9111da177e4SLinus Torvalds }; 9121da177e4SLinus Torvalds } 9131da177e4SLinus Torvalds return err; 9141da177e4SLinus Torvalds } 9151da177e4SLinus Torvalds return 1; 9161da177e4SLinus Torvalds 9171da177e4SLinus Torvalds out_fill_res: 9181da177e4SLinus Torvalds res->prefixlen = prefixlen; 9191da177e4SLinus Torvalds res->nh_sel = nh_sel; 9201da177e4SLinus Torvalds res->type = fa->fa_type; 9211da177e4SLinus Torvalds res->scope = fa->fa_scope; 9221da177e4SLinus Torvalds res->fi = fa->fa_info; 9231da177e4SLinus Torvalds #ifdef 
CONFIG_IP_ROUTE_MULTIPATH_CACHED 9241da177e4SLinus Torvalds res->netmask = mask; 9251da177e4SLinus Torvalds res->network = zone & 9261da177e4SLinus Torvalds (0xFFFFFFFF >> (32 - prefixlen)); 9271da177e4SLinus Torvalds #endif 9281da177e4SLinus Torvalds atomic_inc(&res->fi->fib_clntref); 9291da177e4SLinus Torvalds return 0; 9301da177e4SLinus Torvalds } 9311da177e4SLinus Torvalds 9321da177e4SLinus Torvalds /* Find appropriate source address to this destination */ 9331da177e4SLinus Torvalds 9341da177e4SLinus Torvalds u32 __fib_res_prefsrc(struct fib_result *res) 9351da177e4SLinus Torvalds { 9361da177e4SLinus Torvalds return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); 9371da177e4SLinus Torvalds } 9381da177e4SLinus Torvalds 9391da177e4SLinus Torvalds int 9401da177e4SLinus Torvalds fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 9411da177e4SLinus Torvalds u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, 942b6544c0bSJamal Hadi Salim struct fib_info *fi, unsigned int flags) 9431da177e4SLinus Torvalds { 9441da177e4SLinus Torvalds struct rtmsg *rtm; 9451da177e4SLinus Torvalds struct nlmsghdr *nlh; 9461da177e4SLinus Torvalds unsigned char *b = skb->tail; 9471da177e4SLinus Torvalds 948b6544c0bSJamal Hadi Salim nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); 9491da177e4SLinus Torvalds rtm = NLMSG_DATA(nlh); 9501da177e4SLinus Torvalds rtm->rtm_family = AF_INET; 9511da177e4SLinus Torvalds rtm->rtm_dst_len = dst_len; 9521da177e4SLinus Torvalds rtm->rtm_src_len = 0; 9531da177e4SLinus Torvalds rtm->rtm_tos = tos; 9541da177e4SLinus Torvalds rtm->rtm_table = tb_id; 9551da177e4SLinus Torvalds rtm->rtm_type = type; 9561da177e4SLinus Torvalds rtm->rtm_flags = fi->fib_flags; 9571da177e4SLinus Torvalds rtm->rtm_scope = scope; 9581da177e4SLinus Torvalds if (rtm->rtm_dst_len) 9591da177e4SLinus Torvalds RTA_PUT(skb, RTA_DST, 4, dst); 9601da177e4SLinus Torvalds rtm->rtm_protocol = fi->fib_protocol; 9611da177e4SLinus Torvalds if 
(fi->fib_priority) 9621da177e4SLinus Torvalds RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); 9631da177e4SLinus Torvalds #ifdef CONFIG_NET_CLS_ROUTE 9641da177e4SLinus Torvalds if (fi->fib_nh[0].nh_tclassid) 9651da177e4SLinus Torvalds RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); 9661da177e4SLinus Torvalds #endif 9671da177e4SLinus Torvalds if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 9681da177e4SLinus Torvalds goto rtattr_failure; 9691da177e4SLinus Torvalds if (fi->fib_prefsrc) 9701da177e4SLinus Torvalds RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); 9711da177e4SLinus Torvalds if (fi->fib_nhs == 1) { 9721da177e4SLinus Torvalds if (fi->fib_nh->nh_gw) 9731da177e4SLinus Torvalds RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); 9741da177e4SLinus Torvalds if (fi->fib_nh->nh_oif) 9751da177e4SLinus Torvalds RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); 9761da177e4SLinus Torvalds } 9771da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 9781da177e4SLinus Torvalds if (fi->fib_nhs > 1) { 9791da177e4SLinus Torvalds struct rtnexthop *nhp; 9801da177e4SLinus Torvalds struct rtattr *mp_head; 9811da177e4SLinus Torvalds if (skb_tailroom(skb) <= RTA_SPACE(0)) 9821da177e4SLinus Torvalds goto rtattr_failure; 9831da177e4SLinus Torvalds mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0)); 9841da177e4SLinus Torvalds 9851da177e4SLinus Torvalds for_nexthops(fi) { 9861da177e4SLinus Torvalds if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 9871da177e4SLinus Torvalds goto rtattr_failure; 9881da177e4SLinus Torvalds nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 9891da177e4SLinus Torvalds nhp->rtnh_flags = nh->nh_flags & 0xFF; 9901da177e4SLinus Torvalds nhp->rtnh_hops = nh->nh_weight-1; 9911da177e4SLinus Torvalds nhp->rtnh_ifindex = nh->nh_oif; 9921da177e4SLinus Torvalds if (nh->nh_gw) 9931da177e4SLinus Torvalds RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); 9941da177e4SLinus Torvalds nhp->rtnh_len = skb->tail - (unsigned char*)nhp; 
9951da177e4SLinus Torvalds } endfor_nexthops(fi); 9961da177e4SLinus Torvalds mp_head->rta_type = RTA_MULTIPATH; 9971da177e4SLinus Torvalds mp_head->rta_len = skb->tail - (u8*)mp_head; 9981da177e4SLinus Torvalds } 9991da177e4SLinus Torvalds #endif 10001da177e4SLinus Torvalds nlh->nlmsg_len = skb->tail - b; 10011da177e4SLinus Torvalds return skb->len; 10021da177e4SLinus Torvalds 10031da177e4SLinus Torvalds nlmsg_failure: 10041da177e4SLinus Torvalds rtattr_failure: 10051da177e4SLinus Torvalds skb_trim(skb, b - skb->data); 10061da177e4SLinus Torvalds return -1; 10071da177e4SLinus Torvalds } 10081da177e4SLinus Torvalds 10091da177e4SLinus Torvalds #ifndef CONFIG_IP_NOSIOCRT 10101da177e4SLinus Torvalds 10111da177e4SLinus Torvalds int 10121da177e4SLinus Torvalds fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, 10131da177e4SLinus Torvalds struct kern_rta *rta, struct rtentry *r) 10141da177e4SLinus Torvalds { 10151da177e4SLinus Torvalds int plen; 10161da177e4SLinus Torvalds u32 *ptr; 10171da177e4SLinus Torvalds 10181da177e4SLinus Torvalds memset(rtm, 0, sizeof(*rtm)); 10191da177e4SLinus Torvalds memset(rta, 0, sizeof(*rta)); 10201da177e4SLinus Torvalds 10211da177e4SLinus Torvalds if (r->rt_dst.sa_family != AF_INET) 10221da177e4SLinus Torvalds return -EAFNOSUPPORT; 10231da177e4SLinus Torvalds 10241da177e4SLinus Torvalds /* Check mask for validity: 10251da177e4SLinus Torvalds a) it must be contiguous. 10261da177e4SLinus Torvalds b) destination must have all host bits clear. 10271da177e4SLinus Torvalds c) if application forgot to set correct family (AF_INET), 10281da177e4SLinus Torvalds reject request unless it is absolutely clear i.e. 10291da177e4SLinus Torvalds both family and mask are zero. 
10301da177e4SLinus Torvalds */ 10311da177e4SLinus Torvalds plen = 32; 10321da177e4SLinus Torvalds ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; 10331da177e4SLinus Torvalds if (!(r->rt_flags&RTF_HOST)) { 10341da177e4SLinus Torvalds u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; 10351da177e4SLinus Torvalds if (r->rt_genmask.sa_family != AF_INET) { 10361da177e4SLinus Torvalds if (mask || r->rt_genmask.sa_family) 10371da177e4SLinus Torvalds return -EAFNOSUPPORT; 10381da177e4SLinus Torvalds } 10391da177e4SLinus Torvalds if (bad_mask(mask, *ptr)) 10401da177e4SLinus Torvalds return -EINVAL; 10411da177e4SLinus Torvalds plen = inet_mask_len(mask); 10421da177e4SLinus Torvalds } 10431da177e4SLinus Torvalds 10441da177e4SLinus Torvalds nl->nlmsg_flags = NLM_F_REQUEST; 10459ed19f33SJamal Hadi Salim nl->nlmsg_pid = current->pid; 10461da177e4SLinus Torvalds nl->nlmsg_seq = 0; 10471da177e4SLinus Torvalds nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); 10481da177e4SLinus Torvalds if (cmd == SIOCDELRT) { 10491da177e4SLinus Torvalds nl->nlmsg_type = RTM_DELROUTE; 10501da177e4SLinus Torvalds nl->nlmsg_flags = 0; 10511da177e4SLinus Torvalds } else { 10521da177e4SLinus Torvalds nl->nlmsg_type = RTM_NEWROUTE; 10531da177e4SLinus Torvalds nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; 10541da177e4SLinus Torvalds rtm->rtm_protocol = RTPROT_BOOT; 10551da177e4SLinus Torvalds } 10561da177e4SLinus Torvalds 10571da177e4SLinus Torvalds rtm->rtm_dst_len = plen; 10581da177e4SLinus Torvalds rta->rta_dst = ptr; 10591da177e4SLinus Torvalds 10601da177e4SLinus Torvalds if (r->rt_metric) { 10611da177e4SLinus Torvalds *(u32*)&r->rt_pad3 = r->rt_metric - 1; 10621da177e4SLinus Torvalds rta->rta_priority = (u32*)&r->rt_pad3; 10631da177e4SLinus Torvalds } 10641da177e4SLinus Torvalds if (r->rt_flags&RTF_REJECT) { 10651da177e4SLinus Torvalds rtm->rtm_scope = RT_SCOPE_HOST; 10661da177e4SLinus Torvalds rtm->rtm_type = RTN_UNREACHABLE; 10671da177e4SLinus Torvalds return 0; 
10681da177e4SLinus Torvalds } 10691da177e4SLinus Torvalds rtm->rtm_scope = RT_SCOPE_NOWHERE; 10701da177e4SLinus Torvalds rtm->rtm_type = RTN_UNICAST; 10711da177e4SLinus Torvalds 10721da177e4SLinus Torvalds if (r->rt_dev) { 10731da177e4SLinus Torvalds char *colon; 10741da177e4SLinus Torvalds struct net_device *dev; 10751da177e4SLinus Torvalds char devname[IFNAMSIZ]; 10761da177e4SLinus Torvalds 10771da177e4SLinus Torvalds if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) 10781da177e4SLinus Torvalds return -EFAULT; 10791da177e4SLinus Torvalds devname[IFNAMSIZ-1] = 0; 10801da177e4SLinus Torvalds colon = strchr(devname, ':'); 10811da177e4SLinus Torvalds if (colon) 10821da177e4SLinus Torvalds *colon = 0; 10831da177e4SLinus Torvalds dev = __dev_get_by_name(devname); 10841da177e4SLinus Torvalds if (!dev) 10851da177e4SLinus Torvalds return -ENODEV; 10861da177e4SLinus Torvalds rta->rta_oif = &dev->ifindex; 10871da177e4SLinus Torvalds if (colon) { 10881da177e4SLinus Torvalds struct in_ifaddr *ifa; 10891da177e4SLinus Torvalds struct in_device *in_dev = __in_dev_get(dev); 10901da177e4SLinus Torvalds if (!in_dev) 10911da177e4SLinus Torvalds return -ENODEV; 10921da177e4SLinus Torvalds *colon = ':'; 10931da177e4SLinus Torvalds for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 10941da177e4SLinus Torvalds if (strcmp(ifa->ifa_label, devname) == 0) 10951da177e4SLinus Torvalds break; 10961da177e4SLinus Torvalds if (ifa == NULL) 10971da177e4SLinus Torvalds return -ENODEV; 10981da177e4SLinus Torvalds rta->rta_prefsrc = &ifa->ifa_local; 10991da177e4SLinus Torvalds } 11001da177e4SLinus Torvalds } 11011da177e4SLinus Torvalds 11021da177e4SLinus Torvalds ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; 11031da177e4SLinus Torvalds if (r->rt_gateway.sa_family == AF_INET && *ptr) { 11041da177e4SLinus Torvalds rta->rta_gw = ptr; 11051da177e4SLinus Torvalds if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) 11061da177e4SLinus Torvalds rtm->rtm_scope = 
RT_SCOPE_UNIVERSE; 11071da177e4SLinus Torvalds } 11081da177e4SLinus Torvalds 11091da177e4SLinus Torvalds if (cmd == SIOCDELRT) 11101da177e4SLinus Torvalds return 0; 11111da177e4SLinus Torvalds 11121da177e4SLinus Torvalds if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) 11131da177e4SLinus Torvalds return -EINVAL; 11141da177e4SLinus Torvalds 11151da177e4SLinus Torvalds if (rtm->rtm_scope == RT_SCOPE_NOWHERE) 11161da177e4SLinus Torvalds rtm->rtm_scope = RT_SCOPE_LINK; 11171da177e4SLinus Torvalds 11181da177e4SLinus Torvalds if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { 11191da177e4SLinus Torvalds struct rtattr *rec; 11201da177e4SLinus Torvalds struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); 11211da177e4SLinus Torvalds if (mx == NULL) 11221da177e4SLinus Torvalds return -ENOMEM; 11231da177e4SLinus Torvalds rta->rta_mx = mx; 11241da177e4SLinus Torvalds mx->rta_type = RTA_METRICS; 11251da177e4SLinus Torvalds mx->rta_len = RTA_LENGTH(0); 11261da177e4SLinus Torvalds if (r->rt_flags&RTF_MTU) { 11271da177e4SLinus Torvalds rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); 11281da177e4SLinus Torvalds rec->rta_type = RTAX_ADVMSS; 11291da177e4SLinus Torvalds rec->rta_len = RTA_LENGTH(4); 11301da177e4SLinus Torvalds mx->rta_len += RTA_LENGTH(4); 11311da177e4SLinus Torvalds *(u32*)RTA_DATA(rec) = r->rt_mtu - 40; 11321da177e4SLinus Torvalds } 11331da177e4SLinus Torvalds if (r->rt_flags&RTF_WINDOW) { 11341da177e4SLinus Torvalds rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); 11351da177e4SLinus Torvalds rec->rta_type = RTAX_WINDOW; 11361da177e4SLinus Torvalds rec->rta_len = RTA_LENGTH(4); 11371da177e4SLinus Torvalds mx->rta_len += RTA_LENGTH(4); 11381da177e4SLinus Torvalds *(u32*)RTA_DATA(rec) = r->rt_window; 11391da177e4SLinus Torvalds } 11401da177e4SLinus Torvalds if (r->rt_flags&RTF_IRTT) { 11411da177e4SLinus Torvalds rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); 11421da177e4SLinus Torvalds rec->rta_type = RTAX_RTT; 11431da177e4SLinus Torvalds 
rec->rta_len = RTA_LENGTH(4); 11441da177e4SLinus Torvalds mx->rta_len += RTA_LENGTH(4); 11451da177e4SLinus Torvalds *(u32*)RTA_DATA(rec) = r->rt_irtt<<3; 11461da177e4SLinus Torvalds } 11471da177e4SLinus Torvalds } 11481da177e4SLinus Torvalds return 0; 11491da177e4SLinus Torvalds } 11501da177e4SLinus Torvalds 11511da177e4SLinus Torvalds #endif 11521da177e4SLinus Torvalds 11531da177e4SLinus Torvalds /* 11541da177e4SLinus Torvalds Update FIB if: 11551da177e4SLinus Torvalds - local address disappeared -> we must delete all the entries 11561da177e4SLinus Torvalds referring to it. 11571da177e4SLinus Torvalds - device went down -> we must shutdown all nexthops going via it. 11581da177e4SLinus Torvalds */ 11591da177e4SLinus Torvalds 11601da177e4SLinus Torvalds int fib_sync_down(u32 local, struct net_device *dev, int force) 11611da177e4SLinus Torvalds { 11621da177e4SLinus Torvalds int ret = 0; 11631da177e4SLinus Torvalds int scope = RT_SCOPE_NOWHERE; 11641da177e4SLinus Torvalds 11651da177e4SLinus Torvalds if (force) 11661da177e4SLinus Torvalds scope = -1; 11671da177e4SLinus Torvalds 11681da177e4SLinus Torvalds if (local && fib_info_laddrhash) { 11691da177e4SLinus Torvalds unsigned int hash = fib_laddr_hashfn(local); 11701da177e4SLinus Torvalds struct hlist_head *head = &fib_info_laddrhash[hash]; 11711da177e4SLinus Torvalds struct hlist_node *node; 11721da177e4SLinus Torvalds struct fib_info *fi; 11731da177e4SLinus Torvalds 11741da177e4SLinus Torvalds hlist_for_each_entry(fi, node, head, fib_lhash) { 11751da177e4SLinus Torvalds if (fi->fib_prefsrc == local) { 11761da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 11771da177e4SLinus Torvalds ret++; 11781da177e4SLinus Torvalds } 11791da177e4SLinus Torvalds } 11801da177e4SLinus Torvalds } 11811da177e4SLinus Torvalds 11821da177e4SLinus Torvalds if (dev) { 11831da177e4SLinus Torvalds struct fib_info *prev_fi = NULL; 11841da177e4SLinus Torvalds unsigned int hash = fib_devindex_hashfn(dev->ifindex); 11851da177e4SLinus Torvalds 
struct hlist_head *head = &fib_info_devhash[hash]; 11861da177e4SLinus Torvalds struct hlist_node *node; 11871da177e4SLinus Torvalds struct fib_nh *nh; 11881da177e4SLinus Torvalds 11891da177e4SLinus Torvalds hlist_for_each_entry(nh, node, head, nh_hash) { 11901da177e4SLinus Torvalds struct fib_info *fi = nh->nh_parent; 11911da177e4SLinus Torvalds int dead; 11921da177e4SLinus Torvalds 11931da177e4SLinus Torvalds BUG_ON(!fi->fib_nhs); 11941da177e4SLinus Torvalds if (nh->nh_dev != dev || fi == prev_fi) 11951da177e4SLinus Torvalds continue; 11961da177e4SLinus Torvalds prev_fi = fi; 11971da177e4SLinus Torvalds dead = 0; 11981da177e4SLinus Torvalds change_nexthops(fi) { 11991da177e4SLinus Torvalds if (nh->nh_flags&RTNH_F_DEAD) 12001da177e4SLinus Torvalds dead++; 12011da177e4SLinus Torvalds else if (nh->nh_dev == dev && 12021da177e4SLinus Torvalds nh->nh_scope != scope) { 12031da177e4SLinus Torvalds nh->nh_flags |= RTNH_F_DEAD; 12041da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 12051da177e4SLinus Torvalds spin_lock_bh(&fib_multipath_lock); 12061da177e4SLinus Torvalds fi->fib_power -= nh->nh_power; 12071da177e4SLinus Torvalds nh->nh_power = 0; 12081da177e4SLinus Torvalds spin_unlock_bh(&fib_multipath_lock); 12091da177e4SLinus Torvalds #endif 12101da177e4SLinus Torvalds dead++; 12111da177e4SLinus Torvalds } 12121da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 12131da177e4SLinus Torvalds if (force > 1 && nh->nh_dev == dev) { 12141da177e4SLinus Torvalds dead = fi->fib_nhs; 12151da177e4SLinus Torvalds break; 12161da177e4SLinus Torvalds } 12171da177e4SLinus Torvalds #endif 12181da177e4SLinus Torvalds } endfor_nexthops(fi) 12191da177e4SLinus Torvalds if (dead == fi->fib_nhs) { 12201da177e4SLinus Torvalds fi->fib_flags |= RTNH_F_DEAD; 12211da177e4SLinus Torvalds ret++; 12221da177e4SLinus Torvalds } 12231da177e4SLinus Torvalds } 12241da177e4SLinus Torvalds } 12251da177e4SLinus Torvalds 12261da177e4SLinus Torvalds return ret; 12271da177e4SLinus Torvalds } 
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
 * fib_sync_up - revive nexthops after a device has come (back) up.
 * @dev: the device that changed state.
 *
 * Does nothing and returns 0 unless @dev has IFF_UP set.  Otherwise the
 * device-index hash bucket for @dev is walked; for each fib_info reached
 * through a nexthop bound to @dev, every nexthop of that fib_info is
 * examined:
 *   - a nexthop without RTNH_F_DEAD is counted as alive;
 *   - a dead nexthop is revived (RTNH_F_DEAD cleared, nh_power reset to 0
 *     under fib_multipath_lock) only when its device is @dev, that device
 *     is up and __in_dev_get(@dev) is non-NULL.
 * A fib_info with at least one alive nexthop gets RTNH_F_DEAD cleared
 * from fib_flags.
 *
 * Returns the number of fib_info entries that ended up with at least one
 * alive nexthop (including entries that needed no revival).
 */
int fib_sync_up(struct net_device *dev)
{
	struct fib_info *last_fi = NULL;
	struct hlist_head *bucket;
	struct hlist_node *pos;
	struct fib_nh *dev_nh;
	int updated = 0;

	if (!(dev->flags & IFF_UP))
		return 0;

	bucket = &fib_info_devhash[fib_devindex_hashfn(dev->ifindex)];

	hlist_for_each_entry(dev_nh, pos, bucket, nh_hash) {
		struct fib_info *fi = dev_nh->nh_parent;
		int alive = 0;

		BUG_ON(!fi->fib_nhs);

		/*
		 * Skip nexthops of other devices sharing this bucket, and
		 * do not rescan the fib_info handled on the previous step.
		 */
		if (dev_nh->nh_dev != dev || fi == last_fi)
			continue;
		last_fi = fi;

		/* change_nexthops() provides its own 'nh' cursor over fi. */
		change_nexthops(fi) {
			if (nh->nh_flags & RTNH_F_DEAD) {
				if (nh->nh_dev == NULL ||
				    !(nh->nh_dev->flags & IFF_UP))
					continue;
				if (nh->nh_dev != dev ||
				    __in_dev_get(dev) == NULL)
					continue;

				spin_lock_bh(&fib_multipath_lock);
				nh->nh_power = 0;
				nh->nh_flags &= ~RTNH_F_DEAD;
				spin_unlock_bh(&fib_multipath_lock);
			}
			alive++;
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~RTNH_F_DEAD;
			updated++;
		}
	}

	return updated;
}

/*
 * fib_select_multipath - choose the nexthop of a multipath route.
 * @flp: flow being routed (not consulted by this function).
 * @res: lookup result; res->fi is the route, res->nh_sel receives the
 *       index of the chosen nexthop.
 *
 * Each live nexthop holds a credit counter (nh_power) and the route holds
 * the sum of those credits (fib_power).  Once the route's credit is used
 * up, every live nexthop is refilled with its nh_weight.  A selection
 * spends one credit of the chosen nexthop.  The author's note on this
 * scheme: suboptimal, but it gives a really fair weighted distribution.
 *
 * All credit bookkeeping is done under fib_multipath_lock.  If no live
 * nexthop can be chosen, res->nh_sel is set to 0.
 */
void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
	struct fib_info *fi = res->fi;
	int ticket;

	spin_lock_bh(&fib_multipath_lock);

	if (fi->fib_power <= 0) {
		int total = 0;

		/* Credits exhausted: refill every live nexthop. */
		change_nexthops(fi) {
			if (nh->nh_flags & RTNH_F_DEAD)
				continue;
			total += nh->nh_weight;
			nh->nh_power = nh->nh_weight;
		} endfor_nexthops(fi);

		fi->fib_power = total;
		if (total <= 0) {
			spin_unlock_bh(&fib_multipath_lock);
			/* Lost a race: the route has just gone dead. */
			res->nh_sel = 0;
			return;
		}
	}

	/*
	 * The ticket ought to be a random number in [0..fi->fib_power-1];
	 * jiffies is only a poor approximation of that.
	 */
	ticket = jiffies % fi->fib_power;

	change_nexthops(fi) {
		if ((nh->nh_flags & RTNH_F_DEAD) || !nh->nh_power)
			continue;

		ticket -= nh->nh_power;
		if (ticket > 0)
			continue;

		/* This nexthop wins: spend one of its credits. */
		nh->nh_power--;
		fi->fib_power--;
		res->nh_sel = nhsel;
		spin_unlock_bh(&fib_multipath_lock);
		return;
	} endfor_nexthops(fi);

	/* Lost a race: the route has just gone dead. */
	res->nh_sel = 0;
	spin_unlock_bh(&fib_multipath_lock);
}
#endif