1ab84be7eSDavid Ahern // SPDX-License-Identifier: GPL-2.0 2ab84be7eSDavid Ahern /* Generic nexthop implementation 3ab84be7eSDavid Ahern * 4ab84be7eSDavid Ahern * Copyright (c) 2017-19 Cumulus Networks 5ab84be7eSDavid Ahern * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> 6ab84be7eSDavid Ahern */ 7ab84be7eSDavid Ahern 8ab84be7eSDavid Ahern #include <linux/nexthop.h> 9ab84be7eSDavid Ahern #include <linux/rtnetlink.h> 10ab84be7eSDavid Ahern #include <linux/slab.h> 11b6459415SJakub Kicinski #include <linux/vmalloc.h> 12430a0491SDavid Ahern #include <net/arp.h> 1353010f99SDavid Ahern #include <net/ipv6_stubs.h> 14b513bd03SDavid Ahern #include <net/lwtunnel.h> 15430a0491SDavid Ahern #include <net/ndisc.h> 16ab84be7eSDavid Ahern #include <net/nexthop.h> 17597cfe4fSDavid Ahern #include <net/route.h> 18ab84be7eSDavid Ahern #include <net/sock.h> 19ab84be7eSDavid Ahern 20a2601e2bSPetr Machata #define NH_RES_DEFAULT_IDLE_TIMER (120 * HZ) 21a2601e2bSPetr Machata #define NH_RES_DEFAULT_UNBALANCED_TIMER 0 /* No forced rebalancing. 
*/ 22a2601e2bSPetr Machata 23430a0491SDavid Ahern static void remove_nexthop(struct net *net, struct nexthop *nh, 24430a0491SDavid Ahern struct nl_info *nlinfo); 25430a0491SDavid Ahern 26597cfe4fSDavid Ahern #define NH_DEV_HASHBITS 8 27597cfe4fSDavid Ahern #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) 28597cfe4fSDavid Ahern 29*95fedd76SIdo Schimmel #define NHA_OP_FLAGS_DUMP_ALL (NHA_OP_FLAG_DUMP_STATS) 30*95fedd76SIdo Schimmel 31643d0878SPetr Machata static const struct nla_policy rtm_nh_policy_new[] = { 32ab84be7eSDavid Ahern [NHA_ID] = { .type = NLA_U32 }, 33ab84be7eSDavid Ahern [NHA_GROUP] = { .type = NLA_BINARY }, 34ab84be7eSDavid Ahern [NHA_GROUP_TYPE] = { .type = NLA_U16 }, 35ab84be7eSDavid Ahern [NHA_BLACKHOLE] = { .type = NLA_FLAG }, 36ab84be7eSDavid Ahern [NHA_OIF] = { .type = NLA_U32 }, 37ab84be7eSDavid Ahern [NHA_GATEWAY] = { .type = NLA_BINARY }, 38ab84be7eSDavid Ahern [NHA_ENCAP_TYPE] = { .type = NLA_U16 }, 39ab84be7eSDavid Ahern [NHA_ENCAP] = { .type = NLA_NESTED }, 4038428d68SRoopa Prabhu [NHA_FDB] = { .type = NLA_FLAG }, 41a2601e2bSPetr Machata [NHA_RES_GROUP] = { .type = NLA_NESTED }, 42ab84be7eSDavid Ahern }; 43ab84be7eSDavid Ahern 4460f5ad5eSPetr Machata static const struct nla_policy rtm_nh_policy_get[] = { 4560f5ad5eSPetr Machata [NHA_ID] = { .type = NLA_U32 }, 46*95fedd76SIdo Schimmel [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, 47*95fedd76SIdo Schimmel NHA_OP_FLAGS_DUMP_ALL), 4860f5ad5eSPetr Machata }; 4960f5ad5eSPetr Machata 502118f939SPetr Machata static const struct nla_policy rtm_nh_policy_del[] = { 512118f939SPetr Machata [NHA_ID] = { .type = NLA_U32 }, 522118f939SPetr Machata }; 532118f939SPetr Machata 5444551bffSPetr Machata static const struct nla_policy rtm_nh_policy_dump[] = { 5544551bffSPetr Machata [NHA_OIF] = { .type = NLA_U32 }, 5644551bffSPetr Machata [NHA_GROUPS] = { .type = NLA_FLAG }, 5744551bffSPetr Machata [NHA_MASTER] = { .type = NLA_U32 }, 5844551bffSPetr Machata [NHA_FDB] = { .type = NLA_FLAG }, 59*95fedd76SIdo Schimmel 
[NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, 60*95fedd76SIdo Schimmel NHA_OP_FLAGS_DUMP_ALL), 6144551bffSPetr Machata }; 6244551bffSPetr Machata 63a2601e2bSPetr Machata static const struct nla_policy rtm_nh_res_policy_new[] = { 64a2601e2bSPetr Machata [NHA_RES_GROUP_BUCKETS] = { .type = NLA_U16 }, 65a2601e2bSPetr Machata [NHA_RES_GROUP_IDLE_TIMER] = { .type = NLA_U32 }, 66a2601e2bSPetr Machata [NHA_RES_GROUP_UNBALANCED_TIMER] = { .type = NLA_U32 }, 67a2601e2bSPetr Machata }; 68a2601e2bSPetr Machata 698a1bbabbSPetr Machata static const struct nla_policy rtm_nh_policy_dump_bucket[] = { 708a1bbabbSPetr Machata [NHA_ID] = { .type = NLA_U32 }, 718a1bbabbSPetr Machata [NHA_OIF] = { .type = NLA_U32 }, 728a1bbabbSPetr Machata [NHA_MASTER] = { .type = NLA_U32 }, 738a1bbabbSPetr Machata [NHA_RES_BUCKET] = { .type = NLA_NESTED }, 748a1bbabbSPetr Machata }; 758a1bbabbSPetr Machata 768a1bbabbSPetr Machata static const struct nla_policy rtm_nh_res_bucket_policy_dump[] = { 778a1bbabbSPetr Machata [NHA_RES_BUCKET_NH_ID] = { .type = NLA_U32 }, 788a1bbabbSPetr Machata }; 798a1bbabbSPetr Machata 80187d4c6bSPetr Machata static const struct nla_policy rtm_nh_policy_get_bucket[] = { 81187d4c6bSPetr Machata [NHA_ID] = { .type = NLA_U32 }, 82187d4c6bSPetr Machata [NHA_RES_BUCKET] = { .type = NLA_NESTED }, 83187d4c6bSPetr Machata }; 84187d4c6bSPetr Machata 85187d4c6bSPetr Machata static const struct nla_policy rtm_nh_res_bucket_policy_get[] = { 86187d4c6bSPetr Machata [NHA_RES_BUCKET_INDEX] = { .type = NLA_U16 }, 87187d4c6bSPetr Machata }; 88187d4c6bSPetr Machata 895ca474f2SIdo Schimmel static bool nexthop_notifiers_is_empty(struct net *net) 905ca474f2SIdo Schimmel { 915ca474f2SIdo Schimmel return !net->nexthop.notifier_chain.head; 925ca474f2SIdo Schimmel } 935ca474f2SIdo Schimmel 945ca474f2SIdo Schimmel static void 955ca474f2SIdo Schimmel __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, 9696a85625SPetr Machata const struct nh_info *nhi) 975ca474f2SIdo Schimmel { 
985ca474f2SIdo Schimmel nh_info->dev = nhi->fib_nhc.nhc_dev; 995ca474f2SIdo Schimmel nh_info->gw_family = nhi->fib_nhc.nhc_gw_family; 1005ca474f2SIdo Schimmel if (nh_info->gw_family == AF_INET) 1015ca474f2SIdo Schimmel nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4; 1025ca474f2SIdo Schimmel else if (nh_info->gw_family == AF_INET6) 1035ca474f2SIdo Schimmel nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; 1045ca474f2SIdo Schimmel 1055ca474f2SIdo Schimmel nh_info->is_reject = nhi->reject_nh; 1065ca474f2SIdo Schimmel nh_info->is_fdb = nhi->fdb_nh; 1075ca474f2SIdo Schimmel nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; 1085ca474f2SIdo Schimmel } 1095ca474f2SIdo Schimmel 1105ca474f2SIdo Schimmel static int nh_notifier_single_info_init(struct nh_notifier_info *info, 1115ca474f2SIdo Schimmel const struct nexthop *nh) 1125ca474f2SIdo Schimmel { 11396a85625SPetr Machata struct nh_info *nhi = rtnl_dereference(nh->nh_info); 11496a85625SPetr Machata 11509ad6becSIdo Schimmel info->type = NH_NOTIFIER_INFO_TYPE_SINGLE; 1165ca474f2SIdo Schimmel info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); 1175ca474f2SIdo Schimmel if (!info->nh) 1185ca474f2SIdo Schimmel return -ENOMEM; 1195ca474f2SIdo Schimmel 12096a85625SPetr Machata __nh_notifier_single_info_init(info->nh, nhi); 1215ca474f2SIdo Schimmel 1225ca474f2SIdo Schimmel return 0; 1235ca474f2SIdo Schimmel } 1245ca474f2SIdo Schimmel 1255ca474f2SIdo Schimmel static void nh_notifier_single_info_fini(struct nh_notifier_info *info) 1265ca474f2SIdo Schimmel { 1275ca474f2SIdo Schimmel kfree(info->nh); 1285ca474f2SIdo Schimmel } 1295ca474f2SIdo Schimmel 130de1d1ee3SPetr Machata static int nh_notifier_mpath_info_init(struct nh_notifier_info *info, 131da230501SPetr Machata struct nh_group *nhg) 1325ca474f2SIdo Schimmel { 1335ca474f2SIdo Schimmel u16 num_nh = nhg->num_nh; 1345ca474f2SIdo Schimmel int i; 1355ca474f2SIdo Schimmel 13609ad6becSIdo Schimmel info->type = NH_NOTIFIER_INFO_TYPE_GRP; 1375ca474f2SIdo Schimmel info->nh_grp = 
kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), 1385ca474f2SIdo Schimmel GFP_KERNEL); 1395ca474f2SIdo Schimmel if (!info->nh_grp) 1405ca474f2SIdo Schimmel return -ENOMEM; 1415ca474f2SIdo Schimmel 1425ca474f2SIdo Schimmel info->nh_grp->num_nh = num_nh; 1435ca474f2SIdo Schimmel info->nh_grp->is_fdb = nhg->fdb_nh; 1445ca474f2SIdo Schimmel 1455ca474f2SIdo Schimmel for (i = 0; i < num_nh; i++) { 1465ca474f2SIdo Schimmel struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 14796a85625SPetr Machata struct nh_info *nhi; 1485ca474f2SIdo Schimmel 14996a85625SPetr Machata nhi = rtnl_dereference(nhge->nh->nh_info); 1505ca474f2SIdo Schimmel info->nh_grp->nh_entries[i].id = nhge->nh->id; 1515ca474f2SIdo Schimmel info->nh_grp->nh_entries[i].weight = nhge->weight; 1525ca474f2SIdo Schimmel __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, 15396a85625SPetr Machata nhi); 1545ca474f2SIdo Schimmel } 1555ca474f2SIdo Schimmel 1565ca474f2SIdo Schimmel return 0; 1575ca474f2SIdo Schimmel } 1585ca474f2SIdo Schimmel 1597c37c7e0SPetr Machata static int nh_notifier_res_table_info_init(struct nh_notifier_info *info, 1607c37c7e0SPetr Machata struct nh_group *nhg) 1617c37c7e0SPetr Machata { 1627c37c7e0SPetr Machata struct nh_res_table *res_table = rtnl_dereference(nhg->res_table); 1637c37c7e0SPetr Machata u16 num_nh_buckets = res_table->num_nh_buckets; 1647c37c7e0SPetr Machata unsigned long size; 1657c37c7e0SPetr Machata u16 i; 1667c37c7e0SPetr Machata 1677c37c7e0SPetr Machata info->type = NH_NOTIFIER_INFO_TYPE_RES_TABLE; 1687c37c7e0SPetr Machata size = struct_size(info->nh_res_table, nhs, num_nh_buckets); 1697c37c7e0SPetr Machata info->nh_res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | 1707c37c7e0SPetr Machata __GFP_NOWARN); 1717c37c7e0SPetr Machata if (!info->nh_res_table) 1727c37c7e0SPetr Machata return -ENOMEM; 1737c37c7e0SPetr Machata 1747c37c7e0SPetr Machata info->nh_res_table->num_nh_buckets = num_nh_buckets; 1757c37c7e0SPetr Machata 1767c37c7e0SPetr Machata for (i = 
0; i < num_nh_buckets; i++) { 1777c37c7e0SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; 1787c37c7e0SPetr Machata struct nh_grp_entry *nhge; 1797c37c7e0SPetr Machata struct nh_info *nhi; 1807c37c7e0SPetr Machata 1817c37c7e0SPetr Machata nhge = rtnl_dereference(bucket->nh_entry); 1827c37c7e0SPetr Machata nhi = rtnl_dereference(nhge->nh->nh_info); 1837c37c7e0SPetr Machata __nh_notifier_single_info_init(&info->nh_res_table->nhs[i], 1847c37c7e0SPetr Machata nhi); 1857c37c7e0SPetr Machata } 1867c37c7e0SPetr Machata 1877c37c7e0SPetr Machata return 0; 1887c37c7e0SPetr Machata } 1897c37c7e0SPetr Machata 190da230501SPetr Machata static int nh_notifier_grp_info_init(struct nh_notifier_info *info, 191da230501SPetr Machata const struct nexthop *nh) 1925ca474f2SIdo Schimmel { 193da230501SPetr Machata struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 194da230501SPetr Machata 195de1d1ee3SPetr Machata if (nhg->hash_threshold) 196de1d1ee3SPetr Machata return nh_notifier_mpath_info_init(info, nhg); 1977c37c7e0SPetr Machata else if (nhg->resilient) 1987c37c7e0SPetr Machata return nh_notifier_res_table_info_init(info, nhg); 199da230501SPetr Machata return -EINVAL; 200da230501SPetr Machata } 201da230501SPetr Machata 202da230501SPetr Machata static void nh_notifier_grp_info_fini(struct nh_notifier_info *info, 203da230501SPetr Machata const struct nexthop *nh) 204da230501SPetr Machata { 205da230501SPetr Machata struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 206da230501SPetr Machata 207de1d1ee3SPetr Machata if (nhg->hash_threshold) 2085ca474f2SIdo Schimmel kfree(info->nh_grp); 2097c37c7e0SPetr Machata else if (nhg->resilient) 2107c37c7e0SPetr Machata vfree(info->nh_res_table); 2115ca474f2SIdo Schimmel } 2125ca474f2SIdo Schimmel 2135ca474f2SIdo Schimmel static int nh_notifier_info_init(struct nh_notifier_info *info, 2145ca474f2SIdo Schimmel const struct nexthop *nh) 2155ca474f2SIdo Schimmel { 2165ca474f2SIdo Schimmel info->id = nh->id; 2175ca474f2SIdo 
Schimmel 21809ad6becSIdo Schimmel if (nh->is_group) 2195ca474f2SIdo Schimmel return nh_notifier_grp_info_init(info, nh); 2205ca474f2SIdo Schimmel else 2215ca474f2SIdo Schimmel return nh_notifier_single_info_init(info, nh); 2225ca474f2SIdo Schimmel } 2235ca474f2SIdo Schimmel 22409ad6becSIdo Schimmel static void nh_notifier_info_fini(struct nh_notifier_info *info, 22509ad6becSIdo Schimmel const struct nexthop *nh) 2265ca474f2SIdo Schimmel { 22709ad6becSIdo Schimmel if (nh->is_group) 228da230501SPetr Machata nh_notifier_grp_info_fini(info, nh); 2295ca474f2SIdo Schimmel else 2305ca474f2SIdo Schimmel nh_notifier_single_info_fini(info); 2315ca474f2SIdo Schimmel } 2325ca474f2SIdo Schimmel 2338590ceedSRoopa Prabhu static int call_nexthop_notifiers(struct net *net, 234d8e79f1dSNathan Chancellor enum nexthop_event_type event_type, 2353578d53dSIdo Schimmel struct nexthop *nh, 2363578d53dSIdo Schimmel struct netlink_ext_ack *extack) 2378590ceedSRoopa Prabhu { 2385ca474f2SIdo Schimmel struct nh_notifier_info info = { 2395ca474f2SIdo Schimmel .net = net, 2405ca474f2SIdo Schimmel .extack = extack, 2415ca474f2SIdo Schimmel }; 2428590ceedSRoopa Prabhu int err; 2438590ceedSRoopa Prabhu 2445ca474f2SIdo Schimmel ASSERT_RTNL(); 2455ca474f2SIdo Schimmel 2465ca474f2SIdo Schimmel if (nexthop_notifiers_is_empty(net)) 2475ca474f2SIdo Schimmel return 0; 2485ca474f2SIdo Schimmel 2495ca474f2SIdo Schimmel err = nh_notifier_info_init(&info, nh); 2505ca474f2SIdo Schimmel if (err) { 2515ca474f2SIdo Schimmel NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); 2525ca474f2SIdo Schimmel return err; 2535ca474f2SIdo Schimmel } 2545ca474f2SIdo Schimmel 25580690ec6SIdo Schimmel err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, 2561ec69d18SIdo Schimmel event_type, &info); 25709ad6becSIdo Schimmel nh_notifier_info_fini(&info, nh); 2585ca474f2SIdo Schimmel 2598590ceedSRoopa Prabhu return notifier_to_errno(err); 2608590ceedSRoopa Prabhu } 2618590ceedSRoopa Prabhu 
2627c37c7e0SPetr Machata static int 2637c37c7e0SPetr Machata nh_notifier_res_bucket_idle_timer_get(const struct nh_notifier_info *info, 2647c37c7e0SPetr Machata bool force, unsigned int *p_idle_timer_ms) 2657c37c7e0SPetr Machata { 2667c37c7e0SPetr Machata struct nh_res_table *res_table; 2677c37c7e0SPetr Machata struct nh_group *nhg; 2687c37c7e0SPetr Machata struct nexthop *nh; 2697c37c7e0SPetr Machata int err = 0; 2707c37c7e0SPetr Machata 2717c37c7e0SPetr Machata /* When 'force' is false, nexthop bucket replacement is performed 2727c37c7e0SPetr Machata * because the bucket was deemed to be idle. In this case, capable 2737c37c7e0SPetr Machata * listeners can choose to perform an atomic replacement: The bucket is 2747c37c7e0SPetr Machata * only replaced if it is inactive. However, if the idle timer interval 2757c37c7e0SPetr Machata * is smaller than the interval in which a listener is querying 2767c37c7e0SPetr Machata * buckets' activity from the device, then atomic replacement should 2777c37c7e0SPetr Machata * not be tried. Pass the idle timer value to listeners, so that they 2787c37c7e0SPetr Machata * could determine which type of replacement to perform. 
2797c37c7e0SPetr Machata */ 2807c37c7e0SPetr Machata if (force) { 2817c37c7e0SPetr Machata *p_idle_timer_ms = 0; 2827c37c7e0SPetr Machata return 0; 2837c37c7e0SPetr Machata } 2847c37c7e0SPetr Machata 2857c37c7e0SPetr Machata rcu_read_lock(); 2867c37c7e0SPetr Machata 2877c37c7e0SPetr Machata nh = nexthop_find_by_id(info->net, info->id); 2887c37c7e0SPetr Machata if (!nh) { 2897c37c7e0SPetr Machata err = -EINVAL; 2907c37c7e0SPetr Machata goto out; 2917c37c7e0SPetr Machata } 2927c37c7e0SPetr Machata 2937c37c7e0SPetr Machata nhg = rcu_dereference(nh->nh_grp); 2947c37c7e0SPetr Machata res_table = rcu_dereference(nhg->res_table); 2957c37c7e0SPetr Machata *p_idle_timer_ms = jiffies_to_msecs(res_table->idle_timer); 2967c37c7e0SPetr Machata 2977c37c7e0SPetr Machata out: 2987c37c7e0SPetr Machata rcu_read_unlock(); 2997c37c7e0SPetr Machata 3007c37c7e0SPetr Machata return err; 3017c37c7e0SPetr Machata } 3027c37c7e0SPetr Machata 3037c37c7e0SPetr Machata static int nh_notifier_res_bucket_info_init(struct nh_notifier_info *info, 3047c37c7e0SPetr Machata u16 bucket_index, bool force, 3057c37c7e0SPetr Machata struct nh_info *oldi, 3067c37c7e0SPetr Machata struct nh_info *newi) 3077c37c7e0SPetr Machata { 3087c37c7e0SPetr Machata unsigned int idle_timer_ms; 3097c37c7e0SPetr Machata int err; 3107c37c7e0SPetr Machata 3117c37c7e0SPetr Machata err = nh_notifier_res_bucket_idle_timer_get(info, force, 3127c37c7e0SPetr Machata &idle_timer_ms); 3137c37c7e0SPetr Machata if (err) 3147c37c7e0SPetr Machata return err; 3157c37c7e0SPetr Machata 3167c37c7e0SPetr Machata info->type = NH_NOTIFIER_INFO_TYPE_RES_BUCKET; 3177c37c7e0SPetr Machata info->nh_res_bucket = kzalloc(sizeof(*info->nh_res_bucket), 3187c37c7e0SPetr Machata GFP_KERNEL); 3197c37c7e0SPetr Machata if (!info->nh_res_bucket) 3207c37c7e0SPetr Machata return -ENOMEM; 3217c37c7e0SPetr Machata 3227c37c7e0SPetr Machata info->nh_res_bucket->bucket_index = bucket_index; 3237c37c7e0SPetr Machata info->nh_res_bucket->idle_timer_ms = 
idle_timer_ms; 3247c37c7e0SPetr Machata info->nh_res_bucket->force = force; 3257c37c7e0SPetr Machata __nh_notifier_single_info_init(&info->nh_res_bucket->old_nh, oldi); 3267c37c7e0SPetr Machata __nh_notifier_single_info_init(&info->nh_res_bucket->new_nh, newi); 3277c37c7e0SPetr Machata return 0; 3287c37c7e0SPetr Machata } 3297c37c7e0SPetr Machata 3307c37c7e0SPetr Machata static void nh_notifier_res_bucket_info_fini(struct nh_notifier_info *info) 3317c37c7e0SPetr Machata { 3327c37c7e0SPetr Machata kfree(info->nh_res_bucket); 3337c37c7e0SPetr Machata } 3347c37c7e0SPetr Machata 3357c37c7e0SPetr Machata static int __call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id, 3367c37c7e0SPetr Machata u16 bucket_index, bool force, 3377c37c7e0SPetr Machata struct nh_info *oldi, 3387c37c7e0SPetr Machata struct nh_info *newi, 3397c37c7e0SPetr Machata struct netlink_ext_ack *extack) 3407c37c7e0SPetr Machata { 3417c37c7e0SPetr Machata struct nh_notifier_info info = { 3427c37c7e0SPetr Machata .net = net, 3437c37c7e0SPetr Machata .extack = extack, 3447c37c7e0SPetr Machata .id = nhg_id, 3457c37c7e0SPetr Machata }; 3467c37c7e0SPetr Machata int err; 3477c37c7e0SPetr Machata 3487c37c7e0SPetr Machata if (nexthop_notifiers_is_empty(net)) 3497c37c7e0SPetr Machata return 0; 3507c37c7e0SPetr Machata 3517c37c7e0SPetr Machata err = nh_notifier_res_bucket_info_init(&info, bucket_index, force, 3527c37c7e0SPetr Machata oldi, newi); 3537c37c7e0SPetr Machata if (err) 3547c37c7e0SPetr Machata return err; 3557c37c7e0SPetr Machata 3567c37c7e0SPetr Machata err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, 3577c37c7e0SPetr Machata NEXTHOP_EVENT_BUCKET_REPLACE, &info); 3587c37c7e0SPetr Machata nh_notifier_res_bucket_info_fini(&info); 3597c37c7e0SPetr Machata 3607c37c7e0SPetr Machata return notifier_to_errno(err); 3617c37c7e0SPetr Machata } 3627c37c7e0SPetr Machata 363283a72a5SPetr Machata /* There are three users of RES_TABLE, and NHs etc. 
referenced from there: 364283a72a5SPetr Machata * 365283a72a5SPetr Machata * 1) a collection of callbacks for NH maintenance. This operates under 366283a72a5SPetr Machata * RTNL, 367283a72a5SPetr Machata * 2) the delayed work that gradually balances the resilient table, 368283a72a5SPetr Machata * 3) and nexthop_select_path(), operating under RCU. 369283a72a5SPetr Machata * 370283a72a5SPetr Machata * Both the delayed work and the RTNL block are writers, and need to 371283a72a5SPetr Machata * maintain mutual exclusion. Since there are only two and well-known 372283a72a5SPetr Machata * writers for each table, the RTNL code can make sure it has exclusive 373283a72a5SPetr Machata * access thus: 374283a72a5SPetr Machata * 375283a72a5SPetr Machata * - Have the DW operate without locking; 376283a72a5SPetr Machata * - synchronously cancel the DW; 377283a72a5SPetr Machata * - do the writing; 378283a72a5SPetr Machata * - if the write was not actually a delete, call upkeep, which schedules 379283a72a5SPetr Machata * DW again if necessary. 380283a72a5SPetr Machata * 381283a72a5SPetr Machata * The functions that are always called from the RTNL context use 382283a72a5SPetr Machata * rtnl_dereference(). The functions that can also be called from the DW do 383283a72a5SPetr Machata * a raw dereference and rely on the above mutual exclusion scheme. 
384283a72a5SPetr Machata */ 385283a72a5SPetr Machata #define nh_res_dereference(p) (rcu_dereference_raw(p)) 386283a72a5SPetr Machata 3877c37c7e0SPetr Machata static int call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id, 3887c37c7e0SPetr Machata u16 bucket_index, bool force, 3897c37c7e0SPetr Machata struct nexthop *old_nh, 3907c37c7e0SPetr Machata struct nexthop *new_nh, 3917c37c7e0SPetr Machata struct netlink_ext_ack *extack) 3927c37c7e0SPetr Machata { 3937c37c7e0SPetr Machata struct nh_info *oldi = nh_res_dereference(old_nh->nh_info); 3947c37c7e0SPetr Machata struct nh_info *newi = nh_res_dereference(new_nh->nh_info); 3957c37c7e0SPetr Machata 3967c37c7e0SPetr Machata return __call_nexthop_res_bucket_notifiers(net, nhg_id, bucket_index, 3977c37c7e0SPetr Machata force, oldi, newi, extack); 3987c37c7e0SPetr Machata } 3997c37c7e0SPetr Machata 4007c37c7e0SPetr Machata static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh, 4017c37c7e0SPetr Machata struct netlink_ext_ack *extack) 4027c37c7e0SPetr Machata { 4037c37c7e0SPetr Machata struct nh_notifier_info info = { 4047c37c7e0SPetr Machata .net = net, 4057c37c7e0SPetr Machata .extack = extack, 4067c37c7e0SPetr Machata }; 4077c37c7e0SPetr Machata struct nh_group *nhg; 4087c37c7e0SPetr Machata int err; 4097c37c7e0SPetr Machata 4107c37c7e0SPetr Machata ASSERT_RTNL(); 4117c37c7e0SPetr Machata 4127c37c7e0SPetr Machata if (nexthop_notifiers_is_empty(net)) 4137c37c7e0SPetr Machata return 0; 4147c37c7e0SPetr Machata 4157c37c7e0SPetr Machata /* At this point, the nexthop buckets are still not populated. Only 4167c37c7e0SPetr Machata * emit a notification with the logical nexthops, so that a listener 4177c37c7e0SPetr Machata * could potentially veto it in case of unsupported configuration. 
4187c37c7e0SPetr Machata */ 4197c37c7e0SPetr Machata nhg = rtnl_dereference(nh->nh_grp); 420de1d1ee3SPetr Machata err = nh_notifier_mpath_info_init(&info, nhg); 4217c37c7e0SPetr Machata if (err) { 4227c37c7e0SPetr Machata NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); 4237c37c7e0SPetr Machata return err; 4247c37c7e0SPetr Machata } 4257c37c7e0SPetr Machata 4267c37c7e0SPetr Machata err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, 4277c37c7e0SPetr Machata NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, 4287c37c7e0SPetr Machata &info); 4297c37c7e0SPetr Machata kfree(info.nh_grp); 4307c37c7e0SPetr Machata 4317c37c7e0SPetr Machata return notifier_to_errno(err); 4327c37c7e0SPetr Machata } 4337c37c7e0SPetr Machata 434975ff7f3SIdo Schimmel static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, 435975ff7f3SIdo Schimmel enum nexthop_event_type event_type, 436975ff7f3SIdo Schimmel struct nexthop *nh, 437975ff7f3SIdo Schimmel struct netlink_ext_ack *extack) 438975ff7f3SIdo Schimmel { 439975ff7f3SIdo Schimmel struct nh_notifier_info info = { 440975ff7f3SIdo Schimmel .net = net, 441975ff7f3SIdo Schimmel .extack = extack, 442975ff7f3SIdo Schimmel }; 443975ff7f3SIdo Schimmel int err; 444975ff7f3SIdo Schimmel 445975ff7f3SIdo Schimmel err = nh_notifier_info_init(&info, nh); 446975ff7f3SIdo Schimmel if (err) 447975ff7f3SIdo Schimmel return err; 448975ff7f3SIdo Schimmel 449975ff7f3SIdo Schimmel err = nb->notifier_call(nb, event_type, &info); 45009ad6becSIdo Schimmel nh_notifier_info_fini(&info, nh); 451975ff7f3SIdo Schimmel 452975ff7f3SIdo Schimmel return notifier_to_errno(err); 453975ff7f3SIdo Schimmel } 454975ff7f3SIdo Schimmel 455597cfe4fSDavid Ahern static unsigned int nh_dev_hashfn(unsigned int val) 456597cfe4fSDavid Ahern { 457597cfe4fSDavid Ahern unsigned int mask = NH_DEV_HASHSIZE - 1; 458597cfe4fSDavid Ahern 459597cfe4fSDavid Ahern return (val ^ 460597cfe4fSDavid Ahern (val >> NH_DEV_HASHBITS) ^ 461597cfe4fSDavid Ahern (val >> 
(NH_DEV_HASHBITS * 2))) & mask; 462597cfe4fSDavid Ahern } 463597cfe4fSDavid Ahern 464597cfe4fSDavid Ahern static void nexthop_devhash_add(struct net *net, struct nh_info *nhi) 465597cfe4fSDavid Ahern { 466597cfe4fSDavid Ahern struct net_device *dev = nhi->fib_nhc.nhc_dev; 467597cfe4fSDavid Ahern struct hlist_head *head; 468597cfe4fSDavid Ahern unsigned int hash; 469597cfe4fSDavid Ahern 470597cfe4fSDavid Ahern WARN_ON(!dev); 471597cfe4fSDavid Ahern 472597cfe4fSDavid Ahern hash = nh_dev_hashfn(dev->ifindex); 473597cfe4fSDavid Ahern head = &net->nexthop.devhash[hash]; 474597cfe4fSDavid Ahern hlist_add_head(&nhi->dev_hash, head); 475597cfe4fSDavid Ahern } 476597cfe4fSDavid Ahern 4775d1f0f09SDavid Ahern static void nexthop_free_group(struct nexthop *nh) 478ab84be7eSDavid Ahern { 479430a0491SDavid Ahern struct nh_group *nhg; 480430a0491SDavid Ahern int i; 481430a0491SDavid Ahern 482430a0491SDavid Ahern nhg = rcu_dereference_raw(nh->nh_grp); 48390f33bffSNikolay Aleksandrov for (i = 0; i < nhg->num_nh; ++i) { 48490f33bffSNikolay Aleksandrov struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 485430a0491SDavid Ahern 48690f33bffSNikolay Aleksandrov WARN_ON(!list_empty(&nhge->nh_list)); 487f4676ea7SIdo Schimmel free_percpu(nhge->stats); 48890f33bffSNikolay Aleksandrov nexthop_put(nhge->nh); 48990f33bffSNikolay Aleksandrov } 49090f33bffSNikolay Aleksandrov 49190f33bffSNikolay Aleksandrov WARN_ON(nhg->spare == nhg); 49290f33bffSNikolay Aleksandrov 493283a72a5SPetr Machata if (nhg->resilient) 494283a72a5SPetr Machata vfree(rcu_dereference_raw(nhg->res_table)); 495283a72a5SPetr Machata 49690f33bffSNikolay Aleksandrov kfree(nhg->spare); 497430a0491SDavid Ahern kfree(nhg); 498430a0491SDavid Ahern } 499430a0491SDavid Ahern 500430a0491SDavid Ahern static void nexthop_free_single(struct nexthop *nh) 501430a0491SDavid Ahern { 502ab84be7eSDavid Ahern struct nh_info *nhi; 503ab84be7eSDavid Ahern 504ab84be7eSDavid Ahern nhi = rcu_dereference_raw(nh->nh_info); 505597cfe4fSDavid Ahern switch 
(nhi->family) { 506597cfe4fSDavid Ahern case AF_INET: 507597cfe4fSDavid Ahern fib_nh_release(nh->net, &nhi->fib_nh); 508597cfe4fSDavid Ahern break; 50953010f99SDavid Ahern case AF_INET6: 51053010f99SDavid Ahern ipv6_stub->fib6_nh_release(&nhi->fib6_nh); 51153010f99SDavid Ahern break; 512597cfe4fSDavid Ahern } 513ab84be7eSDavid Ahern kfree(nhi); 514430a0491SDavid Ahern } 515430a0491SDavid Ahern 516430a0491SDavid Ahern void nexthop_free_rcu(struct rcu_head *head) 517430a0491SDavid Ahern { 518430a0491SDavid Ahern struct nexthop *nh = container_of(head, struct nexthop, rcu); 519430a0491SDavid Ahern 520430a0491SDavid Ahern if (nh->is_group) 5215d1f0f09SDavid Ahern nexthop_free_group(nh); 522430a0491SDavid Ahern else 523430a0491SDavid Ahern nexthop_free_single(nh); 524ab84be7eSDavid Ahern 525ab84be7eSDavid Ahern kfree(nh); 526ab84be7eSDavid Ahern } 527ab84be7eSDavid Ahern EXPORT_SYMBOL_GPL(nexthop_free_rcu); 528ab84be7eSDavid Ahern 529ab84be7eSDavid Ahern static struct nexthop *nexthop_alloc(void) 530ab84be7eSDavid Ahern { 531ab84be7eSDavid Ahern struct nexthop *nh; 532ab84be7eSDavid Ahern 533ab84be7eSDavid Ahern nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL); 534430a0491SDavid Ahern if (nh) { 5354c7e8084SDavid Ahern INIT_LIST_HEAD(&nh->fi_list); 536f88d8ea6SDavid Ahern INIT_LIST_HEAD(&nh->f6i_list); 537430a0491SDavid Ahern INIT_LIST_HEAD(&nh->grp_list); 53838428d68SRoopa Prabhu INIT_LIST_HEAD(&nh->fdb_list); 539430a0491SDavid Ahern } 540ab84be7eSDavid Ahern return nh; 541ab84be7eSDavid Ahern } 542ab84be7eSDavid Ahern 543430a0491SDavid Ahern static struct nh_group *nexthop_grp_alloc(u16 num_nh) 544430a0491SDavid Ahern { 545430a0491SDavid Ahern struct nh_group *nhg; 546430a0491SDavid Ahern 547d7d49dc7SIdo Schimmel nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL); 548430a0491SDavid Ahern if (nhg) 549430a0491SDavid Ahern nhg->num_nh = num_nh; 550430a0491SDavid Ahern 551430a0491SDavid Ahern return nhg; 552430a0491SDavid Ahern } 553430a0491SDavid Ahern 
554283a72a5SPetr Machata static void nh_res_table_upkeep_dw(struct work_struct *work); 555283a72a5SPetr Machata 556283a72a5SPetr Machata static struct nh_res_table * 557283a72a5SPetr Machata nexthop_res_table_alloc(struct net *net, u32 nhg_id, struct nh_config *cfg) 558283a72a5SPetr Machata { 559283a72a5SPetr Machata const u16 num_nh_buckets = cfg->nh_grp_res_num_buckets; 560283a72a5SPetr Machata struct nh_res_table *res_table; 561283a72a5SPetr Machata unsigned long size; 562283a72a5SPetr Machata 563283a72a5SPetr Machata size = struct_size(res_table, nh_buckets, num_nh_buckets); 564283a72a5SPetr Machata res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN); 565283a72a5SPetr Machata if (!res_table) 566283a72a5SPetr Machata return NULL; 567283a72a5SPetr Machata 568283a72a5SPetr Machata res_table->net = net; 569283a72a5SPetr Machata res_table->nhg_id = nhg_id; 570283a72a5SPetr Machata INIT_DELAYED_WORK(&res_table->upkeep_dw, &nh_res_table_upkeep_dw); 571283a72a5SPetr Machata INIT_LIST_HEAD(&res_table->uw_nh_entries); 572283a72a5SPetr Machata res_table->idle_timer = cfg->nh_grp_res_idle_timer; 573283a72a5SPetr Machata res_table->unbalanced_timer = cfg->nh_grp_res_unbalanced_timer; 574283a72a5SPetr Machata res_table->num_nh_buckets = num_nh_buckets; 575283a72a5SPetr Machata return res_table; 576283a72a5SPetr Machata } 577283a72a5SPetr Machata 578ab84be7eSDavid Ahern static void nh_base_seq_inc(struct net *net) 579ab84be7eSDavid Ahern { 580ab84be7eSDavid Ahern while (++net->nexthop.seq == 0) 581ab84be7eSDavid Ahern ; 582ab84be7eSDavid Ahern } 583ab84be7eSDavid Ahern 584ab84be7eSDavid Ahern /* no reference taken; rcu lock or rtnl must be held */ 585ab84be7eSDavid Ahern struct nexthop *nexthop_find_by_id(struct net *net, u32 id) 586ab84be7eSDavid Ahern { 587ab84be7eSDavid Ahern struct rb_node **pp, *parent = NULL, *next; 588ab84be7eSDavid Ahern 589ab84be7eSDavid Ahern pp = &net->nexthop.rb_root.rb_node; 590ab84be7eSDavid Ahern while (1) { 591ab84be7eSDavid 
Ahern struct nexthop *nh; 592ab84be7eSDavid Ahern 593ab84be7eSDavid Ahern next = rcu_dereference_raw(*pp); 594ab84be7eSDavid Ahern if (!next) 595ab84be7eSDavid Ahern break; 596ab84be7eSDavid Ahern parent = next; 597ab84be7eSDavid Ahern 598ab84be7eSDavid Ahern nh = rb_entry(parent, struct nexthop, rb_node); 599ab84be7eSDavid Ahern if (id < nh->id) 600ab84be7eSDavid Ahern pp = &next->rb_left; 601ab84be7eSDavid Ahern else if (id > nh->id) 602ab84be7eSDavid Ahern pp = &next->rb_right; 603ab84be7eSDavid Ahern else 604ab84be7eSDavid Ahern return nh; 605ab84be7eSDavid Ahern } 606ab84be7eSDavid Ahern return NULL; 607ab84be7eSDavid Ahern } 608ab84be7eSDavid Ahern EXPORT_SYMBOL_GPL(nexthop_find_by_id); 609ab84be7eSDavid Ahern 610ab84be7eSDavid Ahern /* used for auto id allocation; called with rtnl held */ 611ab84be7eSDavid Ahern static u32 nh_find_unused_id(struct net *net) 612ab84be7eSDavid Ahern { 613ab84be7eSDavid Ahern u32 id_start = net->nexthop.last_id_allocated; 614ab84be7eSDavid Ahern 615ab84be7eSDavid Ahern while (1) { 616ab84be7eSDavid Ahern net->nexthop.last_id_allocated++; 617ab84be7eSDavid Ahern if (net->nexthop.last_id_allocated == id_start) 618ab84be7eSDavid Ahern break; 619ab84be7eSDavid Ahern 620ab84be7eSDavid Ahern if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated)) 621ab84be7eSDavid Ahern return net->nexthop.last_id_allocated; 622ab84be7eSDavid Ahern } 623ab84be7eSDavid Ahern return 0; 624ab84be7eSDavid Ahern } 625ab84be7eSDavid Ahern 626283a72a5SPetr Machata static void nh_res_time_set_deadline(unsigned long next_time, 627283a72a5SPetr Machata unsigned long *deadline) 628283a72a5SPetr Machata { 629283a72a5SPetr Machata if (time_before(next_time, *deadline)) 630283a72a5SPetr Machata *deadline = next_time; 631283a72a5SPetr Machata } 632283a72a5SPetr Machata 633a2601e2bSPetr Machata static clock_t nh_res_table_unbalanced_time(struct nh_res_table *res_table) 634a2601e2bSPetr Machata { 635a2601e2bSPetr Machata if 
(list_empty(&res_table->uw_nh_entries)) 636a2601e2bSPetr Machata return 0; 637a2601e2bSPetr Machata return jiffies_delta_to_clock_t(jiffies - res_table->unbalanced_since); 638a2601e2bSPetr Machata } 639a2601e2bSPetr Machata 640a2601e2bSPetr Machata static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg) 641a2601e2bSPetr Machata { 642a2601e2bSPetr Machata struct nh_res_table *res_table = rtnl_dereference(nhg->res_table); 643a2601e2bSPetr Machata struct nlattr *nest; 644a2601e2bSPetr Machata 645a2601e2bSPetr Machata nest = nla_nest_start(skb, NHA_RES_GROUP); 646a2601e2bSPetr Machata if (!nest) 647a2601e2bSPetr Machata return -EMSGSIZE; 648a2601e2bSPetr Machata 649a2601e2bSPetr Machata if (nla_put_u16(skb, NHA_RES_GROUP_BUCKETS, 650a2601e2bSPetr Machata res_table->num_nh_buckets) || 651a2601e2bSPetr Machata nla_put_u32(skb, NHA_RES_GROUP_IDLE_TIMER, 652a2601e2bSPetr Machata jiffies_to_clock_t(res_table->idle_timer)) || 653a2601e2bSPetr Machata nla_put_u32(skb, NHA_RES_GROUP_UNBALANCED_TIMER, 654a2601e2bSPetr Machata jiffies_to_clock_t(res_table->unbalanced_timer)) || 655a2601e2bSPetr Machata nla_put_u64_64bit(skb, NHA_RES_GROUP_UNBALANCED_TIME, 656a2601e2bSPetr Machata nh_res_table_unbalanced_time(res_table), 657a2601e2bSPetr Machata NHA_RES_GROUP_PAD)) 658a2601e2bSPetr Machata goto nla_put_failure; 659a2601e2bSPetr Machata 660a2601e2bSPetr Machata nla_nest_end(skb, nest); 661a2601e2bSPetr Machata return 0; 662a2601e2bSPetr Machata 663a2601e2bSPetr Machata nla_put_failure: 664a2601e2bSPetr Machata nla_nest_cancel(skb, nest); 665a2601e2bSPetr Machata return -EMSGSIZE; 666a2601e2bSPetr Machata } 667a2601e2bSPetr Machata 668f4676ea7SIdo Schimmel static void nh_grp_entry_stats_inc(struct nh_grp_entry *nhge) 669f4676ea7SIdo Schimmel { 670f4676ea7SIdo Schimmel struct nh_grp_entry_stats *cpu_stats; 671f4676ea7SIdo Schimmel 672f4676ea7SIdo Schimmel cpu_stats = this_cpu_ptr(nhge->stats); 673f4676ea7SIdo Schimmel u64_stats_update_begin(&cpu_stats->syncp); 
674f4676ea7SIdo Schimmel u64_stats_inc(&cpu_stats->packets); 675f4676ea7SIdo Schimmel u64_stats_update_end(&cpu_stats->syncp); 676f4676ea7SIdo Schimmel } 677f4676ea7SIdo Schimmel 678*95fedd76SIdo Schimmel static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge, 679*95fedd76SIdo Schimmel u64 *ret_packets) 680430a0491SDavid Ahern { 681*95fedd76SIdo Schimmel int i; 682*95fedd76SIdo Schimmel 683*95fedd76SIdo Schimmel *ret_packets = 0; 684*95fedd76SIdo Schimmel 685*95fedd76SIdo Schimmel for_each_possible_cpu(i) { 686*95fedd76SIdo Schimmel struct nh_grp_entry_stats *cpu_stats; 687*95fedd76SIdo Schimmel unsigned int start; 688*95fedd76SIdo Schimmel u64 packets; 689*95fedd76SIdo Schimmel 690*95fedd76SIdo Schimmel cpu_stats = per_cpu_ptr(nhge->stats, i); 691*95fedd76SIdo Schimmel do { 692*95fedd76SIdo Schimmel start = u64_stats_fetch_begin(&cpu_stats->syncp); 693*95fedd76SIdo Schimmel packets = u64_stats_read(&cpu_stats->packets); 694*95fedd76SIdo Schimmel } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); 695*95fedd76SIdo Schimmel 696*95fedd76SIdo Schimmel *ret_packets += packets; 697*95fedd76SIdo Schimmel } 698*95fedd76SIdo Schimmel } 699*95fedd76SIdo Schimmel 700*95fedd76SIdo Schimmel static int nla_put_nh_group_stats_entry(struct sk_buff *skb, 701*95fedd76SIdo Schimmel struct nh_grp_entry *nhge) 702*95fedd76SIdo Schimmel { 703*95fedd76SIdo Schimmel struct nlattr *nest; 704*95fedd76SIdo Schimmel u64 packets; 705*95fedd76SIdo Schimmel 706*95fedd76SIdo Schimmel nh_grp_entry_stats_read(nhge, &packets); 707*95fedd76SIdo Schimmel 708*95fedd76SIdo Schimmel nest = nla_nest_start(skb, NHA_GROUP_STATS_ENTRY); 709*95fedd76SIdo Schimmel if (!nest) 710*95fedd76SIdo Schimmel return -EMSGSIZE; 711*95fedd76SIdo Schimmel 712*95fedd76SIdo Schimmel if (nla_put_u32(skb, NHA_GROUP_STATS_ENTRY_ID, nhge->nh->id) || 713*95fedd76SIdo Schimmel nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS, packets)) 714*95fedd76SIdo Schimmel goto nla_put_failure; 715*95fedd76SIdo Schimmel 
716*95fedd76SIdo Schimmel nla_nest_end(skb, nest); 717*95fedd76SIdo Schimmel return 0; 718*95fedd76SIdo Schimmel 719*95fedd76SIdo Schimmel nla_put_failure: 720*95fedd76SIdo Schimmel nla_nest_cancel(skb, nest); 721*95fedd76SIdo Schimmel return -EMSGSIZE; 722*95fedd76SIdo Schimmel } 723*95fedd76SIdo Schimmel 724*95fedd76SIdo Schimmel static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh) 725*95fedd76SIdo Schimmel { 726*95fedd76SIdo Schimmel struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 727*95fedd76SIdo Schimmel struct nlattr *nest; 728*95fedd76SIdo Schimmel int i; 729*95fedd76SIdo Schimmel 730*95fedd76SIdo Schimmel nest = nla_nest_start(skb, NHA_GROUP_STATS); 731*95fedd76SIdo Schimmel if (!nest) 732*95fedd76SIdo Schimmel return -EMSGSIZE; 733*95fedd76SIdo Schimmel 734*95fedd76SIdo Schimmel for (i = 0; i < nhg->num_nh; i++) 735*95fedd76SIdo Schimmel if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i])) 736*95fedd76SIdo Schimmel goto cancel_out; 737*95fedd76SIdo Schimmel 738*95fedd76SIdo Schimmel nla_nest_end(skb, nest); 739*95fedd76SIdo Schimmel return 0; 740*95fedd76SIdo Schimmel 741*95fedd76SIdo Schimmel cancel_out: 742*95fedd76SIdo Schimmel nla_nest_cancel(skb, nest); 743*95fedd76SIdo Schimmel return -EMSGSIZE; 744*95fedd76SIdo Schimmel } 745*95fedd76SIdo Schimmel 746*95fedd76SIdo Schimmel static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh, 747*95fedd76SIdo Schimmel u32 op_flags) 748*95fedd76SIdo Schimmel { 749*95fedd76SIdo Schimmel struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 750430a0491SDavid Ahern struct nexthop_grp *p; 751430a0491SDavid Ahern size_t len = nhg->num_nh * sizeof(*p); 752430a0491SDavid Ahern struct nlattr *nla; 753430a0491SDavid Ahern u16 group_type = 0; 754430a0491SDavid Ahern int i; 755430a0491SDavid Ahern 756de1d1ee3SPetr Machata if (nhg->hash_threshold) 757430a0491SDavid Ahern group_type = NEXTHOP_GRP_TYPE_MPATH; 758a2601e2bSPetr Machata else if (nhg->resilient) 759a2601e2bSPetr Machata 
group_type = NEXTHOP_GRP_TYPE_RES; 760430a0491SDavid Ahern 761430a0491SDavid Ahern if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type)) 762430a0491SDavid Ahern goto nla_put_failure; 763430a0491SDavid Ahern 764430a0491SDavid Ahern nla = nla_reserve(skb, NHA_GROUP, len); 765430a0491SDavid Ahern if (!nla) 766430a0491SDavid Ahern goto nla_put_failure; 767430a0491SDavid Ahern 768430a0491SDavid Ahern p = nla_data(nla); 769430a0491SDavid Ahern for (i = 0; i < nhg->num_nh; ++i) { 770430a0491SDavid Ahern p->id = nhg->nh_entries[i].nh->id; 771430a0491SDavid Ahern p->weight = nhg->nh_entries[i].weight - 1; 772430a0491SDavid Ahern p += 1; 773430a0491SDavid Ahern } 774430a0491SDavid Ahern 775a2601e2bSPetr Machata if (nhg->resilient && nla_put_nh_group_res(skb, nhg)) 776a2601e2bSPetr Machata goto nla_put_failure; 777a2601e2bSPetr Machata 778*95fedd76SIdo Schimmel if (op_flags & NHA_OP_FLAG_DUMP_STATS && 779*95fedd76SIdo Schimmel nla_put_nh_group_stats(skb, nh)) 780*95fedd76SIdo Schimmel goto nla_put_failure; 781*95fedd76SIdo Schimmel 782430a0491SDavid Ahern return 0; 783430a0491SDavid Ahern 784430a0491SDavid Ahern nla_put_failure: 785430a0491SDavid Ahern return -EMSGSIZE; 786430a0491SDavid Ahern } 787430a0491SDavid Ahern 788ab84be7eSDavid Ahern static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, 789*95fedd76SIdo Schimmel int event, u32 portid, u32 seq, unsigned int nlflags, 790*95fedd76SIdo Schimmel u32 op_flags) 791ab84be7eSDavid Ahern { 79253010f99SDavid Ahern struct fib6_nh *fib6_nh; 793597cfe4fSDavid Ahern struct fib_nh *fib_nh; 794ab84be7eSDavid Ahern struct nlmsghdr *nlh; 795ab84be7eSDavid Ahern struct nh_info *nhi; 796ab84be7eSDavid Ahern struct nhmsg *nhm; 797ab84be7eSDavid Ahern 798ab84be7eSDavid Ahern nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags); 799ab84be7eSDavid Ahern if (!nlh) 800ab84be7eSDavid Ahern return -EMSGSIZE; 801ab84be7eSDavid Ahern 802ab84be7eSDavid Ahern nhm = nlmsg_data(nlh); 803ab84be7eSDavid Ahern nhm->nh_family = 
AF_UNSPEC; 804ab84be7eSDavid Ahern nhm->nh_flags = nh->nh_flags; 805ab84be7eSDavid Ahern nhm->nh_protocol = nh->protocol; 806ab84be7eSDavid Ahern nhm->nh_scope = 0; 807ab84be7eSDavid Ahern nhm->resvd = 0; 808ab84be7eSDavid Ahern 809ab84be7eSDavid Ahern if (nla_put_u32(skb, NHA_ID, nh->id)) 810ab84be7eSDavid Ahern goto nla_put_failure; 811ab84be7eSDavid Ahern 812430a0491SDavid Ahern if (nh->is_group) { 813430a0491SDavid Ahern struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 814430a0491SDavid Ahern 815ce9ac056SDavid Ahern if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB)) 816ce9ac056SDavid Ahern goto nla_put_failure; 817*95fedd76SIdo Schimmel if (nla_put_nh_group(skb, nh, op_flags)) 818430a0491SDavid Ahern goto nla_put_failure; 819430a0491SDavid Ahern goto out; 820430a0491SDavid Ahern } 821430a0491SDavid Ahern 822ab84be7eSDavid Ahern nhi = rtnl_dereference(nh->nh_info); 823ab84be7eSDavid Ahern nhm->nh_family = nhi->family; 824ab84be7eSDavid Ahern if (nhi->reject_nh) { 825ab84be7eSDavid Ahern if (nla_put_flag(skb, NHA_BLACKHOLE)) 826ab84be7eSDavid Ahern goto nla_put_failure; 827ab84be7eSDavid Ahern goto out; 828ce9ac056SDavid Ahern } else if (nhi->fdb_nh) { 829ce9ac056SDavid Ahern if (nla_put_flag(skb, NHA_FDB)) 830ce9ac056SDavid Ahern goto nla_put_failure; 831ce9ac056SDavid Ahern } else { 832597cfe4fSDavid Ahern const struct net_device *dev; 833597cfe4fSDavid Ahern 834597cfe4fSDavid Ahern dev = nhi->fib_nhc.nhc_dev; 835597cfe4fSDavid Ahern if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex)) 836597cfe4fSDavid Ahern goto nla_put_failure; 837597cfe4fSDavid Ahern } 838597cfe4fSDavid Ahern 839597cfe4fSDavid Ahern nhm->nh_scope = nhi->fib_nhc.nhc_scope; 840597cfe4fSDavid Ahern switch (nhi->family) { 841597cfe4fSDavid Ahern case AF_INET: 842597cfe4fSDavid Ahern fib_nh = &nhi->fib_nh; 843597cfe4fSDavid Ahern if (fib_nh->fib_nh_gw_family && 84433d80996SIdo Schimmel nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4)) 845597cfe4fSDavid Ahern goto nla_put_failure; 
846597cfe4fSDavid Ahern break; 84753010f99SDavid Ahern 84853010f99SDavid Ahern case AF_INET6: 84953010f99SDavid Ahern fib6_nh = &nhi->fib6_nh; 85053010f99SDavid Ahern if (fib6_nh->fib_nh_gw_family && 85153010f99SDavid Ahern nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6)) 85253010f99SDavid Ahern goto nla_put_failure; 85353010f99SDavid Ahern break; 854ab84be7eSDavid Ahern } 855ab84be7eSDavid Ahern 856b513bd03SDavid Ahern if (nhi->fib_nhc.nhc_lwtstate && 857b513bd03SDavid Ahern lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate, 858b513bd03SDavid Ahern NHA_ENCAP, NHA_ENCAP_TYPE) < 0) 859b513bd03SDavid Ahern goto nla_put_failure; 860b513bd03SDavid Ahern 861ab84be7eSDavid Ahern out: 862ab84be7eSDavid Ahern nlmsg_end(skb, nlh); 863ab84be7eSDavid Ahern return 0; 864ab84be7eSDavid Ahern 865ab84be7eSDavid Ahern nla_put_failure: 866d69100b8SStephen Worley nlmsg_cancel(skb, nlh); 867ab84be7eSDavid Ahern return -EMSGSIZE; 868ab84be7eSDavid Ahern } 869ab84be7eSDavid Ahern 870a2601e2bSPetr Machata static size_t nh_nlmsg_size_grp_res(struct nh_group *nhg) 871a2601e2bSPetr Machata { 872a2601e2bSPetr Machata return nla_total_size(0) + /* NHA_RES_GROUP */ 873a2601e2bSPetr Machata nla_total_size(2) + /* NHA_RES_GROUP_BUCKETS */ 874a2601e2bSPetr Machata nla_total_size(4) + /* NHA_RES_GROUP_IDLE_TIMER */ 875a2601e2bSPetr Machata nla_total_size(4) + /* NHA_RES_GROUP_UNBALANCED_TIMER */ 876a2601e2bSPetr Machata nla_total_size_64bit(8);/* NHA_RES_GROUP_UNBALANCED_TIME */ 877a2601e2bSPetr Machata } 878a2601e2bSPetr Machata 879430a0491SDavid Ahern static size_t nh_nlmsg_size_grp(struct nexthop *nh) 880430a0491SDavid Ahern { 881430a0491SDavid Ahern struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 882430a0491SDavid Ahern size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh; 883a2601e2bSPetr Machata size_t tot = nla_total_size(sz) + 884430a0491SDavid Ahern nla_total_size(2); /* NHA_GROUP_TYPE */ 885a2601e2bSPetr Machata 886a2601e2bSPetr Machata if (nhg->resilient) 
887a2601e2bSPetr Machata tot += nh_nlmsg_size_grp_res(nhg); 888a2601e2bSPetr Machata 889a2601e2bSPetr Machata return tot; 890430a0491SDavid Ahern } 891430a0491SDavid Ahern 892430a0491SDavid Ahern static size_t nh_nlmsg_size_single(struct nexthop *nh) 893ab84be7eSDavid Ahern { 894597cfe4fSDavid Ahern struct nh_info *nhi = rtnl_dereference(nh->nh_info); 895430a0491SDavid Ahern size_t sz; 896ab84be7eSDavid Ahern 897ab84be7eSDavid Ahern /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE 898ab84be7eSDavid Ahern * are mutually exclusive 899ab84be7eSDavid Ahern */ 900430a0491SDavid Ahern sz = nla_total_size(4); /* NHA_OIF */ 901ab84be7eSDavid Ahern 902597cfe4fSDavid Ahern switch (nhi->family) { 903597cfe4fSDavid Ahern case AF_INET: 904597cfe4fSDavid Ahern if (nhi->fib_nh.fib_nh_gw_family) 905597cfe4fSDavid Ahern sz += nla_total_size(4); /* NHA_GATEWAY */ 906597cfe4fSDavid Ahern break; 90753010f99SDavid Ahern 90853010f99SDavid Ahern case AF_INET6: 90953010f99SDavid Ahern /* NHA_GATEWAY */ 91053010f99SDavid Ahern if (nhi->fib6_nh.fib_nh_gw_family) 91153010f99SDavid Ahern sz += nla_total_size(sizeof(const struct in6_addr)); 91253010f99SDavid Ahern break; 913597cfe4fSDavid Ahern } 914597cfe4fSDavid Ahern 915b513bd03SDavid Ahern if (nhi->fib_nhc.nhc_lwtstate) { 916b513bd03SDavid Ahern sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate); 917b513bd03SDavid Ahern sz += nla_total_size(2); /* NHA_ENCAP_TYPE */ 918b513bd03SDavid Ahern } 919b513bd03SDavid Ahern 920ab84be7eSDavid Ahern return sz; 921ab84be7eSDavid Ahern } 922ab84be7eSDavid Ahern 923430a0491SDavid Ahern static size_t nh_nlmsg_size(struct nexthop *nh) 924430a0491SDavid Ahern { 925f9e95555SStephen Worley size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg)); 926f9e95555SStephen Worley 927f9e95555SStephen Worley sz += nla_total_size(4); /* NHA_ID */ 928430a0491SDavid Ahern 929430a0491SDavid Ahern if (nh->is_group) 930430a0491SDavid Ahern sz += nh_nlmsg_size_grp(nh); 931430a0491SDavid Ahern else 932430a0491SDavid Ahern sz += 
nh_nlmsg_size_single(nh); 933430a0491SDavid Ahern 934430a0491SDavid Ahern return sz; 935430a0491SDavid Ahern } 936430a0491SDavid Ahern 937ab84be7eSDavid Ahern static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) 938ab84be7eSDavid Ahern { 939ab84be7eSDavid Ahern unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0; 940ab84be7eSDavid Ahern u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 941ab84be7eSDavid Ahern struct sk_buff *skb; 942ab84be7eSDavid Ahern int err = -ENOBUFS; 943ab84be7eSDavid Ahern 944ab84be7eSDavid Ahern skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any()); 945ab84be7eSDavid Ahern if (!skb) 946ab84be7eSDavid Ahern goto errout; 947ab84be7eSDavid Ahern 948*95fedd76SIdo Schimmel err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags, 0); 949ab84be7eSDavid Ahern if (err < 0) { 950ab84be7eSDavid Ahern /* -EMSGSIZE implies BUG in nh_nlmsg_size() */ 951ab84be7eSDavid Ahern WARN_ON(err == -EMSGSIZE); 952ab84be7eSDavid Ahern kfree_skb(skb); 953ab84be7eSDavid Ahern goto errout; 954ab84be7eSDavid Ahern } 955ab84be7eSDavid Ahern 956ab84be7eSDavid Ahern rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP, 957ab84be7eSDavid Ahern info->nlh, gfp_any()); 958ab84be7eSDavid Ahern return; 959ab84be7eSDavid Ahern errout: 960ab84be7eSDavid Ahern if (err < 0) 961ab84be7eSDavid Ahern rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err); 962ab84be7eSDavid Ahern } 963ab84be7eSDavid Ahern 964283a72a5SPetr Machata static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket) 965283a72a5SPetr Machata { 966283a72a5SPetr Machata return (unsigned long)atomic_long_read(&bucket->used_time); 967283a72a5SPetr Machata } 968283a72a5SPetr Machata 969283a72a5SPetr Machata static unsigned long 970283a72a5SPetr Machata nh_res_bucket_idle_point(const struct nh_res_table *res_table, 971283a72a5SPetr Machata const struct nh_res_bucket *bucket, 972283a72a5SPetr Machata unsigned long now) 973283a72a5SPetr Machata { 974283a72a5SPetr 
Machata unsigned long time = nh_res_bucket_used_time(bucket); 975283a72a5SPetr Machata 976283a72a5SPetr Machata /* Bucket was not used since it was migrated. The idle time is now. */ 977283a72a5SPetr Machata if (time == bucket->migrated_time) 978283a72a5SPetr Machata return now; 979283a72a5SPetr Machata 980283a72a5SPetr Machata return time + res_table->idle_timer; 981283a72a5SPetr Machata } 982283a72a5SPetr Machata 983283a72a5SPetr Machata static unsigned long 984283a72a5SPetr Machata nh_res_table_unb_point(const struct nh_res_table *res_table) 985283a72a5SPetr Machata { 986283a72a5SPetr Machata return res_table->unbalanced_since + res_table->unbalanced_timer; 987283a72a5SPetr Machata } 988283a72a5SPetr Machata 989283a72a5SPetr Machata static void nh_res_bucket_set_idle(const struct nh_res_table *res_table, 990283a72a5SPetr Machata struct nh_res_bucket *bucket) 991283a72a5SPetr Machata { 992283a72a5SPetr Machata unsigned long now = jiffies; 993283a72a5SPetr Machata 994283a72a5SPetr Machata atomic_long_set(&bucket->used_time, (long)now); 995283a72a5SPetr Machata bucket->migrated_time = now; 996283a72a5SPetr Machata } 997283a72a5SPetr Machata 998283a72a5SPetr Machata static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket) 999283a72a5SPetr Machata { 1000283a72a5SPetr Machata atomic_long_set(&bucket->used_time, (long)jiffies); 1001283a72a5SPetr Machata } 1002283a72a5SPetr Machata 10038a1bbabbSPetr Machata static clock_t nh_res_bucket_idle_time(const struct nh_res_bucket *bucket) 10048a1bbabbSPetr Machata { 10058a1bbabbSPetr Machata unsigned long used_time = nh_res_bucket_used_time(bucket); 10068a1bbabbSPetr Machata 10078a1bbabbSPetr Machata return jiffies_delta_to_clock_t(jiffies - used_time); 10088a1bbabbSPetr Machata } 10098a1bbabbSPetr Machata 10108a1bbabbSPetr Machata static int nh_fill_res_bucket(struct sk_buff *skb, struct nexthop *nh, 10118a1bbabbSPetr Machata struct nh_res_bucket *bucket, u16 bucket_index, 10128a1bbabbSPetr Machata int event, u32 
portid, u32 seq, 10138a1bbabbSPetr Machata unsigned int nlflags, 10148a1bbabbSPetr Machata struct netlink_ext_ack *extack) 10158a1bbabbSPetr Machata { 10168a1bbabbSPetr Machata struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry); 10178a1bbabbSPetr Machata struct nlmsghdr *nlh; 10188a1bbabbSPetr Machata struct nlattr *nest; 10198a1bbabbSPetr Machata struct nhmsg *nhm; 10208a1bbabbSPetr Machata 10218a1bbabbSPetr Machata nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags); 10228a1bbabbSPetr Machata if (!nlh) 10238a1bbabbSPetr Machata return -EMSGSIZE; 10248a1bbabbSPetr Machata 10258a1bbabbSPetr Machata nhm = nlmsg_data(nlh); 10268a1bbabbSPetr Machata nhm->nh_family = AF_UNSPEC; 10278a1bbabbSPetr Machata nhm->nh_flags = bucket->nh_flags; 10288a1bbabbSPetr Machata nhm->nh_protocol = nh->protocol; 10298a1bbabbSPetr Machata nhm->nh_scope = 0; 10308a1bbabbSPetr Machata nhm->resvd = 0; 10318a1bbabbSPetr Machata 10328a1bbabbSPetr Machata if (nla_put_u32(skb, NHA_ID, nh->id)) 10338a1bbabbSPetr Machata goto nla_put_failure; 10348a1bbabbSPetr Machata 10358a1bbabbSPetr Machata nest = nla_nest_start(skb, NHA_RES_BUCKET); 10368a1bbabbSPetr Machata if (!nest) 10378a1bbabbSPetr Machata goto nla_put_failure; 10388a1bbabbSPetr Machata 10398a1bbabbSPetr Machata if (nla_put_u16(skb, NHA_RES_BUCKET_INDEX, bucket_index) || 10408a1bbabbSPetr Machata nla_put_u32(skb, NHA_RES_BUCKET_NH_ID, nhge->nh->id) || 10418a1bbabbSPetr Machata nla_put_u64_64bit(skb, NHA_RES_BUCKET_IDLE_TIME, 10428a1bbabbSPetr Machata nh_res_bucket_idle_time(bucket), 10438a1bbabbSPetr Machata NHA_RES_BUCKET_PAD)) 10448a1bbabbSPetr Machata goto nla_put_failure_nest; 10458a1bbabbSPetr Machata 10468a1bbabbSPetr Machata nla_nest_end(skb, nest); 10478a1bbabbSPetr Machata nlmsg_end(skb, nlh); 10488a1bbabbSPetr Machata return 0; 10498a1bbabbSPetr Machata 10508a1bbabbSPetr Machata nla_put_failure_nest: 10518a1bbabbSPetr Machata nla_nest_cancel(skb, nest); 10528a1bbabbSPetr Machata nla_put_failure: 
10538a1bbabbSPetr Machata nlmsg_cancel(skb, nlh); 10548a1bbabbSPetr Machata return -EMSGSIZE; 10558a1bbabbSPetr Machata } 10568a1bbabbSPetr Machata 10570b4818aaSPetr Machata static void nexthop_bucket_notify(struct nh_res_table *res_table, 10580b4818aaSPetr Machata u16 bucket_index) 10590b4818aaSPetr Machata { 10600b4818aaSPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index]; 10610b4818aaSPetr Machata struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry); 10620b4818aaSPetr Machata struct nexthop *nh = nhge->nh_parent; 10630b4818aaSPetr Machata struct sk_buff *skb; 10640b4818aaSPetr Machata int err = -ENOBUFS; 10650b4818aaSPetr Machata 10660b4818aaSPetr Machata skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 10670b4818aaSPetr Machata if (!skb) 10680b4818aaSPetr Machata goto errout; 10690b4818aaSPetr Machata 10700b4818aaSPetr Machata err = nh_fill_res_bucket(skb, nh, bucket, bucket_index, 10710b4818aaSPetr Machata RTM_NEWNEXTHOPBUCKET, 0, 0, NLM_F_REPLACE, 10720b4818aaSPetr Machata NULL); 10730b4818aaSPetr Machata if (err < 0) { 10740b4818aaSPetr Machata kfree_skb(skb); 10750b4818aaSPetr Machata goto errout; 10760b4818aaSPetr Machata } 10770b4818aaSPetr Machata 10780b4818aaSPetr Machata rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL); 10790b4818aaSPetr Machata return; 10800b4818aaSPetr Machata errout: 10810b4818aaSPetr Machata if (err < 0) 10820b4818aaSPetr Machata rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err); 10830b4818aaSPetr Machata } 10840b4818aaSPetr Machata 1085430a0491SDavid Ahern static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, 1086ce9ac056SDavid Ahern bool *is_fdb, struct netlink_ext_ack *extack) 1087597cfe4fSDavid Ahern { 1088430a0491SDavid Ahern if (nh->is_group) { 1089430a0491SDavid Ahern struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 1090430a0491SDavid Ahern 1091283a72a5SPetr Machata /* Nesting groups within groups is not supported. 
*/ 1092de1d1ee3SPetr Machata if (nhg->hash_threshold) { 1093430a0491SDavid Ahern NL_SET_ERR_MSG(extack, 1094de1d1ee3SPetr Machata "Hash-threshold group can not be a nexthop within a group"); 1095430a0491SDavid Ahern return false; 1096430a0491SDavid Ahern } 1097283a72a5SPetr Machata if (nhg->resilient) { 1098283a72a5SPetr Machata NL_SET_ERR_MSG(extack, 1099283a72a5SPetr Machata "Resilient group can not be a nexthop within a group"); 1100283a72a5SPetr Machata return false; 1101283a72a5SPetr Machata } 1102ce9ac056SDavid Ahern *is_fdb = nhg->fdb_nh; 1103430a0491SDavid Ahern } else { 1104430a0491SDavid Ahern struct nh_info *nhi = rtnl_dereference(nh->nh_info); 1105430a0491SDavid Ahern 1106430a0491SDavid Ahern if (nhi->reject_nh && npaths > 1) { 1107430a0491SDavid Ahern NL_SET_ERR_MSG(extack, 1108430a0491SDavid Ahern "Blackhole nexthop can not be used in a group with more than 1 path"); 1109430a0491SDavid Ahern return false; 1110430a0491SDavid Ahern } 1111ce9ac056SDavid Ahern *is_fdb = nhi->fdb_nh; 1112430a0491SDavid Ahern } 1113430a0491SDavid Ahern 1114430a0491SDavid Ahern return true; 1115430a0491SDavid Ahern } 1116430a0491SDavid Ahern 111738428d68SRoopa Prabhu static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family, 111838428d68SRoopa Prabhu struct netlink_ext_ack *extack) 111938428d68SRoopa Prabhu { 112038428d68SRoopa Prabhu struct nh_info *nhi; 112138428d68SRoopa Prabhu 1122ce9ac056SDavid Ahern nhi = rtnl_dereference(nh->nh_info); 1123ce9ac056SDavid Ahern 1124ce9ac056SDavid Ahern if (!nhi->fdb_nh) { 112538428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops"); 112638428d68SRoopa Prabhu return -EINVAL; 112738428d68SRoopa Prabhu } 112838428d68SRoopa Prabhu 112938428d68SRoopa Prabhu if (*nh_family == AF_UNSPEC) { 113038428d68SRoopa Prabhu *nh_family = nhi->family; 113138428d68SRoopa Prabhu } else if (*nh_family != nhi->family) { 113238428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family 
nexthops"); 113338428d68SRoopa Prabhu return -EINVAL; 113438428d68SRoopa Prabhu } 113538428d68SRoopa Prabhu 113638428d68SRoopa Prabhu return 0; 113738428d68SRoopa Prabhu } 113838428d68SRoopa Prabhu 1139643d0878SPetr Machata static int nh_check_attr_group(struct net *net, 1140643d0878SPetr Machata struct nlattr *tb[], size_t tb_size, 1141a2601e2bSPetr Machata u16 nh_grp_type, struct netlink_ext_ack *extack) 1142430a0491SDavid Ahern { 1143430a0491SDavid Ahern unsigned int len = nla_len(tb[NHA_GROUP]); 114438428d68SRoopa Prabhu u8 nh_family = AF_UNSPEC; 1145430a0491SDavid Ahern struct nexthop_grp *nhg; 1146430a0491SDavid Ahern unsigned int i, j; 114738428d68SRoopa Prabhu u8 nhg_fdb = 0; 1148430a0491SDavid Ahern 1149eeaac363SNikolay Aleksandrov if (!len || len & (sizeof(struct nexthop_grp) - 1)) { 1150430a0491SDavid Ahern NL_SET_ERR_MSG(extack, 1151430a0491SDavid Ahern "Invalid length for nexthop group attribute"); 1152430a0491SDavid Ahern return -EINVAL; 1153430a0491SDavid Ahern } 1154430a0491SDavid Ahern 1155430a0491SDavid Ahern /* convert len to number of nexthop ids */ 1156430a0491SDavid Ahern len /= sizeof(*nhg); 1157430a0491SDavid Ahern 1158430a0491SDavid Ahern nhg = nla_data(tb[NHA_GROUP]); 1159430a0491SDavid Ahern for (i = 0; i < len; ++i) { 1160430a0491SDavid Ahern if (nhg[i].resvd1 || nhg[i].resvd2) { 1161430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0"); 1162430a0491SDavid Ahern return -EINVAL; 1163430a0491SDavid Ahern } 1164430a0491SDavid Ahern if (nhg[i].weight > 254) { 1165430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid value for weight"); 1166430a0491SDavid Ahern return -EINVAL; 1167430a0491SDavid Ahern } 1168430a0491SDavid Ahern for (j = i + 1; j < len; ++j) { 1169430a0491SDavid Ahern if (nhg[i].id == nhg[j].id) { 1170430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group"); 1171430a0491SDavid Ahern return -EINVAL; 1172430a0491SDavid Ahern } 1173430a0491SDavid Ahern } 
1174430a0491SDavid Ahern } 1175430a0491SDavid Ahern 117638428d68SRoopa Prabhu if (tb[NHA_FDB]) 117738428d68SRoopa Prabhu nhg_fdb = 1; 1178430a0491SDavid Ahern nhg = nla_data(tb[NHA_GROUP]); 1179430a0491SDavid Ahern for (i = 0; i < len; ++i) { 1180430a0491SDavid Ahern struct nexthop *nh; 1181ce9ac056SDavid Ahern bool is_fdb_nh; 1182430a0491SDavid Ahern 1183430a0491SDavid Ahern nh = nexthop_find_by_id(net, nhg[i].id); 1184430a0491SDavid Ahern if (!nh) { 1185430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid nexthop id"); 1186430a0491SDavid Ahern return -EINVAL; 1187430a0491SDavid Ahern } 1188ce9ac056SDavid Ahern if (!valid_group_nh(nh, len, &is_fdb_nh, extack)) 1189430a0491SDavid Ahern return -EINVAL; 119038428d68SRoopa Prabhu 119138428d68SRoopa Prabhu if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack)) 119238428d68SRoopa Prabhu return -EINVAL; 119338428d68SRoopa Prabhu 1194ce9ac056SDavid Ahern if (!nhg_fdb && is_fdb_nh) { 119538428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); 119638428d68SRoopa Prabhu return -EINVAL; 119738428d68SRoopa Prabhu } 1198430a0491SDavid Ahern } 1199643d0878SPetr Machata for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) { 1200430a0491SDavid Ahern if (!tb[i]) 1201430a0491SDavid Ahern continue; 1202a2601e2bSPetr Machata switch (i) { 1203a2601e2bSPetr Machata case NHA_FDB: 120438428d68SRoopa Prabhu continue; 1205a2601e2bSPetr Machata case NHA_RES_GROUP: 1206a2601e2bSPetr Machata if (nh_grp_type == NEXTHOP_GRP_TYPE_RES) 1207a2601e2bSPetr Machata continue; 1208a2601e2bSPetr Machata break; 1209a2601e2bSPetr Machata } 1210430a0491SDavid Ahern NL_SET_ERR_MSG(extack, 1211430a0491SDavid Ahern "No other attributes can be set in nexthop groups"); 1212430a0491SDavid Ahern return -EINVAL; 1213430a0491SDavid Ahern } 1214430a0491SDavid Ahern 1215430a0491SDavid Ahern return 0; 1216430a0491SDavid Ahern } 1217430a0491SDavid Ahern 1218430a0491SDavid Ahern static bool ipv6_good_nh(const struct fib6_nh *nh) 
1219430a0491SDavid Ahern { 1220430a0491SDavid Ahern int state = NUD_REACHABLE; 1221430a0491SDavid Ahern struct neighbour *n; 1222430a0491SDavid Ahern 122309eed119SEric Dumazet rcu_read_lock(); 1224430a0491SDavid Ahern 1225430a0491SDavid Ahern n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); 1226430a0491SDavid Ahern if (n) 1227b071af52SEric Dumazet state = READ_ONCE(n->nud_state); 1228430a0491SDavid Ahern 122909eed119SEric Dumazet rcu_read_unlock(); 1230430a0491SDavid Ahern 1231430a0491SDavid Ahern return !!(state & NUD_VALID); 1232430a0491SDavid Ahern } 1233430a0491SDavid Ahern 1234430a0491SDavid Ahern static bool ipv4_good_nh(const struct fib_nh *nh) 1235430a0491SDavid Ahern { 1236430a0491SDavid Ahern int state = NUD_REACHABLE; 1237430a0491SDavid Ahern struct neighbour *n; 1238430a0491SDavid Ahern 123909eed119SEric Dumazet rcu_read_lock(); 1240430a0491SDavid Ahern 1241430a0491SDavid Ahern n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, 1242430a0491SDavid Ahern (__force u32)nh->fib_nh_gw4); 1243430a0491SDavid Ahern if (n) 1244b071af52SEric Dumazet state = READ_ONCE(n->nud_state); 1245430a0491SDavid Ahern 124609eed119SEric Dumazet rcu_read_unlock(); 1247430a0491SDavid Ahern 1248430a0491SDavid Ahern return !!(state & NUD_VALID); 1249430a0491SDavid Ahern } 1250430a0491SDavid Ahern 12514bb5239bSBenjamin Poirier static bool nexthop_is_good_nh(const struct nexthop *nh) 12524bb5239bSBenjamin Poirier { 12534bb5239bSBenjamin Poirier struct nh_info *nhi = rcu_dereference(nh->nh_info); 12544bb5239bSBenjamin Poirier 12554bb5239bSBenjamin Poirier switch (nhi->family) { 12564bb5239bSBenjamin Poirier case AF_INET: 12574bb5239bSBenjamin Poirier return ipv4_good_nh(&nhi->fib_nh); 12584bb5239bSBenjamin Poirier case AF_INET6: 12594bb5239bSBenjamin Poirier return ipv6_good_nh(&nhi->fib6_nh); 12604bb5239bSBenjamin Poirier } 12614bb5239bSBenjamin Poirier 12624bb5239bSBenjamin Poirier return false; 12634bb5239bSBenjamin Poirier } 12644bb5239bSBenjamin Poirier 
1265eedd47a6SBenjamin Poirier static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash) 1266eedd47a6SBenjamin Poirier { 1267eedd47a6SBenjamin Poirier int i; 1268eedd47a6SBenjamin Poirier 1269eedd47a6SBenjamin Poirier for (i = 0; i < nhg->num_nh; i++) { 1270eedd47a6SBenjamin Poirier struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1271eedd47a6SBenjamin Poirier 1272eedd47a6SBenjamin Poirier if (hash > atomic_read(&nhge->hthr.upper_bound)) 1273eedd47a6SBenjamin Poirier continue; 1274eedd47a6SBenjamin Poirier 1275f4676ea7SIdo Schimmel nh_grp_entry_stats_inc(nhge); 1276eedd47a6SBenjamin Poirier return nhge->nh; 1277eedd47a6SBenjamin Poirier } 1278eedd47a6SBenjamin Poirier 1279eedd47a6SBenjamin Poirier WARN_ON_ONCE(1); 1280eedd47a6SBenjamin Poirier return NULL; 1281eedd47a6SBenjamin Poirier } 1282eedd47a6SBenjamin Poirier 1283de1d1ee3SPetr Machata static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) 1284430a0491SDavid Ahern { 1285f4676ea7SIdo Schimmel struct nh_grp_entry *nhge0 = NULL; 1286430a0491SDavid Ahern int i; 1287430a0491SDavid Ahern 1288eedd47a6SBenjamin Poirier if (nhg->fdb_nh) 1289eedd47a6SBenjamin Poirier return nexthop_select_path_fdb(nhg, hash); 1290eedd47a6SBenjamin Poirier 1291430a0491SDavid Ahern for (i = 0; i < nhg->num_nh; ++i) { 1292430a0491SDavid Ahern struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1293430a0491SDavid Ahern 1294430a0491SDavid Ahern /* nexthops always check if it is good and does 1295430a0491SDavid Ahern * not rely on a sysctl for this behavior 1296430a0491SDavid Ahern */ 129775f5f04cSBenjamin Poirier if (!nexthop_is_good_nh(nhge->nh)) 129875f5f04cSBenjamin Poirier continue; 1299430a0491SDavid Ahern 1300f4676ea7SIdo Schimmel if (!nhge0) 1301f4676ea7SIdo Schimmel nhge0 = nhge; 130275f5f04cSBenjamin Poirier 130375f5f04cSBenjamin Poirier if (hash > atomic_read(&nhge->hthr.upper_bound)) 130475f5f04cSBenjamin Poirier continue; 130575f5f04cSBenjamin Poirier 1306f4676ea7SIdo Schimmel 
nh_grp_entry_stats_inc(nhge); 130775f5f04cSBenjamin Poirier return nhge->nh; 1308430a0491SDavid Ahern } 1309430a0491SDavid Ahern 1310f4676ea7SIdo Schimmel if (!nhge0) 1311f4676ea7SIdo Schimmel nhge0 = &nhg->nh_entries[0]; 1312f4676ea7SIdo Schimmel nh_grp_entry_stats_inc(nhge0); 1313f4676ea7SIdo Schimmel return nhge0->nh; 1314430a0491SDavid Ahern } 131579bc55e3SPetr Machata 1316283a72a5SPetr Machata static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash) 1317283a72a5SPetr Machata { 1318283a72a5SPetr Machata struct nh_res_table *res_table = rcu_dereference(nhg->res_table); 1319283a72a5SPetr Machata u16 bucket_index = hash % res_table->num_nh_buckets; 1320283a72a5SPetr Machata struct nh_res_bucket *bucket; 1321283a72a5SPetr Machata struct nh_grp_entry *nhge; 1322283a72a5SPetr Machata 1323283a72a5SPetr Machata /* nexthop_select_path() is expected to return a non-NULL value, so 1324283a72a5SPetr Machata * skip protocol validation and just hand out whatever there is. 1325283a72a5SPetr Machata */ 1326283a72a5SPetr Machata bucket = &res_table->nh_buckets[bucket_index]; 1327283a72a5SPetr Machata nh_res_bucket_set_busy(bucket); 1328283a72a5SPetr Machata nhge = rcu_dereference(bucket->nh_entry); 1329f4676ea7SIdo Schimmel nh_grp_entry_stats_inc(nhge); 1330283a72a5SPetr Machata return nhge->nh; 1331283a72a5SPetr Machata } 1332283a72a5SPetr Machata 133379bc55e3SPetr Machata struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) 133479bc55e3SPetr Machata { 133579bc55e3SPetr Machata struct nh_group *nhg; 133679bc55e3SPetr Machata 133779bc55e3SPetr Machata if (!nh->is_group) 133879bc55e3SPetr Machata return nh; 133979bc55e3SPetr Machata 134079bc55e3SPetr Machata nhg = rcu_dereference(nh->nh_grp); 1341de1d1ee3SPetr Machata if (nhg->hash_threshold) 1342de1d1ee3SPetr Machata return nexthop_select_path_hthr(nhg, hash); 1343283a72a5SPetr Machata else if (nhg->resilient) 1344283a72a5SPetr Machata return nexthop_select_path_res(nhg, hash); 
134579bc55e3SPetr Machata 134679bc55e3SPetr Machata /* Unreachable. */ 134779bc55e3SPetr Machata return NULL; 134879bc55e3SPetr Machata } 1349430a0491SDavid Ahern EXPORT_SYMBOL_GPL(nexthop_select_path); 1350430a0491SDavid Ahern 1351f88c9aa1SDavid Ahern int nexthop_for_each_fib6_nh(struct nexthop *nh, 1352f88c9aa1SDavid Ahern int (*cb)(struct fib6_nh *nh, void *arg), 1353f88c9aa1SDavid Ahern void *arg) 1354f88c9aa1SDavid Ahern { 1355f88c9aa1SDavid Ahern struct nh_info *nhi; 1356f88c9aa1SDavid Ahern int err; 1357f88c9aa1SDavid Ahern 1358f88c9aa1SDavid Ahern if (nh->is_group) { 1359f88c9aa1SDavid Ahern struct nh_group *nhg; 1360f88c9aa1SDavid Ahern int i; 1361f88c9aa1SDavid Ahern 1362f88c9aa1SDavid Ahern nhg = rcu_dereference_rtnl(nh->nh_grp); 1363f88c9aa1SDavid Ahern for (i = 0; i < nhg->num_nh; i++) { 1364f88c9aa1SDavid Ahern struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1365f88c9aa1SDavid Ahern 1366f88c9aa1SDavid Ahern nhi = rcu_dereference_rtnl(nhge->nh->nh_info); 1367f88c9aa1SDavid Ahern err = cb(&nhi->fib6_nh, arg); 1368f88c9aa1SDavid Ahern if (err) 1369f88c9aa1SDavid Ahern return err; 1370f88c9aa1SDavid Ahern } 1371f88c9aa1SDavid Ahern } else { 1372f88c9aa1SDavid Ahern nhi = rcu_dereference_rtnl(nh->nh_info); 1373f88c9aa1SDavid Ahern err = cb(&nhi->fib6_nh, arg); 1374f88c9aa1SDavid Ahern if (err) 1375f88c9aa1SDavid Ahern return err; 1376f88c9aa1SDavid Ahern } 1377f88c9aa1SDavid Ahern 1378f88c9aa1SDavid Ahern return 0; 1379f88c9aa1SDavid Ahern } 1380f88c9aa1SDavid Ahern EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh); 1381f88c9aa1SDavid Ahern 13827bf4796dSDavid Ahern static int check_src_addr(const struct in6_addr *saddr, 13837bf4796dSDavid Ahern struct netlink_ext_ack *extack) 13847bf4796dSDavid Ahern { 13857bf4796dSDavid Ahern if (!ipv6_addr_any(saddr)) { 13867bf4796dSDavid Ahern NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects"); 13877bf4796dSDavid Ahern return -EINVAL; 13887bf4796dSDavid Ahern } 13897bf4796dSDavid Ahern 
return 0; 13907bf4796dSDavid Ahern } 13917bf4796dSDavid Ahern 1392f88d8ea6SDavid Ahern int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, 1393f88d8ea6SDavid Ahern struct netlink_ext_ack *extack) 1394f88d8ea6SDavid Ahern { 1395f88d8ea6SDavid Ahern struct nh_info *nhi; 1396ce9ac056SDavid Ahern bool is_fdb_nh; 139738428d68SRoopa Prabhu 1398f88d8ea6SDavid Ahern /* fib6_src is unique to a fib6_info and limits the ability to cache 1399f88d8ea6SDavid Ahern * routes in fib6_nh within a nexthop that is potentially shared 1400f88d8ea6SDavid Ahern * across multiple fib entries. If the config wants to use source 1401f88d8ea6SDavid Ahern * routing it can not use nexthop objects. mlxsw also does not allow 1402f88d8ea6SDavid Ahern * fib6_src on routes. 1403f88d8ea6SDavid Ahern */ 14047bf4796dSDavid Ahern if (cfg && check_src_addr(&cfg->fc_src, extack) < 0) 1405f88d8ea6SDavid Ahern return -EINVAL; 1406f88d8ea6SDavid Ahern 1407f88d8ea6SDavid Ahern if (nh->is_group) { 1408f88d8ea6SDavid Ahern struct nh_group *nhg; 1409f88d8ea6SDavid Ahern 1410f88d8ea6SDavid Ahern nhg = rtnl_dereference(nh->nh_grp); 1411f88d8ea6SDavid Ahern if (nhg->has_v4) 1412f88d8ea6SDavid Ahern goto no_v4_nh; 1413ce9ac056SDavid Ahern is_fdb_nh = nhg->fdb_nh; 1414f88d8ea6SDavid Ahern } else { 1415f88d8ea6SDavid Ahern nhi = rtnl_dereference(nh->nh_info); 1416f88d8ea6SDavid Ahern if (nhi->family == AF_INET) 1417f88d8ea6SDavid Ahern goto no_v4_nh; 1418ce9ac056SDavid Ahern is_fdb_nh = nhi->fdb_nh; 1419ce9ac056SDavid Ahern } 1420ce9ac056SDavid Ahern 1421ce9ac056SDavid Ahern if (is_fdb_nh) { 1422ce9ac056SDavid Ahern NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); 1423ce9ac056SDavid Ahern return -EINVAL; 1424f88d8ea6SDavid Ahern } 1425f88d8ea6SDavid Ahern 1426f88d8ea6SDavid Ahern return 0; 1427f88d8ea6SDavid Ahern no_v4_nh: 1428f88d8ea6SDavid Ahern NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop"); 1429f88d8ea6SDavid Ahern return -EINVAL; 1430f88d8ea6SDavid Ahern } 
1431f88d8ea6SDavid Ahern EXPORT_SYMBOL_GPL(fib6_check_nexthop); 1432f88d8ea6SDavid Ahern 14337bf4796dSDavid Ahern /* if existing nexthop has ipv6 routes linked to it, need 14347bf4796dSDavid Ahern * to verify this new spec works with ipv6 14357bf4796dSDavid Ahern */ 14367bf4796dSDavid Ahern static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new, 14377bf4796dSDavid Ahern struct netlink_ext_ack *extack) 14387bf4796dSDavid Ahern { 14397bf4796dSDavid Ahern struct fib6_info *f6i; 14407bf4796dSDavid Ahern 14417bf4796dSDavid Ahern if (list_empty(&old->f6i_list)) 14427bf4796dSDavid Ahern return 0; 14437bf4796dSDavid Ahern 14447bf4796dSDavid Ahern list_for_each_entry(f6i, &old->f6i_list, nh_list) { 14457bf4796dSDavid Ahern if (check_src_addr(&f6i->fib6_src.addr, extack) < 0) 14467bf4796dSDavid Ahern return -EINVAL; 14477bf4796dSDavid Ahern } 14487bf4796dSDavid Ahern 14497bf4796dSDavid Ahern return fib6_check_nexthop(new, NULL, extack); 14507bf4796dSDavid Ahern } 14517bf4796dSDavid Ahern 1452ce9ac056SDavid Ahern static int nexthop_check_scope(struct nh_info *nhi, u8 scope, 14534c7e8084SDavid Ahern struct netlink_ext_ack *extack) 14544c7e8084SDavid Ahern { 14554c7e8084SDavid Ahern if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) { 14564c7e8084SDavid Ahern NL_SET_ERR_MSG(extack, 14574c7e8084SDavid Ahern "Route with host scope can not have a gateway"); 14584c7e8084SDavid Ahern return -EINVAL; 14594c7e8084SDavid Ahern } 14604c7e8084SDavid Ahern 14614c7e8084SDavid Ahern if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) { 14624c7e8084SDavid Ahern NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop"); 14634c7e8084SDavid Ahern return -EINVAL; 14644c7e8084SDavid Ahern } 14654c7e8084SDavid Ahern 14664c7e8084SDavid Ahern return 0; 14674c7e8084SDavid Ahern } 14684c7e8084SDavid Ahern 14694c7e8084SDavid Ahern /* Invoked by fib add code to verify nexthop by id is ok with 14704c7e8084SDavid Ahern * config for prefix; parts of fib_check_nh not 
done when nexthop 14714c7e8084SDavid Ahern * object is used. 14724c7e8084SDavid Ahern */ 14734c7e8084SDavid Ahern int fib_check_nexthop(struct nexthop *nh, u8 scope, 14744c7e8084SDavid Ahern struct netlink_ext_ack *extack) 14754c7e8084SDavid Ahern { 1476ce9ac056SDavid Ahern struct nh_info *nhi; 14774c7e8084SDavid Ahern int err = 0; 14784c7e8084SDavid Ahern 1479ce9ac056SDavid Ahern if (nh->is_group) { 1480ce9ac056SDavid Ahern struct nh_group *nhg; 1481ce9ac056SDavid Ahern 1482ce9ac056SDavid Ahern nhg = rtnl_dereference(nh->nh_grp); 1483ce9ac056SDavid Ahern if (nhg->fdb_nh) { 148438428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); 148538428d68SRoopa Prabhu err = -EINVAL; 148638428d68SRoopa Prabhu goto out; 148738428d68SRoopa Prabhu } 148838428d68SRoopa Prabhu 14894c7e8084SDavid Ahern if (scope == RT_SCOPE_HOST) { 14904c7e8084SDavid Ahern NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); 14914c7e8084SDavid Ahern err = -EINVAL; 14924c7e8084SDavid Ahern goto out; 14934c7e8084SDavid Ahern } 14944c7e8084SDavid Ahern 14954c7e8084SDavid Ahern /* all nexthops in a group have the same scope */ 1496ce9ac056SDavid Ahern nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info); 1497ce9ac056SDavid Ahern err = nexthop_check_scope(nhi, scope, extack); 14984c7e8084SDavid Ahern } else { 1499ce9ac056SDavid Ahern nhi = rtnl_dereference(nh->nh_info); 1500ce9ac056SDavid Ahern if (nhi->fdb_nh) { 1501ce9ac056SDavid Ahern NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); 1502ce9ac056SDavid Ahern err = -EINVAL; 1503ce9ac056SDavid Ahern goto out; 15044c7e8084SDavid Ahern } 1505ce9ac056SDavid Ahern err = nexthop_check_scope(nhi, scope, extack); 1506ce9ac056SDavid Ahern } 1507ce9ac056SDavid Ahern 15084c7e8084SDavid Ahern out: 15094c7e8084SDavid Ahern return err; 15104c7e8084SDavid Ahern } 15114c7e8084SDavid Ahern 15127bf4796dSDavid Ahern static int fib_check_nh_list(struct nexthop *old, struct nexthop *new, 
15137bf4796dSDavid Ahern struct netlink_ext_ack *extack) 15147bf4796dSDavid Ahern { 15157bf4796dSDavid Ahern struct fib_info *fi; 15167bf4796dSDavid Ahern 15177bf4796dSDavid Ahern list_for_each_entry(fi, &old->fi_list, nh_list) { 15187bf4796dSDavid Ahern int err; 15197bf4796dSDavid Ahern 15207bf4796dSDavid Ahern err = fib_check_nexthop(new, fi->fib_scope, extack); 15217bf4796dSDavid Ahern if (err) 15227bf4796dSDavid Ahern return err; 15237bf4796dSDavid Ahern } 15247bf4796dSDavid Ahern return 0; 15257bf4796dSDavid Ahern } 15267bf4796dSDavid Ahern 1527283a72a5SPetr Machata static bool nh_res_nhge_is_balanced(const struct nh_grp_entry *nhge) 1528283a72a5SPetr Machata { 1529283a72a5SPetr Machata return nhge->res.count_buckets == nhge->res.wants_buckets; 1530283a72a5SPetr Machata } 1531283a72a5SPetr Machata 1532283a72a5SPetr Machata static bool nh_res_nhge_is_ow(const struct nh_grp_entry *nhge) 1533283a72a5SPetr Machata { 1534283a72a5SPetr Machata return nhge->res.count_buckets > nhge->res.wants_buckets; 1535283a72a5SPetr Machata } 1536283a72a5SPetr Machata 1537283a72a5SPetr Machata static bool nh_res_nhge_is_uw(const struct nh_grp_entry *nhge) 1538283a72a5SPetr Machata { 1539283a72a5SPetr Machata return nhge->res.count_buckets < nhge->res.wants_buckets; 1540283a72a5SPetr Machata } 1541283a72a5SPetr Machata 1542283a72a5SPetr Machata static bool nh_res_table_is_balanced(const struct nh_res_table *res_table) 1543283a72a5SPetr Machata { 1544283a72a5SPetr Machata return list_empty(&res_table->uw_nh_entries); 1545283a72a5SPetr Machata } 1546283a72a5SPetr Machata 1547283a72a5SPetr Machata static void nh_res_bucket_unset_nh(struct nh_res_bucket *bucket) 1548283a72a5SPetr Machata { 1549283a72a5SPetr Machata struct nh_grp_entry *nhge; 1550283a72a5SPetr Machata 1551283a72a5SPetr Machata if (bucket->occupied) { 1552283a72a5SPetr Machata nhge = nh_res_dereference(bucket->nh_entry); 1553283a72a5SPetr Machata nhge->res.count_buckets--; 1554283a72a5SPetr Machata bucket->occupied = 
false; 1555283a72a5SPetr Machata } 1556283a72a5SPetr Machata } 1557283a72a5SPetr Machata 1558283a72a5SPetr Machata static void nh_res_bucket_set_nh(struct nh_res_bucket *bucket, 1559283a72a5SPetr Machata struct nh_grp_entry *nhge) 1560283a72a5SPetr Machata { 1561283a72a5SPetr Machata nh_res_bucket_unset_nh(bucket); 1562283a72a5SPetr Machata 1563283a72a5SPetr Machata bucket->occupied = true; 1564283a72a5SPetr Machata rcu_assign_pointer(bucket->nh_entry, nhge); 1565283a72a5SPetr Machata nhge->res.count_buckets++; 1566283a72a5SPetr Machata } 1567283a72a5SPetr Machata 1568283a72a5SPetr Machata static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table, 1569283a72a5SPetr Machata struct nh_res_bucket *bucket, 1570283a72a5SPetr Machata unsigned long *deadline, bool *force) 1571283a72a5SPetr Machata { 1572283a72a5SPetr Machata unsigned long now = jiffies; 1573283a72a5SPetr Machata struct nh_grp_entry *nhge; 1574283a72a5SPetr Machata unsigned long idle_point; 1575283a72a5SPetr Machata 1576283a72a5SPetr Machata if (!bucket->occupied) { 1577283a72a5SPetr Machata /* The bucket is not occupied, its NHGE pointer is either 1578283a72a5SPetr Machata * NULL or obsolete. We _have to_ migrate: set force. 1579283a72a5SPetr Machata */ 1580283a72a5SPetr Machata *force = true; 1581283a72a5SPetr Machata return true; 1582283a72a5SPetr Machata } 1583283a72a5SPetr Machata 1584283a72a5SPetr Machata nhge = nh_res_dereference(bucket->nh_entry); 1585283a72a5SPetr Machata 1586283a72a5SPetr Machata /* If the bucket is populated by an underweight or balanced 1587283a72a5SPetr Machata * nexthop, do not migrate. 1588283a72a5SPetr Machata */ 1589283a72a5SPetr Machata if (!nh_res_nhge_is_ow(nhge)) 1590283a72a5SPetr Machata return false; 1591283a72a5SPetr Machata 1592283a72a5SPetr Machata /* At this point we know that the bucket is populated with an 1593283a72a5SPetr Machata * overweight nexthop. 
It needs to be migrated to a new nexthop if 1594283a72a5SPetr Machata * the idle timer or unbalanced timer expired. 1595283a72a5SPetr Machata */ 1596283a72a5SPetr Machata 1597283a72a5SPetr Machata idle_point = nh_res_bucket_idle_point(res_table, bucket, now); 1598283a72a5SPetr Machata if (time_after_eq(now, idle_point)) { 1599283a72a5SPetr Machata /* The bucket is idle. We _can_ migrate: unset force. */ 1600283a72a5SPetr Machata *force = false; 1601283a72a5SPetr Machata return true; 1602283a72a5SPetr Machata } 1603283a72a5SPetr Machata 1604283a72a5SPetr Machata /* Unbalanced timer of 0 means "never force". */ 1605283a72a5SPetr Machata if (res_table->unbalanced_timer) { 1606283a72a5SPetr Machata unsigned long unb_point; 1607283a72a5SPetr Machata 1608283a72a5SPetr Machata unb_point = nh_res_table_unb_point(res_table); 1609283a72a5SPetr Machata if (time_after(now, unb_point)) { 1610283a72a5SPetr Machata /* The bucket is not idle, but the unbalanced timer 1611283a72a5SPetr Machata * expired. We _can_ migrate, but set force anyway, 1612283a72a5SPetr Machata * so that drivers know to ignore activity reports 1613283a72a5SPetr Machata * from the HW. 
1614283a72a5SPetr Machata */ 1615283a72a5SPetr Machata *force = true; 1616283a72a5SPetr Machata return true; 1617283a72a5SPetr Machata } 1618283a72a5SPetr Machata 1619283a72a5SPetr Machata nh_res_time_set_deadline(unb_point, deadline); 1620283a72a5SPetr Machata } 1621283a72a5SPetr Machata 1622283a72a5SPetr Machata nh_res_time_set_deadline(idle_point, deadline); 1623283a72a5SPetr Machata return false; 1624283a72a5SPetr Machata } 1625283a72a5SPetr Machata 1626283a72a5SPetr Machata static bool nh_res_bucket_migrate(struct nh_res_table *res_table, 16270b4818aaSPetr Machata u16 bucket_index, bool notify, 16280b4818aaSPetr Machata bool notify_nl, bool force) 1629283a72a5SPetr Machata { 1630283a72a5SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index]; 1631283a72a5SPetr Machata struct nh_grp_entry *new_nhge; 16327c37c7e0SPetr Machata struct netlink_ext_ack extack; 16337c37c7e0SPetr Machata int err; 1634283a72a5SPetr Machata 1635283a72a5SPetr Machata new_nhge = list_first_entry_or_null(&res_table->uw_nh_entries, 1636283a72a5SPetr Machata struct nh_grp_entry, 1637283a72a5SPetr Machata res.uw_nh_entry); 1638283a72a5SPetr Machata if (WARN_ON_ONCE(!new_nhge)) 1639283a72a5SPetr Machata /* If this function is called, "bucket" is either not 1640283a72a5SPetr Machata * occupied, or it belongs to a next hop that is 1641283a72a5SPetr Machata * overweight. In either case, there ought to be a 1642283a72a5SPetr Machata * corresponding underweight next hop. 
1643283a72a5SPetr Machata */ 1644283a72a5SPetr Machata return false; 1645283a72a5SPetr Machata 16467c37c7e0SPetr Machata if (notify) { 16477c37c7e0SPetr Machata struct nh_grp_entry *old_nhge; 16487c37c7e0SPetr Machata 16497c37c7e0SPetr Machata old_nhge = nh_res_dereference(bucket->nh_entry); 16507c37c7e0SPetr Machata err = call_nexthop_res_bucket_notifiers(res_table->net, 16517c37c7e0SPetr Machata res_table->nhg_id, 16527c37c7e0SPetr Machata bucket_index, force, 16537c37c7e0SPetr Machata old_nhge->nh, 16547c37c7e0SPetr Machata new_nhge->nh, &extack); 16557c37c7e0SPetr Machata if (err) { 16567c37c7e0SPetr Machata pr_err_ratelimited("%s\n", extack._msg); 16577c37c7e0SPetr Machata if (!force) 16587c37c7e0SPetr Machata return false; 16597c37c7e0SPetr Machata /* It is not possible to veto a forced replacement, so 16607c37c7e0SPetr Machata * just clear the hardware flags from the nexthop 16617c37c7e0SPetr Machata * bucket to indicate to user space that this bucket is 16627c37c7e0SPetr Machata * not correctly populated in hardware. 
16637c37c7e0SPetr Machata */ 16647c37c7e0SPetr Machata bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); 16657c37c7e0SPetr Machata } 16667c37c7e0SPetr Machata } 16677c37c7e0SPetr Machata 1668283a72a5SPetr Machata nh_res_bucket_set_nh(bucket, new_nhge); 1669283a72a5SPetr Machata nh_res_bucket_set_idle(res_table, bucket); 1670283a72a5SPetr Machata 16710b4818aaSPetr Machata if (notify_nl) 16720b4818aaSPetr Machata nexthop_bucket_notify(res_table, bucket_index); 16730b4818aaSPetr Machata 1674283a72a5SPetr Machata if (nh_res_nhge_is_balanced(new_nhge)) 1675283a72a5SPetr Machata list_del(&new_nhge->res.uw_nh_entry); 1676283a72a5SPetr Machata return true; 1677283a72a5SPetr Machata } 1678283a72a5SPetr Machata 1679283a72a5SPetr Machata #define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2) 1680283a72a5SPetr Machata 16810b4818aaSPetr Machata static void nh_res_table_upkeep(struct nh_res_table *res_table, 16820b4818aaSPetr Machata bool notify, bool notify_nl) 1683283a72a5SPetr Machata { 1684283a72a5SPetr Machata unsigned long now = jiffies; 1685283a72a5SPetr Machata unsigned long deadline; 1686283a72a5SPetr Machata u16 i; 1687283a72a5SPetr Machata 1688283a72a5SPetr Machata /* Deadline is the next time that upkeep should be run. It is the 1689283a72a5SPetr Machata * earliest time at which one of the buckets might be migrated. 1690283a72a5SPetr Machata * Start at the most pessimistic estimate: either unbalanced_timer 1691283a72a5SPetr Machata * from now, or if there is none, idle_timer from now. For each 1692283a72a5SPetr Machata * encountered time point, call nh_res_time_set_deadline() to 1693283a72a5SPetr Machata * refine the estimate. 
1694283a72a5SPetr Machata */ 1695283a72a5SPetr Machata if (res_table->unbalanced_timer) 1696283a72a5SPetr Machata deadline = now + res_table->unbalanced_timer; 1697283a72a5SPetr Machata else 1698283a72a5SPetr Machata deadline = now + res_table->idle_timer; 1699283a72a5SPetr Machata 1700283a72a5SPetr Machata for (i = 0; i < res_table->num_nh_buckets; i++) { 1701283a72a5SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; 1702283a72a5SPetr Machata bool force; 1703283a72a5SPetr Machata 1704283a72a5SPetr Machata if (nh_res_bucket_should_migrate(res_table, bucket, 1705283a72a5SPetr Machata &deadline, &force)) { 17067c37c7e0SPetr Machata if (!nh_res_bucket_migrate(res_table, i, notify, 17070b4818aaSPetr Machata notify_nl, force)) { 1708283a72a5SPetr Machata unsigned long idle_point; 1709283a72a5SPetr Machata 1710283a72a5SPetr Machata /* A driver can override the migration 1711283a72a5SPetr Machata * decision if the HW reports that the 1712283a72a5SPetr Machata * bucket is actually not idle. Therefore 1713283a72a5SPetr Machata * remark the bucket as busy again and 1714283a72a5SPetr Machata * update the deadline. 1715283a72a5SPetr Machata */ 1716283a72a5SPetr Machata nh_res_bucket_set_busy(bucket); 1717283a72a5SPetr Machata idle_point = nh_res_bucket_idle_point(res_table, 1718283a72a5SPetr Machata bucket, 1719283a72a5SPetr Machata now); 1720283a72a5SPetr Machata nh_res_time_set_deadline(idle_point, &deadline); 1721283a72a5SPetr Machata } 1722283a72a5SPetr Machata } 1723283a72a5SPetr Machata } 1724283a72a5SPetr Machata 1725283a72a5SPetr Machata /* If the group is still unbalanced, schedule the next upkeep to 1726283a72a5SPetr Machata * either the deadline computed above, or the minimum deadline, 1727283a72a5SPetr Machata * whichever comes later. 
1728283a72a5SPetr Machata */ 1729283a72a5SPetr Machata if (!nh_res_table_is_balanced(res_table)) { 1730283a72a5SPetr Machata unsigned long now = jiffies; 1731283a72a5SPetr Machata unsigned long min_deadline; 1732283a72a5SPetr Machata 1733283a72a5SPetr Machata min_deadline = now + NH_RES_UPKEEP_DW_MINIMUM_INTERVAL; 1734283a72a5SPetr Machata if (time_before(deadline, min_deadline)) 1735283a72a5SPetr Machata deadline = min_deadline; 1736283a72a5SPetr Machata 1737283a72a5SPetr Machata queue_delayed_work(system_power_efficient_wq, 1738283a72a5SPetr Machata &res_table->upkeep_dw, deadline - now); 1739283a72a5SPetr Machata } 1740283a72a5SPetr Machata } 1741283a72a5SPetr Machata 1742283a72a5SPetr Machata static void nh_res_table_upkeep_dw(struct work_struct *work) 1743283a72a5SPetr Machata { 1744283a72a5SPetr Machata struct delayed_work *dw = to_delayed_work(work); 1745283a72a5SPetr Machata struct nh_res_table *res_table; 1746283a72a5SPetr Machata 1747283a72a5SPetr Machata res_table = container_of(dw, struct nh_res_table, upkeep_dw); 17480b4818aaSPetr Machata nh_res_table_upkeep(res_table, true, true); 1749283a72a5SPetr Machata } 1750283a72a5SPetr Machata 1751283a72a5SPetr Machata static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table) 1752283a72a5SPetr Machata { 1753283a72a5SPetr Machata cancel_delayed_work_sync(&res_table->upkeep_dw); 1754283a72a5SPetr Machata } 1755283a72a5SPetr Machata 1756283a72a5SPetr Machata static void nh_res_group_rebalance(struct nh_group *nhg, 1757283a72a5SPetr Machata struct nh_res_table *res_table) 1758283a72a5SPetr Machata { 1759283a72a5SPetr Machata int prev_upper_bound = 0; 1760283a72a5SPetr Machata int total = 0; 1761283a72a5SPetr Machata int w = 0; 1762283a72a5SPetr Machata int i; 1763283a72a5SPetr Machata 1764283a72a5SPetr Machata INIT_LIST_HEAD(&res_table->uw_nh_entries); 1765283a72a5SPetr Machata 1766283a72a5SPetr Machata for (i = 0; i < nhg->num_nh; ++i) 1767283a72a5SPetr Machata total += nhg->nh_entries[i].weight; 
1768283a72a5SPetr Machata 1769283a72a5SPetr Machata for (i = 0; i < nhg->num_nh; ++i) { 1770283a72a5SPetr Machata struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1771283a72a5SPetr Machata int upper_bound; 1772283a72a5SPetr Machata 1773283a72a5SPetr Machata w += nhge->weight; 1774283a72a5SPetr Machata upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w, 1775283a72a5SPetr Machata total); 1776283a72a5SPetr Machata nhge->res.wants_buckets = upper_bound - prev_upper_bound; 1777283a72a5SPetr Machata prev_upper_bound = upper_bound; 1778283a72a5SPetr Machata 1779283a72a5SPetr Machata if (nh_res_nhge_is_uw(nhge)) { 1780283a72a5SPetr Machata if (list_empty(&res_table->uw_nh_entries)) 1781283a72a5SPetr Machata res_table->unbalanced_since = jiffies; 1782283a72a5SPetr Machata list_add(&nhge->res.uw_nh_entry, 1783283a72a5SPetr Machata &res_table->uw_nh_entries); 1784283a72a5SPetr Machata } 1785283a72a5SPetr Machata } 1786283a72a5SPetr Machata } 1787283a72a5SPetr Machata 1788283a72a5SPetr Machata /* Migrate buckets in res_table so that they reference NHGE's from NHG with 1789283a72a5SPetr Machata * the right NH ID. Set those buckets that do not have a corresponding NHGE 1790283a72a5SPetr Machata * entry in NHG as not occupied. 
1791283a72a5SPetr Machata */ 1792283a72a5SPetr Machata static void nh_res_table_migrate_buckets(struct nh_res_table *res_table, 1793283a72a5SPetr Machata struct nh_group *nhg) 1794283a72a5SPetr Machata { 1795283a72a5SPetr Machata u16 i; 1796283a72a5SPetr Machata 1797283a72a5SPetr Machata for (i = 0; i < res_table->num_nh_buckets; i++) { 1798283a72a5SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; 1799283a72a5SPetr Machata u32 id = rtnl_dereference(bucket->nh_entry)->nh->id; 1800283a72a5SPetr Machata bool found = false; 1801283a72a5SPetr Machata int j; 1802283a72a5SPetr Machata 1803283a72a5SPetr Machata for (j = 0; j < nhg->num_nh; j++) { 1804283a72a5SPetr Machata struct nh_grp_entry *nhge = &nhg->nh_entries[j]; 1805283a72a5SPetr Machata 1806283a72a5SPetr Machata if (nhge->nh->id == id) { 1807283a72a5SPetr Machata nh_res_bucket_set_nh(bucket, nhge); 1808283a72a5SPetr Machata found = true; 1809283a72a5SPetr Machata break; 1810283a72a5SPetr Machata } 1811283a72a5SPetr Machata } 1812283a72a5SPetr Machata 1813283a72a5SPetr Machata if (!found) 1814283a72a5SPetr Machata nh_res_bucket_unset_nh(bucket); 1815283a72a5SPetr Machata } 1816283a72a5SPetr Machata } 1817283a72a5SPetr Machata 1818283a72a5SPetr Machata static void replace_nexthop_grp_res(struct nh_group *oldg, 1819283a72a5SPetr Machata struct nh_group *newg) 1820283a72a5SPetr Machata { 1821283a72a5SPetr Machata /* For NH group replacement, the new NHG might only have a stub 1822283a72a5SPetr Machata * hash table with 0 buckets, because the number of buckets was not 1823283a72a5SPetr Machata * specified. For NH removal, oldg and newg both reference the same 1824283a72a5SPetr Machata * res_table. So in any case, in the following, we want to work 1825283a72a5SPetr Machata * with oldg->res_table. 
1826283a72a5SPetr Machata */ 1827283a72a5SPetr Machata struct nh_res_table *old_res_table = rtnl_dereference(oldg->res_table); 1828283a72a5SPetr Machata unsigned long prev_unbalanced_since = old_res_table->unbalanced_since; 1829283a72a5SPetr Machata bool prev_has_uw = !list_empty(&old_res_table->uw_nh_entries); 1830283a72a5SPetr Machata 1831283a72a5SPetr Machata nh_res_table_cancel_upkeep(old_res_table); 1832283a72a5SPetr Machata nh_res_table_migrate_buckets(old_res_table, newg); 1833283a72a5SPetr Machata nh_res_group_rebalance(newg, old_res_table); 1834283a72a5SPetr Machata if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries)) 1835283a72a5SPetr Machata old_res_table->unbalanced_since = prev_unbalanced_since; 18360b4818aaSPetr Machata nh_res_table_upkeep(old_res_table, true, false); 1837283a72a5SPetr Machata } 1838283a72a5SPetr Machata 1839de1d1ee3SPetr Machata static void nh_hthr_group_rebalance(struct nh_group *nhg) 1840430a0491SDavid Ahern { 1841430a0491SDavid Ahern int total = 0; 1842430a0491SDavid Ahern int w = 0; 1843430a0491SDavid Ahern int i; 1844430a0491SDavid Ahern 1845430a0491SDavid Ahern for (i = 0; i < nhg->num_nh; ++i) 1846430a0491SDavid Ahern total += nhg->nh_entries[i].weight; 1847430a0491SDavid Ahern 1848430a0491SDavid Ahern for (i = 0; i < nhg->num_nh; ++i) { 1849430a0491SDavid Ahern struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1850430a0491SDavid Ahern int upper_bound; 1851430a0491SDavid Ahern 1852430a0491SDavid Ahern w += nhge->weight; 1853430a0491SDavid Ahern upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; 1854de1d1ee3SPetr Machata atomic_set(&nhge->hthr.upper_bound, upper_bound); 1855430a0491SDavid Ahern } 1856430a0491SDavid Ahern } 1857430a0491SDavid Ahern 1858ac21753aSDavid Ahern static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, 1859430a0491SDavid Ahern struct nl_info *nlinfo) 1860430a0491SDavid Ahern { 186190f33bffSNikolay Aleksandrov struct nh_grp_entry *nhges, *new_nhges; 
1862ac21753aSDavid Ahern struct nexthop *nhp = nhge->nh_parent; 1863833a1065SIdo Schimmel struct netlink_ext_ack extack; 1864430a0491SDavid Ahern struct nexthop *nh = nhge->nh; 186590f33bffSNikolay Aleksandrov struct nh_group *nhg, *newg; 1866833a1065SIdo Schimmel int i, j, err; 1867430a0491SDavid Ahern 1868430a0491SDavid Ahern WARN_ON(!nh); 1869430a0491SDavid Ahern 1870ac21753aSDavid Ahern nhg = rtnl_dereference(nhp->nh_grp); 187190f33bffSNikolay Aleksandrov newg = nhg->spare; 1872430a0491SDavid Ahern 187390f33bffSNikolay Aleksandrov /* last entry, keep it visible and remove the parent */ 187490f33bffSNikolay Aleksandrov if (nhg->num_nh == 1) { 187590f33bffSNikolay Aleksandrov remove_nexthop(net, nhp, nlinfo); 1876430a0491SDavid Ahern return; 187790f33bffSNikolay Aleksandrov } 1878430a0491SDavid Ahern 1879863b2558SIdo Schimmel newg->has_v4 = false; 188090e1a9e2SPetr Machata newg->is_multipath = nhg->is_multipath; 1881de1d1ee3SPetr Machata newg->hash_threshold = nhg->hash_threshold; 1882283a72a5SPetr Machata newg->resilient = nhg->resilient; 1883ce9ac056SDavid Ahern newg->fdb_nh = nhg->fdb_nh; 188490f33bffSNikolay Aleksandrov newg->num_nh = nhg->num_nh; 1885430a0491SDavid Ahern 188690f33bffSNikolay Aleksandrov /* copy old entries to new except the one getting removed */ 188790f33bffSNikolay Aleksandrov nhges = nhg->nh_entries; 188890f33bffSNikolay Aleksandrov new_nhges = newg->nh_entries; 188990f33bffSNikolay Aleksandrov for (i = 0, j = 0; i < nhg->num_nh; ++i) { 1890863b2558SIdo Schimmel struct nh_info *nhi; 1891863b2558SIdo Schimmel 189290f33bffSNikolay Aleksandrov /* current nexthop getting removed */ 189390f33bffSNikolay Aleksandrov if (nhg->nh_entries[i].nh == nh) { 189490f33bffSNikolay Aleksandrov newg->num_nh--; 189590f33bffSNikolay Aleksandrov continue; 189690f33bffSNikolay Aleksandrov } 1897430a0491SDavid Ahern 1898863b2558SIdo Schimmel nhi = rtnl_dereference(nhges[i].nh->nh_info); 1899863b2558SIdo Schimmel if (nhi->family == AF_INET) 1900863b2558SIdo 
Schimmel newg->has_v4 = true; 1901863b2558SIdo Schimmel 190290f33bffSNikolay Aleksandrov list_del(&nhges[i].nh_list); 1903f4676ea7SIdo Schimmel new_nhges[j].stats = nhges[i].stats; 190490f33bffSNikolay Aleksandrov new_nhges[j].nh_parent = nhges[i].nh_parent; 190590f33bffSNikolay Aleksandrov new_nhges[j].nh = nhges[i].nh; 190690f33bffSNikolay Aleksandrov new_nhges[j].weight = nhges[i].weight; 190790f33bffSNikolay Aleksandrov list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list); 190890f33bffSNikolay Aleksandrov j++; 190990f33bffSNikolay Aleksandrov } 191090f33bffSNikolay Aleksandrov 1911de1d1ee3SPetr Machata if (newg->hash_threshold) 1912de1d1ee3SPetr Machata nh_hthr_group_rebalance(newg); 1913283a72a5SPetr Machata else if (newg->resilient) 1914283a72a5SPetr Machata replace_nexthop_grp_res(nhg, newg); 1915283a72a5SPetr Machata 191690f33bffSNikolay Aleksandrov rcu_assign_pointer(nhp->nh_grp, newg); 191790f33bffSNikolay Aleksandrov 191890f33bffSNikolay Aleksandrov list_del(&nhge->nh_list); 1919f4676ea7SIdo Schimmel free_percpu(nhge->stats); 192090f33bffSNikolay Aleksandrov nexthop_put(nhge->nh); 1921430a0491SDavid Ahern 19227c37c7e0SPetr Machata /* Removal of a NH from a resilient group is notified through 19237c37c7e0SPetr Machata * bucket notifications. 
19247c37c7e0SPetr Machata */ 1925de1d1ee3SPetr Machata if (newg->hash_threshold) { 19267c37c7e0SPetr Machata err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, 19277c37c7e0SPetr Machata &extack); 1928833a1065SIdo Schimmel if (err) 1929833a1065SIdo Schimmel pr_err("%s\n", extack._msg); 19307c37c7e0SPetr Machata } 1931833a1065SIdo Schimmel 1932430a0491SDavid Ahern if (nlinfo) 1933ac21753aSDavid Ahern nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); 1934430a0491SDavid Ahern } 1935430a0491SDavid Ahern 1936430a0491SDavid Ahern static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, 1937430a0491SDavid Ahern struct nl_info *nlinfo) 1938430a0491SDavid Ahern { 1939430a0491SDavid Ahern struct nh_grp_entry *nhge, *tmp; 1940430a0491SDavid Ahern 1941ac21753aSDavid Ahern list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) 1942ac21753aSDavid Ahern remove_nh_grp_entry(net, nhge, nlinfo); 1943430a0491SDavid Ahern 194490f33bffSNikolay Aleksandrov /* make sure all see the newly published array before releasing rtnl */ 1945df6afe2fSIdo Schimmel synchronize_net(); 1946430a0491SDavid Ahern } 1947430a0491SDavid Ahern 1948430a0491SDavid Ahern static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) 1949430a0491SDavid Ahern { 1950430a0491SDavid Ahern struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); 1951283a72a5SPetr Machata struct nh_res_table *res_table; 1952430a0491SDavid Ahern int i, num_nh = nhg->num_nh; 1953430a0491SDavid Ahern 1954430a0491SDavid Ahern for (i = 0; i < num_nh; ++i) { 1955430a0491SDavid Ahern struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1956430a0491SDavid Ahern 1957430a0491SDavid Ahern if (WARN_ON(!nhge->nh)) 1958430a0491SDavid Ahern continue; 1959430a0491SDavid Ahern 196090f33bffSNikolay Aleksandrov list_del_init(&nhge->nh_list); 1961430a0491SDavid Ahern } 1962283a72a5SPetr Machata 1963283a72a5SPetr Machata if (nhg->resilient) { 1964283a72a5SPetr Machata res_table = rtnl_dereference(nhg->res_table); 
1965283a72a5SPetr Machata nh_res_table_cancel_upkeep(res_table); 1966283a72a5SPetr Machata } 1967430a0491SDavid Ahern } 1968430a0491SDavid Ahern 19697bf4796dSDavid Ahern /* not called for nexthop replace */ 19704c7e8084SDavid Ahern static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) 19714c7e8084SDavid Ahern { 1972f88d8ea6SDavid Ahern struct fib6_info *f6i, *tmp; 19734c7e8084SDavid Ahern bool do_flush = false; 19744c7e8084SDavid Ahern struct fib_info *fi; 19754c7e8084SDavid Ahern 19764c7e8084SDavid Ahern list_for_each_entry(fi, &nh->fi_list, nh_list) { 19774c7e8084SDavid Ahern fi->fib_flags |= RTNH_F_DEAD; 19784c7e8084SDavid Ahern do_flush = true; 19794c7e8084SDavid Ahern } 19804c7e8084SDavid Ahern if (do_flush) 19814c7e8084SDavid Ahern fib_flush(net); 1982f88d8ea6SDavid Ahern 1983f88d8ea6SDavid Ahern /* ip6_del_rt removes the entry from this list hence the _safe */ 1984f88d8ea6SDavid Ahern list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { 1985f88d8ea6SDavid Ahern /* __ip6_del_rt does a release, so do a hold here */ 1986f88d8ea6SDavid Ahern fib6_info_hold(f6i); 19874f80116dSRoopa Prabhu ipv6_stub->ip6_del_rt(net, f6i, 1988bdf00bf2SKuniyuki Iwashima !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); 1989f88d8ea6SDavid Ahern } 19904c7e8084SDavid Ahern } 19914c7e8084SDavid Ahern 1992430a0491SDavid Ahern static void __remove_nexthop(struct net *net, struct nexthop *nh, 1993430a0491SDavid Ahern struct nl_info *nlinfo) 1994430a0491SDavid Ahern { 19954c7e8084SDavid Ahern __remove_nexthop_fib(net, nh); 19964c7e8084SDavid Ahern 1997430a0491SDavid Ahern if (nh->is_group) { 1998430a0491SDavid Ahern remove_nexthop_group(nh, nlinfo); 1999430a0491SDavid Ahern } else { 2000597cfe4fSDavid Ahern struct nh_info *nhi; 2001597cfe4fSDavid Ahern 2002597cfe4fSDavid Ahern nhi = rtnl_dereference(nh->nh_info); 2003597cfe4fSDavid Ahern if (nhi->fib_nhc.nhc_dev) 2004597cfe4fSDavid Ahern hlist_del(&nhi->dev_hash); 2005430a0491SDavid Ahern 2006430a0491SDavid Ahern 
remove_nexthop_from_groups(net, nh, nlinfo); 2007430a0491SDavid Ahern } 2008597cfe4fSDavid Ahern } 2009597cfe4fSDavid Ahern 2010ab84be7eSDavid Ahern static void remove_nexthop(struct net *net, struct nexthop *nh, 2011430a0491SDavid Ahern struct nl_info *nlinfo) 2012ab84be7eSDavid Ahern { 20133578d53dSIdo Schimmel call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL); 20140695564bSIdo Schimmel 2015ab84be7eSDavid Ahern /* remove from the tree */ 2016ab84be7eSDavid Ahern rb_erase(&nh->rb_node, &net->nexthop.rb_root); 2017ab84be7eSDavid Ahern 2018ab84be7eSDavid Ahern if (nlinfo) 2019ab84be7eSDavid Ahern nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo); 2020ab84be7eSDavid Ahern 2021430a0491SDavid Ahern __remove_nexthop(net, nh, nlinfo); 2022ab84be7eSDavid Ahern nh_base_seq_inc(net); 2023ab84be7eSDavid Ahern 2024ab84be7eSDavid Ahern nexthop_put(nh); 2025ab84be7eSDavid Ahern } 2026ab84be7eSDavid Ahern 20277bf4796dSDavid Ahern /* if any FIB entries reference this nexthop, any dst entries 20287bf4796dSDavid Ahern * need to be regenerated 20297bf4796dSDavid Ahern */ 20301005f19bSNikolay Aleksandrov static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, 20311005f19bSNikolay Aleksandrov struct nexthop *replaced_nh) 20327bf4796dSDavid Ahern { 20337bf4796dSDavid Ahern struct fib6_info *f6i; 20341005f19bSNikolay Aleksandrov struct nh_group *nhg; 20351005f19bSNikolay Aleksandrov int i; 20367bf4796dSDavid Ahern 20377bf4796dSDavid Ahern if (!list_empty(&nh->fi_list)) 20387bf4796dSDavid Ahern rt_cache_flush(net); 20397bf4796dSDavid Ahern 20407bf4796dSDavid Ahern list_for_each_entry(f6i, &nh->f6i_list, nh_list) 20417bf4796dSDavid Ahern ipv6_stub->fib6_update_sernum(net, f6i); 20421005f19bSNikolay Aleksandrov 20431005f19bSNikolay Aleksandrov /* if an IPv6 group was replaced, we have to release all old 20441005f19bSNikolay Aleksandrov * dsts to make sure all refcounts are released 20451005f19bSNikolay Aleksandrov */ 20461005f19bSNikolay Aleksandrov if (!replaced_nh->is_group) 
20471005f19bSNikolay Aleksandrov return; 20481005f19bSNikolay Aleksandrov 20491005f19bSNikolay Aleksandrov nhg = rtnl_dereference(replaced_nh->nh_grp); 20501005f19bSNikolay Aleksandrov for (i = 0; i < nhg->num_nh; i++) { 20511005f19bSNikolay Aleksandrov struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 20521005f19bSNikolay Aleksandrov struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info); 20531005f19bSNikolay Aleksandrov 20541005f19bSNikolay Aleksandrov if (nhi->family == AF_INET6) 20551005f19bSNikolay Aleksandrov ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh); 20561005f19bSNikolay Aleksandrov } 20577bf4796dSDavid Ahern } 20587bf4796dSDavid Ahern 20597bf4796dSDavid Ahern static int replace_nexthop_grp(struct net *net, struct nexthop *old, 2060597f48e4SPetr Machata struct nexthop *new, const struct nh_config *cfg, 20617bf4796dSDavid Ahern struct netlink_ext_ack *extack) 20627bf4796dSDavid Ahern { 2063283a72a5SPetr Machata struct nh_res_table *tmp_table = NULL; 2064283a72a5SPetr Machata struct nh_res_table *new_res_table; 2065283a72a5SPetr Machata struct nh_res_table *old_res_table; 20667bf4796dSDavid Ahern struct nh_group *oldg, *newg; 2067d144cc5fSIdo Schimmel int i, err; 20687bf4796dSDavid Ahern 20697bf4796dSDavid Ahern if (!new->is_group) { 20707bf4796dSDavid Ahern NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); 20717bf4796dSDavid Ahern return -EINVAL; 20727bf4796dSDavid Ahern } 20737bf4796dSDavid Ahern 20747bf4796dSDavid Ahern oldg = rtnl_dereference(old->nh_grp); 20757bf4796dSDavid Ahern newg = rtnl_dereference(new->nh_grp); 20767bf4796dSDavid Ahern 2077de1d1ee3SPetr Machata if (newg->hash_threshold != oldg->hash_threshold) { 2078283a72a5SPetr Machata NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type."); 2079283a72a5SPetr Machata return -EINVAL; 2080283a72a5SPetr Machata } 2081283a72a5SPetr Machata 2082de1d1ee3SPetr Machata if (newg->hash_threshold) { 2083283a72a5SPetr Machata err = 
call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, 2084283a72a5SPetr Machata extack); 2085283a72a5SPetr Machata if (err) 2086283a72a5SPetr Machata return err; 2087283a72a5SPetr Machata } else if (newg->resilient) { 2088283a72a5SPetr Machata new_res_table = rtnl_dereference(newg->res_table); 2089283a72a5SPetr Machata old_res_table = rtnl_dereference(oldg->res_table); 2090283a72a5SPetr Machata 2091283a72a5SPetr Machata /* Accept if num_nh_buckets was not given, but if it was 2092283a72a5SPetr Machata * given, demand that the value be correct. 2093283a72a5SPetr Machata */ 2094283a72a5SPetr Machata if (cfg->nh_grp_res_has_num_buckets && 2095283a72a5SPetr Machata cfg->nh_grp_res_num_buckets != 2096283a72a5SPetr Machata old_res_table->num_nh_buckets) { 2097283a72a5SPetr Machata NL_SET_ERR_MSG(extack, "Can not change number of buckets of a resilient nexthop group."); 2098283a72a5SPetr Machata return -EINVAL; 2099283a72a5SPetr Machata } 2100283a72a5SPetr Machata 21017c37c7e0SPetr Machata /* Emit a pre-replace notification so that listeners could veto 21027c37c7e0SPetr Machata * a potentially unsupported configuration. Otherwise, 21037c37c7e0SPetr Machata * individual bucket replacement notifications would need to be 21047c37c7e0SPetr Machata * vetoed, which is something that should only happen if the 21057c37c7e0SPetr Machata * bucket is currently active. 
21067c37c7e0SPetr Machata */ 21077c37c7e0SPetr Machata err = call_nexthop_res_table_notifiers(net, new, extack); 21087c37c7e0SPetr Machata if (err) 21097c37c7e0SPetr Machata return err; 21107c37c7e0SPetr Machata 2111283a72a5SPetr Machata if (cfg->nh_grp_res_has_idle_timer) 2112283a72a5SPetr Machata old_res_table->idle_timer = cfg->nh_grp_res_idle_timer; 2113283a72a5SPetr Machata if (cfg->nh_grp_res_has_unbalanced_timer) 2114283a72a5SPetr Machata old_res_table->unbalanced_timer = 2115283a72a5SPetr Machata cfg->nh_grp_res_unbalanced_timer; 2116283a72a5SPetr Machata 2117283a72a5SPetr Machata replace_nexthop_grp_res(oldg, newg); 2118283a72a5SPetr Machata 2119283a72a5SPetr Machata tmp_table = new_res_table; 2120283a72a5SPetr Machata rcu_assign_pointer(newg->res_table, old_res_table); 2121283a72a5SPetr Machata rcu_assign_pointer(newg->spare->res_table, old_res_table); 2122283a72a5SPetr Machata } 2123283a72a5SPetr Machata 21247bf4796dSDavid Ahern /* update parents - used by nexthop code for cleanup */ 21257bf4796dSDavid Ahern for (i = 0; i < newg->num_nh; i++) 21267bf4796dSDavid Ahern newg->nh_entries[i].nh_parent = old; 21277bf4796dSDavid Ahern 21287bf4796dSDavid Ahern rcu_assign_pointer(old->nh_grp, newg); 21297bf4796dSDavid Ahern 2130563f23b0SIdo Schimmel /* Make sure concurrent readers are not using 'oldg' anymore. 
*/ 2131563f23b0SIdo Schimmel synchronize_net(); 21327709efa6SNikolay Aleksandrov 21337709efa6SNikolay Aleksandrov if (newg->resilient) { 2134283a72a5SPetr Machata rcu_assign_pointer(oldg->res_table, tmp_table); 2135283a72a5SPetr Machata rcu_assign_pointer(oldg->spare->res_table, tmp_table); 2136283a72a5SPetr Machata } 2137283a72a5SPetr Machata 21387bf4796dSDavid Ahern for (i = 0; i < oldg->num_nh; i++) 21397bf4796dSDavid Ahern oldg->nh_entries[i].nh_parent = new; 21407bf4796dSDavid Ahern 21417bf4796dSDavid Ahern rcu_assign_pointer(new->nh_grp, oldg); 21427bf4796dSDavid Ahern 21437bf4796dSDavid Ahern return 0; 21447bf4796dSDavid Ahern } 21457bf4796dSDavid Ahern 2146885a3b15SIdo Schimmel static void nh_group_v4_update(struct nh_group *nhg) 2147885a3b15SIdo Schimmel { 2148885a3b15SIdo Schimmel struct nh_grp_entry *nhges; 2149885a3b15SIdo Schimmel bool has_v4 = false; 2150885a3b15SIdo Schimmel int i; 2151885a3b15SIdo Schimmel 2152885a3b15SIdo Schimmel nhges = nhg->nh_entries; 2153885a3b15SIdo Schimmel for (i = 0; i < nhg->num_nh; i++) { 2154885a3b15SIdo Schimmel struct nh_info *nhi; 2155885a3b15SIdo Schimmel 2156885a3b15SIdo Schimmel nhi = rtnl_dereference(nhges[i].nh->nh_info); 2157885a3b15SIdo Schimmel if (nhi->family == AF_INET) 2158885a3b15SIdo Schimmel has_v4 = true; 2159885a3b15SIdo Schimmel } 2160885a3b15SIdo Schimmel nhg->has_v4 = has_v4; 2161885a3b15SIdo Schimmel } 2162885a3b15SIdo Schimmel 21637c37c7e0SPetr Machata static int replace_nexthop_single_notify_res(struct net *net, 21647c37c7e0SPetr Machata struct nh_res_table *res_table, 21657c37c7e0SPetr Machata struct nexthop *old, 21667c37c7e0SPetr Machata struct nh_info *oldi, 21677c37c7e0SPetr Machata struct nh_info *newi, 21687c37c7e0SPetr Machata struct netlink_ext_ack *extack) 21697c37c7e0SPetr Machata { 21707c37c7e0SPetr Machata u32 nhg_id = res_table->nhg_id; 21717c37c7e0SPetr Machata int err; 21727c37c7e0SPetr Machata u16 i; 21737c37c7e0SPetr Machata 21747c37c7e0SPetr Machata for (i = 0; i < 
res_table->num_nh_buckets; i++) { 21757c37c7e0SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; 21767c37c7e0SPetr Machata struct nh_grp_entry *nhge; 21777c37c7e0SPetr Machata 21787c37c7e0SPetr Machata nhge = rtnl_dereference(bucket->nh_entry); 21797c37c7e0SPetr Machata if (nhge->nh == old) { 21807c37c7e0SPetr Machata err = __call_nexthop_res_bucket_notifiers(net, nhg_id, 21817c37c7e0SPetr Machata i, true, 21827c37c7e0SPetr Machata oldi, newi, 21837c37c7e0SPetr Machata extack); 21847c37c7e0SPetr Machata if (err) 21857c37c7e0SPetr Machata goto err_notify; 21867c37c7e0SPetr Machata } 21877c37c7e0SPetr Machata } 21887c37c7e0SPetr Machata 21897c37c7e0SPetr Machata return 0; 21907c37c7e0SPetr Machata 21917c37c7e0SPetr Machata err_notify: 21927c37c7e0SPetr Machata while (i-- > 0) { 21937c37c7e0SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; 21947c37c7e0SPetr Machata struct nh_grp_entry *nhge; 21957c37c7e0SPetr Machata 21967c37c7e0SPetr Machata nhge = rtnl_dereference(bucket->nh_entry); 21977c37c7e0SPetr Machata if (nhge->nh == old) 21987c37c7e0SPetr Machata __call_nexthop_res_bucket_notifiers(net, nhg_id, i, 21997c37c7e0SPetr Machata true, newi, oldi, 22007c37c7e0SPetr Machata extack); 22017c37c7e0SPetr Machata } 22027c37c7e0SPetr Machata return err; 22037c37c7e0SPetr Machata } 22047c37c7e0SPetr Machata 22057c37c7e0SPetr Machata static int replace_nexthop_single_notify(struct net *net, 22067c37c7e0SPetr Machata struct nexthop *group_nh, 22077c37c7e0SPetr Machata struct nexthop *old, 22087c37c7e0SPetr Machata struct nh_info *oldi, 22097c37c7e0SPetr Machata struct nh_info *newi, 22107c37c7e0SPetr Machata struct netlink_ext_ack *extack) 22117c37c7e0SPetr Machata { 22127c37c7e0SPetr Machata struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp); 22137c37c7e0SPetr Machata struct nh_res_table *res_table; 22147c37c7e0SPetr Machata 2215de1d1ee3SPetr Machata if (nhg->hash_threshold) { 22167c37c7e0SPetr Machata return 
call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, 22177c37c7e0SPetr Machata group_nh, extack); 22187c37c7e0SPetr Machata } else if (nhg->resilient) { 22197c37c7e0SPetr Machata res_table = rtnl_dereference(nhg->res_table); 22207c37c7e0SPetr Machata return replace_nexthop_single_notify_res(net, res_table, 22217c37c7e0SPetr Machata old, oldi, newi, 22227c37c7e0SPetr Machata extack); 22237c37c7e0SPetr Machata } 22247c37c7e0SPetr Machata 22257c37c7e0SPetr Machata return -EINVAL; 22267c37c7e0SPetr Machata } 22277c37c7e0SPetr Machata 22287bf4796dSDavid Ahern static int replace_nexthop_single(struct net *net, struct nexthop *old, 22297bf4796dSDavid Ahern struct nexthop *new, 22307bf4796dSDavid Ahern struct netlink_ext_ack *extack) 22317bf4796dSDavid Ahern { 2232f17bc33dSIdo Schimmel u8 old_protocol, old_nh_flags; 22337bf4796dSDavid Ahern struct nh_info *oldi, *newi; 2234f17bc33dSIdo Schimmel struct nh_grp_entry *nhge; 22358c09c9f9SIdo Schimmel int err; 22367bf4796dSDavid Ahern 22377bf4796dSDavid Ahern if (new->is_group) { 22387bf4796dSDavid Ahern NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group."); 22397bf4796dSDavid Ahern return -EINVAL; 22407bf4796dSDavid Ahern } 22417bf4796dSDavid Ahern 22428c09c9f9SIdo Schimmel err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); 22438c09c9f9SIdo Schimmel if (err) 22448c09c9f9SIdo Schimmel return err; 22458c09c9f9SIdo Schimmel 22468c09c9f9SIdo Schimmel /* Hardware flags were set on 'old' as 'new' is not in the red-black 22478c09c9f9SIdo Schimmel * tree. Therefore, inherit the flags from 'old' to 'new'. 
22488c09c9f9SIdo Schimmel */ 22498c09c9f9SIdo Schimmel new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP); 22508c09c9f9SIdo Schimmel 22517bf4796dSDavid Ahern oldi = rtnl_dereference(old->nh_info); 22527bf4796dSDavid Ahern newi = rtnl_dereference(new->nh_info); 22537bf4796dSDavid Ahern 22547bf4796dSDavid Ahern newi->nh_parent = old; 22557bf4796dSDavid Ahern oldi->nh_parent = new; 22567bf4796dSDavid Ahern 2257f17bc33dSIdo Schimmel old_protocol = old->protocol; 2258f17bc33dSIdo Schimmel old_nh_flags = old->nh_flags; 2259f17bc33dSIdo Schimmel 22607bf4796dSDavid Ahern old->protocol = new->protocol; 22617bf4796dSDavid Ahern old->nh_flags = new->nh_flags; 22627bf4796dSDavid Ahern 22637bf4796dSDavid Ahern rcu_assign_pointer(old->nh_info, newi); 22647bf4796dSDavid Ahern rcu_assign_pointer(new->nh_info, oldi); 22657bf4796dSDavid Ahern 2266f17bc33dSIdo Schimmel /* Send a replace notification for all the groups using the nexthop. */ 2267f17bc33dSIdo Schimmel list_for_each_entry(nhge, &old->grp_list, nh_list) { 2268f17bc33dSIdo Schimmel struct nexthop *nhp = nhge->nh_parent; 2269f17bc33dSIdo Schimmel 22707c37c7e0SPetr Machata err = replace_nexthop_single_notify(net, nhp, old, oldi, newi, 2271f17bc33dSIdo Schimmel extack); 2272f17bc33dSIdo Schimmel if (err) 2273f17bc33dSIdo Schimmel goto err_notify; 2274f17bc33dSIdo Schimmel } 2275f17bc33dSIdo Schimmel 2276885a3b15SIdo Schimmel /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially 2277885a3b15SIdo Schimmel * update IPv4 indication in all the groups using the nexthop. 
2278885a3b15SIdo Schimmel */ 2279885a3b15SIdo Schimmel if (oldi->family == AF_INET && newi->family == AF_INET6) { 2280885a3b15SIdo Schimmel list_for_each_entry(nhge, &old->grp_list, nh_list) { 2281885a3b15SIdo Schimmel struct nexthop *nhp = nhge->nh_parent; 2282885a3b15SIdo Schimmel struct nh_group *nhg; 2283885a3b15SIdo Schimmel 2284885a3b15SIdo Schimmel nhg = rtnl_dereference(nhp->nh_grp); 2285885a3b15SIdo Schimmel nh_group_v4_update(nhg); 2286885a3b15SIdo Schimmel } 2287885a3b15SIdo Schimmel } 2288885a3b15SIdo Schimmel 22897bf4796dSDavid Ahern return 0; 2290f17bc33dSIdo Schimmel 2291f17bc33dSIdo Schimmel err_notify: 2292f17bc33dSIdo Schimmel rcu_assign_pointer(new->nh_info, newi); 2293f17bc33dSIdo Schimmel rcu_assign_pointer(old->nh_info, oldi); 2294f17bc33dSIdo Schimmel old->nh_flags = old_nh_flags; 2295f17bc33dSIdo Schimmel old->protocol = old_protocol; 2296f17bc33dSIdo Schimmel oldi->nh_parent = old; 2297f17bc33dSIdo Schimmel newi->nh_parent = new; 2298f17bc33dSIdo Schimmel list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) { 2299f17bc33dSIdo Schimmel struct nexthop *nhp = nhge->nh_parent; 2300f17bc33dSIdo Schimmel 23017c37c7e0SPetr Machata replace_nexthop_single_notify(net, nhp, old, newi, oldi, NULL); 2302f17bc33dSIdo Schimmel } 2303f17bc33dSIdo Schimmel call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack); 2304f17bc33dSIdo Schimmel return err; 23057bf4796dSDavid Ahern } 23067bf4796dSDavid Ahern 23077bf4796dSDavid Ahern static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, 23087bf4796dSDavid Ahern struct nl_info *info) 23097bf4796dSDavid Ahern { 23107bf4796dSDavid Ahern struct fib6_info *f6i; 23117bf4796dSDavid Ahern 23127bf4796dSDavid Ahern if (!list_empty(&nh->fi_list)) { 23137bf4796dSDavid Ahern struct fib_info *fi; 23147bf4796dSDavid Ahern 23157bf4796dSDavid Ahern /* expectation is a few fib_info per nexthop and then 23167bf4796dSDavid Ahern * a lot of routes per fib_info. 
So mark the fib_info 23177bf4796dSDavid Ahern * and then walk the fib tables once 23187bf4796dSDavid Ahern */ 23197bf4796dSDavid Ahern list_for_each_entry(fi, &nh->fi_list, nh_list) 23207bf4796dSDavid Ahern fi->nh_updated = true; 23217bf4796dSDavid Ahern 23227bf4796dSDavid Ahern fib_info_notify_update(net, info); 23237bf4796dSDavid Ahern 23247bf4796dSDavid Ahern list_for_each_entry(fi, &nh->fi_list, nh_list) 23257bf4796dSDavid Ahern fi->nh_updated = false; 23267bf4796dSDavid Ahern } 23277bf4796dSDavid Ahern 23287bf4796dSDavid Ahern list_for_each_entry(f6i, &nh->f6i_list, nh_list) 23297bf4796dSDavid Ahern ipv6_stub->fib6_rt_update(net, f6i, info); 23307bf4796dSDavid Ahern } 23317bf4796dSDavid Ahern 23327bf4796dSDavid Ahern /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries 23337bf4796dSDavid Ahern * linked to this nexthop and for all groups that the nexthop 23347bf4796dSDavid Ahern * is a member of 23357bf4796dSDavid Ahern */ 23367bf4796dSDavid Ahern static void nexthop_replace_notify(struct net *net, struct nexthop *nh, 23377bf4796dSDavid Ahern struct nl_info *info) 23387bf4796dSDavid Ahern { 23397bf4796dSDavid Ahern struct nh_grp_entry *nhge; 23407bf4796dSDavid Ahern 23417bf4796dSDavid Ahern __nexthop_replace_notify(net, nh, info); 23427bf4796dSDavid Ahern 23437bf4796dSDavid Ahern list_for_each_entry(nhge, &nh->grp_list, nh_list) 23447bf4796dSDavid Ahern __nexthop_replace_notify(net, nhge->nh_parent, info); 23457bf4796dSDavid Ahern } 23467bf4796dSDavid Ahern 2347ab84be7eSDavid Ahern static int replace_nexthop(struct net *net, struct nexthop *old, 2348597f48e4SPetr Machata struct nexthop *new, const struct nh_config *cfg, 2349597f48e4SPetr Machata struct netlink_ext_ack *extack) 2350ab84be7eSDavid Ahern { 23517bf4796dSDavid Ahern bool new_is_reject = false; 23527bf4796dSDavid Ahern struct nh_grp_entry *nhge; 23537bf4796dSDavid Ahern int err; 23547bf4796dSDavid Ahern 23557bf4796dSDavid Ahern /* check that existing FIB entries are ok with the 
23567bf4796dSDavid Ahern * new nexthop definition 23577bf4796dSDavid Ahern */ 23587bf4796dSDavid Ahern err = fib_check_nh_list(old, new, extack); 23597bf4796dSDavid Ahern if (err) 23607bf4796dSDavid Ahern return err; 23617bf4796dSDavid Ahern 23627bf4796dSDavid Ahern err = fib6_check_nh_list(old, new, extack); 23637bf4796dSDavid Ahern if (err) 23647bf4796dSDavid Ahern return err; 23657bf4796dSDavid Ahern 23667bf4796dSDavid Ahern if (!new->is_group) { 23677bf4796dSDavid Ahern struct nh_info *nhi = rtnl_dereference(new->nh_info); 23687bf4796dSDavid Ahern 23697bf4796dSDavid Ahern new_is_reject = nhi->reject_nh; 23707bf4796dSDavid Ahern } 23717bf4796dSDavid Ahern 23727bf4796dSDavid Ahern list_for_each_entry(nhge, &old->grp_list, nh_list) { 23737bf4796dSDavid Ahern /* if new nexthop is a blackhole, any groups using this 23747bf4796dSDavid Ahern * nexthop cannot have more than 1 path 23757bf4796dSDavid Ahern */ 23767bf4796dSDavid Ahern if (new_is_reject && 23777bf4796dSDavid Ahern nexthop_num_path(nhge->nh_parent) > 1) { 23787bf4796dSDavid Ahern NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path"); 23797bf4796dSDavid Ahern return -EINVAL; 23807bf4796dSDavid Ahern } 23817bf4796dSDavid Ahern 23827bf4796dSDavid Ahern err = fib_check_nh_list(nhge->nh_parent, new, extack); 23837bf4796dSDavid Ahern if (err) 23847bf4796dSDavid Ahern return err; 23857bf4796dSDavid Ahern 23867bf4796dSDavid Ahern err = fib6_check_nh_list(nhge->nh_parent, new, extack); 23877bf4796dSDavid Ahern if (err) 23887bf4796dSDavid Ahern return err; 23897bf4796dSDavid Ahern } 23907bf4796dSDavid Ahern 23917bf4796dSDavid Ahern if (old->is_group) 2392597f48e4SPetr Machata err = replace_nexthop_grp(net, old, new, cfg, extack); 23937bf4796dSDavid Ahern else 23947bf4796dSDavid Ahern err = replace_nexthop_single(net, old, new, extack); 23957bf4796dSDavid Ahern 23967bf4796dSDavid Ahern if (!err) { 23971005f19bSNikolay Aleksandrov nh_rt_cache_flush(net, old, new); 
23987bf4796dSDavid Ahern 23997bf4796dSDavid Ahern __remove_nexthop(net, new, NULL); 24007bf4796dSDavid Ahern nexthop_put(new); 24017bf4796dSDavid Ahern } 24027bf4796dSDavid Ahern 24037bf4796dSDavid Ahern return err; 2404ab84be7eSDavid Ahern } 2405ab84be7eSDavid Ahern 2406ab84be7eSDavid Ahern /* called with rtnl_lock held */ 2407ab84be7eSDavid Ahern static int insert_nexthop(struct net *net, struct nexthop *new_nh, 2408ab84be7eSDavid Ahern struct nh_config *cfg, struct netlink_ext_ack *extack) 2409ab84be7eSDavid Ahern { 2410ab84be7eSDavid Ahern struct rb_node **pp, *parent = NULL, *next; 2411ab84be7eSDavid Ahern struct rb_root *root = &net->nexthop.rb_root; 2412ab84be7eSDavid Ahern bool replace = !!(cfg->nlflags & NLM_F_REPLACE); 2413ab84be7eSDavid Ahern bool create = !!(cfg->nlflags & NLM_F_CREATE); 2414ab84be7eSDavid Ahern u32 new_id = new_nh->id; 24157bf4796dSDavid Ahern int replace_notify = 0; 2416ab84be7eSDavid Ahern int rc = -EEXIST; 2417ab84be7eSDavid Ahern 2418ab84be7eSDavid Ahern pp = &root->rb_node; 2419ab84be7eSDavid Ahern while (1) { 2420ab84be7eSDavid Ahern struct nexthop *nh; 2421ab84be7eSDavid Ahern 2422233c6378SIdo Schimmel next = *pp; 2423ab84be7eSDavid Ahern if (!next) 2424ab84be7eSDavid Ahern break; 2425ab84be7eSDavid Ahern 2426ab84be7eSDavid Ahern parent = next; 2427ab84be7eSDavid Ahern 2428ab84be7eSDavid Ahern nh = rb_entry(parent, struct nexthop, rb_node); 2429ab84be7eSDavid Ahern if (new_id < nh->id) { 2430ab84be7eSDavid Ahern pp = &next->rb_left; 2431ab84be7eSDavid Ahern } else if (new_id > nh->id) { 2432ab84be7eSDavid Ahern pp = &next->rb_right; 2433ab84be7eSDavid Ahern } else if (replace) { 2434597f48e4SPetr Machata rc = replace_nexthop(net, nh, new_nh, cfg, extack); 24357bf4796dSDavid Ahern if (!rc) { 2436ab84be7eSDavid Ahern new_nh = nh; /* send notification with old nh */ 24377bf4796dSDavid Ahern replace_notify = 1; 24387bf4796dSDavid Ahern } 2439ab84be7eSDavid Ahern goto out; 2440ab84be7eSDavid Ahern } else { 2441ab84be7eSDavid Ahern /* 
id already exists and not a replace */ 2442ab84be7eSDavid Ahern goto out; 2443ab84be7eSDavid Ahern } 2444ab84be7eSDavid Ahern } 2445ab84be7eSDavid Ahern 2446ab84be7eSDavid Ahern if (replace && !create) { 2447ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists"); 2448ab84be7eSDavid Ahern rc = -ENOENT; 2449ab84be7eSDavid Ahern goto out; 2450ab84be7eSDavid Ahern } 2451ab84be7eSDavid Ahern 2452283a72a5SPetr Machata if (new_nh->is_group) { 2453283a72a5SPetr Machata struct nh_group *nhg = rtnl_dereference(new_nh->nh_grp); 2454283a72a5SPetr Machata struct nh_res_table *res_table; 2455283a72a5SPetr Machata 2456283a72a5SPetr Machata if (nhg->resilient) { 2457283a72a5SPetr Machata res_table = rtnl_dereference(nhg->res_table); 2458283a72a5SPetr Machata 2459283a72a5SPetr Machata /* Not passing the number of buckets is OK when 2460283a72a5SPetr Machata * replacing, but not when creating a new group. 2461283a72a5SPetr Machata */ 2462283a72a5SPetr Machata if (!cfg->nh_grp_res_has_num_buckets) { 2463283a72a5SPetr Machata NL_SET_ERR_MSG(extack, "Number of buckets not specified for nexthop group insertion"); 2464283a72a5SPetr Machata rc = -EINVAL; 2465283a72a5SPetr Machata goto out; 2466283a72a5SPetr Machata } 2467283a72a5SPetr Machata 2468283a72a5SPetr Machata nh_res_group_rebalance(nhg, res_table); 24697c37c7e0SPetr Machata 24707c37c7e0SPetr Machata /* Do not send bucket notifications, we do full 24717c37c7e0SPetr Machata * notification below. 24727c37c7e0SPetr Machata */ 24730b4818aaSPetr Machata nh_res_table_upkeep(res_table, false, false); 2474283a72a5SPetr Machata } 2475283a72a5SPetr Machata } 2476283a72a5SPetr Machata 2477ab84be7eSDavid Ahern rb_link_node_rcu(&new_nh->rb_node, parent, pp); 2478ab84be7eSDavid Ahern rb_insert_color(&new_nh->rb_node, root); 2479732d167bSIdo Schimmel 2480de1d1ee3SPetr Machata /* The initial insertion is a full notification for hash-threshold as 2481de1d1ee3SPetr Machata * well as resilient groups. 
24827c37c7e0SPetr Machata */ 2483732d167bSIdo Schimmel rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack); 2484732d167bSIdo Schimmel if (rc) 2485732d167bSIdo Schimmel rb_erase(&new_nh->rb_node, &net->nexthop.rb_root); 2486732d167bSIdo Schimmel 2487ab84be7eSDavid Ahern out: 2488ab84be7eSDavid Ahern if (!rc) { 2489ab84be7eSDavid Ahern nh_base_seq_inc(net); 2490ab84be7eSDavid Ahern nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); 2491bdf00bf2SKuniyuki Iwashima if (replace_notify && 2492bdf00bf2SKuniyuki Iwashima READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)) 24937bf4796dSDavid Ahern nexthop_replace_notify(net, new_nh, &cfg->nlinfo); 2494ab84be7eSDavid Ahern } 2495ab84be7eSDavid Ahern 2496ab84be7eSDavid Ahern return rc; 2497ab84be7eSDavid Ahern } 2498ab84be7eSDavid Ahern 2499597cfe4fSDavid Ahern /* rtnl */ 2500597cfe4fSDavid Ahern /* remove all nexthops tied to a device being deleted */ 250176c03bf8SIdo Schimmel static void nexthop_flush_dev(struct net_device *dev, unsigned long event) 2502597cfe4fSDavid Ahern { 2503597cfe4fSDavid Ahern unsigned int hash = nh_dev_hashfn(dev->ifindex); 2504597cfe4fSDavid Ahern struct net *net = dev_net(dev); 2505597cfe4fSDavid Ahern struct hlist_head *head = &net->nexthop.devhash[hash]; 2506597cfe4fSDavid Ahern struct hlist_node *n; 2507597cfe4fSDavid Ahern struct nh_info *nhi; 2508597cfe4fSDavid Ahern 2509597cfe4fSDavid Ahern hlist_for_each_entry_safe(nhi, n, head, dev_hash) { 2510597cfe4fSDavid Ahern if (nhi->fib_nhc.nhc_dev != dev) 2511597cfe4fSDavid Ahern continue; 2512597cfe4fSDavid Ahern 251376c03bf8SIdo Schimmel if (nhi->reject_nh && 251476c03bf8SIdo Schimmel (event == NETDEV_DOWN || event == NETDEV_CHANGE)) 251576c03bf8SIdo Schimmel continue; 251676c03bf8SIdo Schimmel 2517430a0491SDavid Ahern remove_nexthop(net, nhi->nh_parent, NULL); 2518597cfe4fSDavid Ahern } 2519597cfe4fSDavid Ahern } 2520597cfe4fSDavid Ahern 2521ab84be7eSDavid Ahern /* rtnl; called when net namespace is deleted */ 
2522ab84be7eSDavid Ahern static void flush_all_nexthops(struct net *net) 2523ab84be7eSDavid Ahern { 2524ab84be7eSDavid Ahern struct rb_root *root = &net->nexthop.rb_root; 2525ab84be7eSDavid Ahern struct rb_node *node; 2526ab84be7eSDavid Ahern struct nexthop *nh; 2527ab84be7eSDavid Ahern 2528ab84be7eSDavid Ahern while ((node = rb_first(root))) { 2529ab84be7eSDavid Ahern nh = rb_entry(node, struct nexthop, rb_node); 2530430a0491SDavid Ahern remove_nexthop(net, nh, NULL); 2531ab84be7eSDavid Ahern cond_resched(); 2532ab84be7eSDavid Ahern } 2533ab84be7eSDavid Ahern } 2534ab84be7eSDavid Ahern 2535430a0491SDavid Ahern static struct nexthop *nexthop_create_group(struct net *net, 2536430a0491SDavid Ahern struct nh_config *cfg) 2537430a0491SDavid Ahern { 2538430a0491SDavid Ahern struct nlattr *grps_attr = cfg->nh_grp; 2539430a0491SDavid Ahern struct nexthop_grp *entry = nla_data(grps_attr); 254090f33bffSNikolay Aleksandrov u16 num_nh = nla_len(grps_attr) / sizeof(*entry); 2541430a0491SDavid Ahern struct nh_group *nhg; 2542430a0491SDavid Ahern struct nexthop *nh; 2543283a72a5SPetr Machata int err; 2544430a0491SDavid Ahern int i; 2545430a0491SDavid Ahern 2546eeaac363SNikolay Aleksandrov if (WARN_ON(!num_nh)) 2547eeaac363SNikolay Aleksandrov return ERR_PTR(-EINVAL); 2548eeaac363SNikolay Aleksandrov 2549430a0491SDavid Ahern nh = nexthop_alloc(); 2550430a0491SDavid Ahern if (!nh) 2551430a0491SDavid Ahern return ERR_PTR(-ENOMEM); 2552430a0491SDavid Ahern 2553430a0491SDavid Ahern nh->is_group = 1; 2554430a0491SDavid Ahern 255590f33bffSNikolay Aleksandrov nhg = nexthop_grp_alloc(num_nh); 2556430a0491SDavid Ahern if (!nhg) { 2557430a0491SDavid Ahern kfree(nh); 2558430a0491SDavid Ahern return ERR_PTR(-ENOMEM); 2559430a0491SDavid Ahern } 2560430a0491SDavid Ahern 256190f33bffSNikolay Aleksandrov /* spare group used for removals */ 256290f33bffSNikolay Aleksandrov nhg->spare = nexthop_grp_alloc(num_nh); 2563dafe2078SPatrick Eigensatz if (!nhg->spare) { 256490f33bffSNikolay Aleksandrov 
kfree(nhg); 256590f33bffSNikolay Aleksandrov kfree(nh); 2566dafe2078SPatrick Eigensatz return ERR_PTR(-ENOMEM); 256790f33bffSNikolay Aleksandrov } 256890f33bffSNikolay Aleksandrov nhg->spare->spare = nhg; 256990f33bffSNikolay Aleksandrov 2570430a0491SDavid Ahern for (i = 0; i < nhg->num_nh; ++i) { 2571430a0491SDavid Ahern struct nexthop *nhe; 2572430a0491SDavid Ahern struct nh_info *nhi; 2573430a0491SDavid Ahern 2574430a0491SDavid Ahern nhe = nexthop_find_by_id(net, entry[i].id); 2575283a72a5SPetr Machata if (!nexthop_get(nhe)) { 2576283a72a5SPetr Machata err = -ENOENT; 2577430a0491SDavid Ahern goto out_no_nh; 2578283a72a5SPetr Machata } 2579430a0491SDavid Ahern 2580430a0491SDavid Ahern nhi = rtnl_dereference(nhe->nh_info); 2581430a0491SDavid Ahern if (nhi->family == AF_INET) 2582430a0491SDavid Ahern nhg->has_v4 = true; 2583430a0491SDavid Ahern 2584f4676ea7SIdo Schimmel nhg->nh_entries[i].stats = 2585f4676ea7SIdo Schimmel netdev_alloc_pcpu_stats(struct nh_grp_entry_stats); 2586f4676ea7SIdo Schimmel if (!nhg->nh_entries[i].stats) { 2587f4676ea7SIdo Schimmel err = -ENOMEM; 2588f4676ea7SIdo Schimmel nexthop_put(nhe); 2589f4676ea7SIdo Schimmel goto out_no_nh; 2590f4676ea7SIdo Schimmel } 2591430a0491SDavid Ahern nhg->nh_entries[i].nh = nhe; 2592430a0491SDavid Ahern nhg->nh_entries[i].weight = entry[i].weight + 1; 2593430a0491SDavid Ahern list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list); 2594430a0491SDavid Ahern nhg->nh_entries[i].nh_parent = nh; 2595430a0491SDavid Ahern } 2596430a0491SDavid Ahern 259790e1a9e2SPetr Machata if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { 2598de1d1ee3SPetr Machata nhg->hash_threshold = 1; 259990e1a9e2SPetr Machata nhg->is_multipath = true; 2600710ec562SIdo Schimmel } else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) { 2601283a72a5SPetr Machata struct nh_res_table *res_table; 2602283a72a5SPetr Machata 2603283a72a5SPetr Machata res_table = nexthop_res_table_alloc(net, cfg->nh_id, cfg); 2604283a72a5SPetr Machata if (!res_table) { 
2605283a72a5SPetr Machata err = -ENOMEM; 2606710ec562SIdo Schimmel goto out_no_nh; 260790e1a9e2SPetr Machata } 2608720ccd9aSPetr Machata 2609283a72a5SPetr Machata rcu_assign_pointer(nhg->spare->res_table, res_table); 2610283a72a5SPetr Machata rcu_assign_pointer(nhg->res_table, res_table); 2611283a72a5SPetr Machata nhg->resilient = true; 2612283a72a5SPetr Machata nhg->is_multipath = true; 2613283a72a5SPetr Machata } 2614283a72a5SPetr Machata 2615de1d1ee3SPetr Machata WARN_ON_ONCE(nhg->hash_threshold + nhg->resilient != 1); 2616720ccd9aSPetr Machata 2617de1d1ee3SPetr Machata if (nhg->hash_threshold) 2618de1d1ee3SPetr Machata nh_hthr_group_rebalance(nhg); 2619430a0491SDavid Ahern 262038428d68SRoopa Prabhu if (cfg->nh_fdb) 2621ce9ac056SDavid Ahern nhg->fdb_nh = 1; 262238428d68SRoopa Prabhu 2623430a0491SDavid Ahern rcu_assign_pointer(nh->nh_grp, nhg); 2624430a0491SDavid Ahern 2625430a0491SDavid Ahern return nh; 2626430a0491SDavid Ahern 2627430a0491SDavid Ahern out_no_nh: 26287b01e53eSIdo Schimmel for (i--; i >= 0; --i) { 26297b01e53eSIdo Schimmel list_del(&nhg->nh_entries[i].nh_list); 2630f4676ea7SIdo Schimmel free_percpu(nhg->nh_entries[i].stats); 2631430a0491SDavid Ahern nexthop_put(nhg->nh_entries[i].nh); 26327b01e53eSIdo Schimmel } 2633430a0491SDavid Ahern 263490f33bffSNikolay Aleksandrov kfree(nhg->spare); 2635430a0491SDavid Ahern kfree(nhg); 2636430a0491SDavid Ahern kfree(nh); 2637430a0491SDavid Ahern 2638283a72a5SPetr Machata return ERR_PTR(err); 2639430a0491SDavid Ahern } 2640430a0491SDavid Ahern 2641597cfe4fSDavid Ahern static int nh_create_ipv4(struct net *net, struct nexthop *nh, 2642597cfe4fSDavid Ahern struct nh_info *nhi, struct nh_config *cfg, 2643597cfe4fSDavid Ahern struct netlink_ext_ack *extack) 2644597cfe4fSDavid Ahern { 2645597cfe4fSDavid Ahern struct fib_nh *fib_nh = &nhi->fib_nh; 2646597cfe4fSDavid Ahern struct fib_config fib_cfg = { 2647597cfe4fSDavid Ahern .fc_oif = cfg->nh_ifindex, 2648597cfe4fSDavid Ahern .fc_gw4 = cfg->gw.ipv4, 
2649597cfe4fSDavid Ahern .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0, 2650597cfe4fSDavid Ahern .fc_flags = cfg->nh_flags, 26519aca491eSRyoga Saito .fc_nlinfo = cfg->nlinfo, 2652b513bd03SDavid Ahern .fc_encap = cfg->nh_encap, 2653b513bd03SDavid Ahern .fc_encap_type = cfg->nh_encap_type, 2654597cfe4fSDavid Ahern }; 265538428d68SRoopa Prabhu u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN); 2656c76c9925SColin Ian King int err; 2657597cfe4fSDavid Ahern 2658597cfe4fSDavid Ahern err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); 2659597cfe4fSDavid Ahern if (err) { 2660597cfe4fSDavid Ahern fib_nh_release(net, fib_nh); 2661597cfe4fSDavid Ahern goto out; 2662597cfe4fSDavid Ahern } 2663597cfe4fSDavid Ahern 2664ce9ac056SDavid Ahern if (nhi->fdb_nh) 266538428d68SRoopa Prabhu goto out; 266638428d68SRoopa Prabhu 2667597cfe4fSDavid Ahern /* sets nh_dev if successful */ 2668597cfe4fSDavid Ahern err = fib_check_nh(net, fib_nh, tb_id, 0, extack); 2669597cfe4fSDavid Ahern if (!err) { 2670597cfe4fSDavid Ahern nh->nh_flags = fib_nh->fib_nh_flags; 2671dcb1ecb5SDavid Ahern fib_info_update_nhc_saddr(net, &fib_nh->nh_common, 2672bac0f937SNicolas Dichtel !fib_nh->fib_nh_scope ? 
0 : fib_nh->fib_nh_scope - 1); 2673597cfe4fSDavid Ahern } else { 2674597cfe4fSDavid Ahern fib_nh_release(net, fib_nh); 2675597cfe4fSDavid Ahern } 2676597cfe4fSDavid Ahern out: 2677597cfe4fSDavid Ahern return err; 2678597cfe4fSDavid Ahern } 2679597cfe4fSDavid Ahern 268053010f99SDavid Ahern static int nh_create_ipv6(struct net *net, struct nexthop *nh, 268153010f99SDavid Ahern struct nh_info *nhi, struct nh_config *cfg, 268253010f99SDavid Ahern struct netlink_ext_ack *extack) 268353010f99SDavid Ahern { 268453010f99SDavid Ahern struct fib6_nh *fib6_nh = &nhi->fib6_nh; 268553010f99SDavid Ahern struct fib6_config fib6_cfg = { 268653010f99SDavid Ahern .fc_table = l3mdev_fib_table(cfg->dev), 268753010f99SDavid Ahern .fc_ifindex = cfg->nh_ifindex, 268853010f99SDavid Ahern .fc_gateway = cfg->gw.ipv6, 268953010f99SDavid Ahern .fc_flags = cfg->nh_flags, 26909aca491eSRyoga Saito .fc_nlinfo = cfg->nlinfo, 2691b513bd03SDavid Ahern .fc_encap = cfg->nh_encap, 2692b513bd03SDavid Ahern .fc_encap_type = cfg->nh_encap_type, 269338428d68SRoopa Prabhu .fc_is_fdb = cfg->nh_fdb, 269453010f99SDavid Ahern }; 26956f43e525SColin Ian King int err; 269653010f99SDavid Ahern 269753010f99SDavid Ahern if (!ipv6_addr_any(&cfg->gw.ipv6)) 269853010f99SDavid Ahern fib6_cfg.fc_flags |= RTF_GATEWAY; 269953010f99SDavid Ahern 270053010f99SDavid Ahern /* sets nh_dev if successful */ 270153010f99SDavid Ahern err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, 270253010f99SDavid Ahern extack); 27031c743127SNikolay Aleksandrov if (err) { 27041c743127SNikolay Aleksandrov /* IPv6 is not enabled, don't call fib6_nh_release */ 27051c743127SNikolay Aleksandrov if (err == -EAFNOSUPPORT) 27061c743127SNikolay Aleksandrov goto out; 270753010f99SDavid Ahern ipv6_stub->fib6_nh_release(fib6_nh); 27081c743127SNikolay Aleksandrov } else { 270953010f99SDavid Ahern nh->nh_flags = fib6_nh->fib_nh_flags; 27101c743127SNikolay Aleksandrov } 27111c743127SNikolay Aleksandrov out: 271253010f99SDavid Ahern return err; 
271353010f99SDavid Ahern } 271453010f99SDavid Ahern 2715ab84be7eSDavid Ahern static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, 2716ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 2717ab84be7eSDavid Ahern { 2718ab84be7eSDavid Ahern struct nh_info *nhi; 2719ab84be7eSDavid Ahern struct nexthop *nh; 2720ab84be7eSDavid Ahern int err = 0; 2721ab84be7eSDavid Ahern 2722ab84be7eSDavid Ahern nh = nexthop_alloc(); 2723ab84be7eSDavid Ahern if (!nh) 2724ab84be7eSDavid Ahern return ERR_PTR(-ENOMEM); 2725ab84be7eSDavid Ahern 2726ab84be7eSDavid Ahern nhi = kzalloc(sizeof(*nhi), GFP_KERNEL); 2727ab84be7eSDavid Ahern if (!nhi) { 2728ab84be7eSDavid Ahern kfree(nh); 2729ab84be7eSDavid Ahern return ERR_PTR(-ENOMEM); 2730ab84be7eSDavid Ahern } 2731ab84be7eSDavid Ahern 2732ab84be7eSDavid Ahern nh->nh_flags = cfg->nh_flags; 2733ab84be7eSDavid Ahern nh->net = net; 2734ab84be7eSDavid Ahern 2735ab84be7eSDavid Ahern nhi->nh_parent = nh; 2736ab84be7eSDavid Ahern nhi->family = cfg->nh_family; 2737ab84be7eSDavid Ahern nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; 2738ab84be7eSDavid Ahern 273938428d68SRoopa Prabhu if (cfg->nh_fdb) 2740ce9ac056SDavid Ahern nhi->fdb_nh = 1; 274138428d68SRoopa Prabhu 2742ab84be7eSDavid Ahern if (cfg->nh_blackhole) { 2743ab84be7eSDavid Ahern nhi->reject_nh = 1; 2744ab84be7eSDavid Ahern cfg->nh_ifindex = net->loopback_dev->ifindex; 2745ab84be7eSDavid Ahern } 2746ab84be7eSDavid Ahern 2747597cfe4fSDavid Ahern switch (cfg->nh_family) { 2748597cfe4fSDavid Ahern case AF_INET: 2749597cfe4fSDavid Ahern err = nh_create_ipv4(net, nh, nhi, cfg, extack); 2750597cfe4fSDavid Ahern break; 275153010f99SDavid Ahern case AF_INET6: 275253010f99SDavid Ahern err = nh_create_ipv6(net, nh, nhi, cfg, extack); 275353010f99SDavid Ahern break; 2754597cfe4fSDavid Ahern } 2755597cfe4fSDavid Ahern 2756ab84be7eSDavid Ahern if (err) { 2757ab84be7eSDavid Ahern kfree(nhi); 2758ab84be7eSDavid Ahern kfree(nh); 2759ab84be7eSDavid Ahern return ERR_PTR(err); 2760ab84be7eSDavid 
Ahern } 2761ab84be7eSDavid Ahern 2762597cfe4fSDavid Ahern /* add the entry to the device based hash */ 2763ce9ac056SDavid Ahern if (!nhi->fdb_nh) 2764597cfe4fSDavid Ahern nexthop_devhash_add(net, nhi); 2765597cfe4fSDavid Ahern 2766ab84be7eSDavid Ahern rcu_assign_pointer(nh->nh_info, nhi); 2767ab84be7eSDavid Ahern 2768ab84be7eSDavid Ahern return nh; 2769ab84be7eSDavid Ahern } 2770ab84be7eSDavid Ahern 2771ab84be7eSDavid Ahern /* called with rtnl lock held */ 2772ab84be7eSDavid Ahern static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg, 2773ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 2774ab84be7eSDavid Ahern { 2775ab84be7eSDavid Ahern struct nexthop *nh; 2776ab84be7eSDavid Ahern int err; 2777ab84be7eSDavid Ahern 2778ab84be7eSDavid Ahern if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) { 2779ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Replace requires nexthop id"); 2780ab84be7eSDavid Ahern return ERR_PTR(-EINVAL); 2781ab84be7eSDavid Ahern } 2782ab84be7eSDavid Ahern 2783ab84be7eSDavid Ahern if (!cfg->nh_id) { 2784ab84be7eSDavid Ahern cfg->nh_id = nh_find_unused_id(net); 2785ab84be7eSDavid Ahern if (!cfg->nh_id) { 2786ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "No unused id"); 2787ab84be7eSDavid Ahern return ERR_PTR(-EINVAL); 2788ab84be7eSDavid Ahern } 2789ab84be7eSDavid Ahern } 2790ab84be7eSDavid Ahern 2791430a0491SDavid Ahern if (cfg->nh_grp) 2792430a0491SDavid Ahern nh = nexthop_create_group(net, cfg); 2793430a0491SDavid Ahern else 2794ab84be7eSDavid Ahern nh = nexthop_create(net, cfg, extack); 2795430a0491SDavid Ahern 2796ab84be7eSDavid Ahern if (IS_ERR(nh)) 2797ab84be7eSDavid Ahern return nh; 2798ab84be7eSDavid Ahern 2799ab84be7eSDavid Ahern refcount_set(&nh->refcnt, 1); 2800ab84be7eSDavid Ahern nh->id = cfg->nh_id; 2801ab84be7eSDavid Ahern nh->protocol = cfg->nh_protocol; 2802ab84be7eSDavid Ahern nh->net = net; 2803ab84be7eSDavid Ahern 2804ab84be7eSDavid Ahern err = insert_nexthop(net, nh, cfg, extack); 2805ab84be7eSDavid Ahern if 
(err) { 2806430a0491SDavid Ahern __remove_nexthop(net, nh, NULL); 2807ab84be7eSDavid Ahern nexthop_put(nh); 2808ab84be7eSDavid Ahern nh = ERR_PTR(err); 2809ab84be7eSDavid Ahern } 2810ab84be7eSDavid Ahern 2811ab84be7eSDavid Ahern return nh; 2812ab84be7eSDavid Ahern } 2813ab84be7eSDavid Ahern 2814a2601e2bSPetr Machata static int rtm_nh_get_timer(struct nlattr *attr, unsigned long fallback, 2815a2601e2bSPetr Machata unsigned long *timer_p, bool *has_p, 2816a2601e2bSPetr Machata struct netlink_ext_ack *extack) 2817a2601e2bSPetr Machata { 2818a2601e2bSPetr Machata unsigned long timer; 2819a2601e2bSPetr Machata u32 value; 2820a2601e2bSPetr Machata 2821a2601e2bSPetr Machata if (!attr) { 2822a2601e2bSPetr Machata *timer_p = fallback; 2823a2601e2bSPetr Machata *has_p = false; 2824a2601e2bSPetr Machata return 0; 2825a2601e2bSPetr Machata } 2826a2601e2bSPetr Machata 2827a2601e2bSPetr Machata value = nla_get_u32(attr); 2828a2601e2bSPetr Machata timer = clock_t_to_jiffies(value); 2829a2601e2bSPetr Machata if (timer == ~0UL) { 2830a2601e2bSPetr Machata NL_SET_ERR_MSG(extack, "Timer value too large"); 2831a2601e2bSPetr Machata return -EINVAL; 2832a2601e2bSPetr Machata } 2833a2601e2bSPetr Machata 2834a2601e2bSPetr Machata *timer_p = timer; 2835a2601e2bSPetr Machata *has_p = true; 2836a2601e2bSPetr Machata return 0; 2837a2601e2bSPetr Machata } 2838a2601e2bSPetr Machata 2839a2601e2bSPetr Machata static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg, 2840a2601e2bSPetr Machata struct netlink_ext_ack *extack) 2841a2601e2bSPetr Machata { 2842a2601e2bSPetr Machata struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_policy_new)] = {}; 2843a2601e2bSPetr Machata int err; 2844a2601e2bSPetr Machata 2845a2601e2bSPetr Machata if (res) { 2846a2601e2bSPetr Machata err = nla_parse_nested(tb, 2847a2601e2bSPetr Machata ARRAY_SIZE(rtm_nh_res_policy_new) - 1, 2848a2601e2bSPetr Machata res, rtm_nh_res_policy_new, extack); 2849a2601e2bSPetr Machata if (err < 0) 2850a2601e2bSPetr Machata 
return err; 2851a2601e2bSPetr Machata } 2852a2601e2bSPetr Machata 2853a2601e2bSPetr Machata if (tb[NHA_RES_GROUP_BUCKETS]) { 2854a2601e2bSPetr Machata cfg->nh_grp_res_num_buckets = 2855a2601e2bSPetr Machata nla_get_u16(tb[NHA_RES_GROUP_BUCKETS]); 2856a2601e2bSPetr Machata cfg->nh_grp_res_has_num_buckets = true; 2857a2601e2bSPetr Machata if (!cfg->nh_grp_res_num_buckets) { 2858a2601e2bSPetr Machata NL_SET_ERR_MSG(extack, "Number of buckets needs to be non-0"); 2859a2601e2bSPetr Machata return -EINVAL; 2860a2601e2bSPetr Machata } 2861a2601e2bSPetr Machata } 2862a2601e2bSPetr Machata 2863a2601e2bSPetr Machata err = rtm_nh_get_timer(tb[NHA_RES_GROUP_IDLE_TIMER], 2864a2601e2bSPetr Machata NH_RES_DEFAULT_IDLE_TIMER, 2865a2601e2bSPetr Machata &cfg->nh_grp_res_idle_timer, 2866a2601e2bSPetr Machata &cfg->nh_grp_res_has_idle_timer, 2867a2601e2bSPetr Machata extack); 2868a2601e2bSPetr Machata if (err) 2869a2601e2bSPetr Machata return err; 2870a2601e2bSPetr Machata 2871a2601e2bSPetr Machata return rtm_nh_get_timer(tb[NHA_RES_GROUP_UNBALANCED_TIMER], 2872a2601e2bSPetr Machata NH_RES_DEFAULT_UNBALANCED_TIMER, 2873a2601e2bSPetr Machata &cfg->nh_grp_res_unbalanced_timer, 2874a2601e2bSPetr Machata &cfg->nh_grp_res_has_unbalanced_timer, 2875a2601e2bSPetr Machata extack); 2876a2601e2bSPetr Machata } 2877a2601e2bSPetr Machata 2878ab84be7eSDavid Ahern static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, 2879ab84be7eSDavid Ahern struct nlmsghdr *nlh, struct nh_config *cfg, 2880ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 2881ab84be7eSDavid Ahern { 2882ab84be7eSDavid Ahern struct nhmsg *nhm = nlmsg_data(nlh); 2883643d0878SPetr Machata struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)]; 2884ab84be7eSDavid Ahern int err; 2885ab84be7eSDavid Ahern 2886643d0878SPetr Machata err = nlmsg_parse(nlh, sizeof(*nhm), tb, 2887643d0878SPetr Machata ARRAY_SIZE(rtm_nh_policy_new) - 1, 2888643d0878SPetr Machata rtm_nh_policy_new, extack); 2889ab84be7eSDavid Ahern if (err < 0) 
2890ab84be7eSDavid Ahern return err; 2891ab84be7eSDavid Ahern 2892ab84be7eSDavid Ahern err = -EINVAL; 2893ab84be7eSDavid Ahern if (nhm->resvd || nhm->nh_scope) { 2894ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid values in ancillary header"); 2895ab84be7eSDavid Ahern goto out; 2896ab84be7eSDavid Ahern } 2897ab84be7eSDavid Ahern if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) { 2898ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header"); 2899ab84be7eSDavid Ahern goto out; 2900ab84be7eSDavid Ahern } 2901ab84be7eSDavid Ahern 2902ab84be7eSDavid Ahern switch (nhm->nh_family) { 2903597cfe4fSDavid Ahern case AF_INET: 290453010f99SDavid Ahern case AF_INET6: 2905597cfe4fSDavid Ahern break; 2906430a0491SDavid Ahern case AF_UNSPEC: 2907430a0491SDavid Ahern if (tb[NHA_GROUP]) 2908430a0491SDavid Ahern break; 2909a8eceea8SJoe Perches fallthrough; 2910ab84be7eSDavid Ahern default: 2911ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid address family"); 2912ab84be7eSDavid Ahern goto out; 2913ab84be7eSDavid Ahern } 2914ab84be7eSDavid Ahern 2915ab84be7eSDavid Ahern memset(cfg, 0, sizeof(*cfg)); 2916ab84be7eSDavid Ahern cfg->nlflags = nlh->nlmsg_flags; 2917ab84be7eSDavid Ahern cfg->nlinfo.portid = NETLINK_CB(skb).portid; 2918ab84be7eSDavid Ahern cfg->nlinfo.nlh = nlh; 2919ab84be7eSDavid Ahern cfg->nlinfo.nl_net = net; 2920ab84be7eSDavid Ahern 2921ab84be7eSDavid Ahern cfg->nh_family = nhm->nh_family; 2922ab84be7eSDavid Ahern cfg->nh_protocol = nhm->nh_protocol; 2923ab84be7eSDavid Ahern cfg->nh_flags = nhm->nh_flags; 2924ab84be7eSDavid Ahern 2925ab84be7eSDavid Ahern if (tb[NHA_ID]) 2926ab84be7eSDavid Ahern cfg->nh_id = nla_get_u32(tb[NHA_ID]); 2927ab84be7eSDavid Ahern 292838428d68SRoopa Prabhu if (tb[NHA_FDB]) { 292938428d68SRoopa Prabhu if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] || 293038428d68SRoopa Prabhu tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { 293138428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or 
blackhole"); 293238428d68SRoopa Prabhu goto out; 293338428d68SRoopa Prabhu } 293438428d68SRoopa Prabhu if (nhm->nh_flags) { 293538428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header"); 293638428d68SRoopa Prabhu goto out; 293738428d68SRoopa Prabhu } 293838428d68SRoopa Prabhu cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); 293938428d68SRoopa Prabhu } 294038428d68SRoopa Prabhu 2941430a0491SDavid Ahern if (tb[NHA_GROUP]) { 2942430a0491SDavid Ahern if (nhm->nh_family != AF_UNSPEC) { 2943430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid family for group"); 2944430a0491SDavid Ahern goto out; 2945430a0491SDavid Ahern } 2946430a0491SDavid Ahern cfg->nh_grp = tb[NHA_GROUP]; 2947430a0491SDavid Ahern 2948430a0491SDavid Ahern cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH; 2949430a0491SDavid Ahern if (tb[NHA_GROUP_TYPE]) 2950430a0491SDavid Ahern cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]); 2951430a0491SDavid Ahern 2952430a0491SDavid Ahern if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) { 2953430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid group type"); 2954430a0491SDavid Ahern goto out; 2955430a0491SDavid Ahern } 2956a2601e2bSPetr Machata err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), 2957a2601e2bSPetr Machata cfg->nh_grp_type, extack); 2958a2601e2bSPetr Machata if (err) 2959a2601e2bSPetr Machata goto out; 2960a2601e2bSPetr Machata 2961a2601e2bSPetr Machata if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) 2962a2601e2bSPetr Machata err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP], 2963a2601e2bSPetr Machata cfg, extack); 2964430a0491SDavid Ahern 2965430a0491SDavid Ahern /* no other attributes should be set */ 2966430a0491SDavid Ahern goto out; 2967430a0491SDavid Ahern } 2968430a0491SDavid Ahern 2969ab84be7eSDavid Ahern if (tb[NHA_BLACKHOLE]) { 2970b513bd03SDavid Ahern if (tb[NHA_GATEWAY] || tb[NHA_OIF] || 297138428d68SRoopa Prabhu tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) { 297238428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, 
"Blackhole attribute can not be used with gateway, oif, encap or fdb"); 2973ab84be7eSDavid Ahern goto out; 2974ab84be7eSDavid Ahern } 2975ab84be7eSDavid Ahern 2976ab84be7eSDavid Ahern cfg->nh_blackhole = 1; 2977ab84be7eSDavid Ahern err = 0; 2978ab84be7eSDavid Ahern goto out; 2979ab84be7eSDavid Ahern } 2980ab84be7eSDavid Ahern 298138428d68SRoopa Prabhu if (!cfg->nh_fdb && !tb[NHA_OIF]) { 298238428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops"); 2983ab84be7eSDavid Ahern goto out; 2984ab84be7eSDavid Ahern } 2985ab84be7eSDavid Ahern 298638428d68SRoopa Prabhu if (!cfg->nh_fdb && tb[NHA_OIF]) { 2987ab84be7eSDavid Ahern cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); 2988ab84be7eSDavid Ahern if (cfg->nh_ifindex) 2989ab84be7eSDavid Ahern cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); 2990ab84be7eSDavid Ahern 2991ab84be7eSDavid Ahern if (!cfg->dev) { 2992ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid device index"); 2993ab84be7eSDavid Ahern goto out; 2994ab84be7eSDavid Ahern } else if (!(cfg->dev->flags & IFF_UP)) { 2995ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 2996ab84be7eSDavid Ahern err = -ENETDOWN; 2997ab84be7eSDavid Ahern goto out; 2998ab84be7eSDavid Ahern } else if (!netif_carrier_ok(cfg->dev)) { 2999ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); 3000ab84be7eSDavid Ahern err = -ENETDOWN; 3001ab84be7eSDavid Ahern goto out; 3002ab84be7eSDavid Ahern } 300338428d68SRoopa Prabhu } 3004ab84be7eSDavid Ahern 3005597cfe4fSDavid Ahern err = -EINVAL; 3006597cfe4fSDavid Ahern if (tb[NHA_GATEWAY]) { 3007597cfe4fSDavid Ahern struct nlattr *gwa = tb[NHA_GATEWAY]; 3008597cfe4fSDavid Ahern 3009597cfe4fSDavid Ahern switch (cfg->nh_family) { 3010597cfe4fSDavid Ahern case AF_INET: 3011597cfe4fSDavid Ahern if (nla_len(gwa) != sizeof(u32)) { 3012597cfe4fSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid gateway"); 3013597cfe4fSDavid Ahern goto out; 
3014597cfe4fSDavid Ahern } 3015597cfe4fSDavid Ahern cfg->gw.ipv4 = nla_get_be32(gwa); 3016597cfe4fSDavid Ahern break; 301753010f99SDavid Ahern case AF_INET6: 301853010f99SDavid Ahern if (nla_len(gwa) != sizeof(struct in6_addr)) { 301953010f99SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid gateway"); 302053010f99SDavid Ahern goto out; 302153010f99SDavid Ahern } 302253010f99SDavid Ahern cfg->gw.ipv6 = nla_get_in6_addr(gwa); 302353010f99SDavid Ahern break; 3024597cfe4fSDavid Ahern default: 3025597cfe4fSDavid Ahern NL_SET_ERR_MSG(extack, 3026597cfe4fSDavid Ahern "Unknown address family for gateway"); 3027597cfe4fSDavid Ahern goto out; 3028597cfe4fSDavid Ahern } 3029597cfe4fSDavid Ahern } else { 3030597cfe4fSDavid Ahern /* device only nexthop (no gateway) */ 3031597cfe4fSDavid Ahern if (cfg->nh_flags & RTNH_F_ONLINK) { 3032597cfe4fSDavid Ahern NL_SET_ERR_MSG(extack, 3033597cfe4fSDavid Ahern "ONLINK flag can not be set for nexthop without a gateway"); 3034597cfe4fSDavid Ahern goto out; 3035597cfe4fSDavid Ahern } 3036597cfe4fSDavid Ahern } 3037597cfe4fSDavid Ahern 3038b513bd03SDavid Ahern if (tb[NHA_ENCAP]) { 3039b513bd03SDavid Ahern cfg->nh_encap = tb[NHA_ENCAP]; 3040b513bd03SDavid Ahern 3041b513bd03SDavid Ahern if (!tb[NHA_ENCAP_TYPE]) { 3042b513bd03SDavid Ahern NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing"); 3043b513bd03SDavid Ahern goto out; 3044b513bd03SDavid Ahern } 3045b513bd03SDavid Ahern 3046b513bd03SDavid Ahern cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]); 3047b513bd03SDavid Ahern err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack); 3048b513bd03SDavid Ahern if (err < 0) 3049b513bd03SDavid Ahern goto out; 3050b513bd03SDavid Ahern 3051b513bd03SDavid Ahern } else if (tb[NHA_ENCAP_TYPE]) { 3052b513bd03SDavid Ahern NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing"); 3053b513bd03SDavid Ahern goto out; 3054b513bd03SDavid Ahern } 3055b513bd03SDavid Ahern 3056b513bd03SDavid Ahern 3057ab84be7eSDavid Ahern err = 0; 3058ab84be7eSDavid 
Ahern out: 3059ab84be7eSDavid Ahern return err; 3060ab84be7eSDavid Ahern } 3061ab84be7eSDavid Ahern 3062ab84be7eSDavid Ahern /* rtnl */ 3063ab84be7eSDavid Ahern static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, 3064ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 3065ab84be7eSDavid Ahern { 3066ab84be7eSDavid Ahern struct net *net = sock_net(skb->sk); 3067ab84be7eSDavid Ahern struct nh_config cfg; 3068ab84be7eSDavid Ahern struct nexthop *nh; 3069ab84be7eSDavid Ahern int err; 3070ab84be7eSDavid Ahern 3071ab84be7eSDavid Ahern err = rtm_to_nh_config(net, skb, nlh, &cfg, extack); 3072ab84be7eSDavid Ahern if (!err) { 3073ab84be7eSDavid Ahern nh = nexthop_add(net, &cfg, extack); 3074ab84be7eSDavid Ahern if (IS_ERR(nh)) 3075ab84be7eSDavid Ahern err = PTR_ERR(nh); 3076ab84be7eSDavid Ahern } 3077ab84be7eSDavid Ahern 3078ab84be7eSDavid Ahern return err; 3079ab84be7eSDavid Ahern } 3080ab84be7eSDavid Ahern 30812118f939SPetr Machata static int nh_valid_get_del_req(const struct nlmsghdr *nlh, 3082a207eab1SPetr Machata struct nlattr **tb, u32 *id, u32 *op_flags, 3083ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 3084ab84be7eSDavid Ahern { 3085ab84be7eSDavid Ahern struct nhmsg *nhm = nlmsg_data(nlh); 30860bccf8edSPetr Machata 30870bccf8edSPetr Machata if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { 30880bccf8edSPetr Machata NL_SET_ERR_MSG(extack, "Invalid values in header"); 30890bccf8edSPetr Machata return -EINVAL; 30900bccf8edSPetr Machata } 30910bccf8edSPetr Machata 30920bccf8edSPetr Machata if (!tb[NHA_ID]) { 30930bccf8edSPetr Machata NL_SET_ERR_MSG(extack, "Nexthop id is missing"); 30940bccf8edSPetr Machata return -EINVAL; 30950bccf8edSPetr Machata } 30960bccf8edSPetr Machata 30970bccf8edSPetr Machata *id = nla_get_u32(tb[NHA_ID]); 30980bccf8edSPetr Machata if (!(*id)) { 30990bccf8edSPetr Machata NL_SET_ERR_MSG(extack, "Invalid nexthop id"); 31000bccf8edSPetr Machata return -EINVAL; 31010bccf8edSPetr Machata } 
31020bccf8edSPetr Machata 3103a207eab1SPetr Machata if (tb[NHA_OP_FLAGS]) 3104a207eab1SPetr Machata *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); 3105a207eab1SPetr Machata else 3106a207eab1SPetr Machata *op_flags = 0; 3107a207eab1SPetr Machata 31080bccf8edSPetr Machata return 0; 31090bccf8edSPetr Machata } 31100bccf8edSPetr Machata 3111ab84be7eSDavid Ahern /* rtnl */ 3112ab84be7eSDavid Ahern static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, 3113ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 3114ab84be7eSDavid Ahern { 3115ab84be7eSDavid Ahern struct net *net = sock_net(skb->sk); 31162118f939SPetr Machata struct nlattr *tb[NHA_MAX + 1]; 3117ab84be7eSDavid Ahern struct nl_info nlinfo = { 3118ab84be7eSDavid Ahern .nlh = nlh, 3119ab84be7eSDavid Ahern .nl_net = net, 3120ab84be7eSDavid Ahern .portid = NETLINK_CB(skb).portid, 3121ab84be7eSDavid Ahern }; 3122ab84be7eSDavid Ahern struct nexthop *nh; 3123a207eab1SPetr Machata u32 op_flags; 3124ab84be7eSDavid Ahern int err; 3125ab84be7eSDavid Ahern u32 id; 3126ab84be7eSDavid Ahern 31272118f939SPetr Machata err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, 31282118f939SPetr Machata rtm_nh_policy_del, extack); 31292118f939SPetr Machata if (err < 0) 31302118f939SPetr Machata return err; 31312118f939SPetr Machata 3132a207eab1SPetr Machata err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack); 3133ab84be7eSDavid Ahern if (err) 3134ab84be7eSDavid Ahern return err; 3135ab84be7eSDavid Ahern 3136ab84be7eSDavid Ahern nh = nexthop_find_by_id(net, id); 3137ab84be7eSDavid Ahern if (!nh) 3138ab84be7eSDavid Ahern return -ENOENT; 3139ab84be7eSDavid Ahern 3140430a0491SDavid Ahern remove_nexthop(net, nh, &nlinfo); 3141ab84be7eSDavid Ahern 3142ab84be7eSDavid Ahern return 0; 3143ab84be7eSDavid Ahern } 3144ab84be7eSDavid Ahern 3145ab84be7eSDavid Ahern /* rtnl */ 3146ab84be7eSDavid Ahern static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, 3147ab84be7eSDavid Ahern struct netlink_ext_ack 
*extack) 3148ab84be7eSDavid Ahern { 3149ab84be7eSDavid Ahern struct net *net = sock_net(in_skb->sk); 31502118f939SPetr Machata struct nlattr *tb[NHA_MAX + 1]; 3151ab84be7eSDavid Ahern struct sk_buff *skb = NULL; 3152ab84be7eSDavid Ahern struct nexthop *nh; 3153a207eab1SPetr Machata u32 op_flags; 3154ab84be7eSDavid Ahern int err; 3155ab84be7eSDavid Ahern u32 id; 3156ab84be7eSDavid Ahern 31572118f939SPetr Machata err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, 31582118f939SPetr Machata rtm_nh_policy_get, extack); 31592118f939SPetr Machata if (err < 0) 31602118f939SPetr Machata return err; 31612118f939SPetr Machata 3162a207eab1SPetr Machata err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack); 3163ab84be7eSDavid Ahern if (err) 3164ab84be7eSDavid Ahern return err; 3165ab84be7eSDavid Ahern 3166ab84be7eSDavid Ahern err = -ENOBUFS; 3167ab84be7eSDavid Ahern skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 3168ab84be7eSDavid Ahern if (!skb) 3169ab84be7eSDavid Ahern goto out; 3170ab84be7eSDavid Ahern 3171ab84be7eSDavid Ahern err = -ENOENT; 3172ab84be7eSDavid Ahern nh = nexthop_find_by_id(net, id); 3173ab84be7eSDavid Ahern if (!nh) 3174ab84be7eSDavid Ahern goto errout_free; 3175ab84be7eSDavid Ahern 3176ab84be7eSDavid Ahern err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid, 3177*95fedd76SIdo Schimmel nlh->nlmsg_seq, 0, op_flags); 3178ab84be7eSDavid Ahern if (err < 0) { 3179ab84be7eSDavid Ahern WARN_ON(err == -EMSGSIZE); 3180ab84be7eSDavid Ahern goto errout_free; 3181ab84be7eSDavid Ahern } 3182ab84be7eSDavid Ahern 3183ab84be7eSDavid Ahern err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 3184ab84be7eSDavid Ahern out: 3185ab84be7eSDavid Ahern return err; 3186ab84be7eSDavid Ahern errout_free: 3187ab84be7eSDavid Ahern kfree_skb(skb); 3188ab84be7eSDavid Ahern goto out; 3189ab84be7eSDavid Ahern } 3190ab84be7eSDavid Ahern 319156450ec6SPetr Machata struct nh_dump_filter { 31928a1bbabbSPetr Machata u32 nh_id; 319356450ec6SPetr Machata int 
dev_idx; 319456450ec6SPetr Machata int master_idx; 319556450ec6SPetr Machata bool group_filter; 319656450ec6SPetr Machata bool fdb_filter; 31978a1bbabbSPetr Machata u32 res_bucket_nh_id; 3198a207eab1SPetr Machata u32 op_flags; 319956450ec6SPetr Machata }; 320056450ec6SPetr Machata 320156450ec6SPetr Machata static bool nh_dump_filtered(struct nexthop *nh, 320256450ec6SPetr Machata struct nh_dump_filter *filter, u8 family) 3203ab84be7eSDavid Ahern { 3204ab84be7eSDavid Ahern const struct net_device *dev; 3205ab84be7eSDavid Ahern const struct nh_info *nhi; 3206ab84be7eSDavid Ahern 320756450ec6SPetr Machata if (filter->group_filter && !nh->is_group) 3208430a0491SDavid Ahern return true; 3209430a0491SDavid Ahern 321056450ec6SPetr Machata if (!filter->dev_idx && !filter->master_idx && !family) 3211ab84be7eSDavid Ahern return false; 3212ab84be7eSDavid Ahern 3213430a0491SDavid Ahern if (nh->is_group) 3214430a0491SDavid Ahern return true; 3215430a0491SDavid Ahern 3216ab84be7eSDavid Ahern nhi = rtnl_dereference(nh->nh_info); 3217ab84be7eSDavid Ahern if (family && nhi->family != family) 3218ab84be7eSDavid Ahern return true; 3219ab84be7eSDavid Ahern 3220ab84be7eSDavid Ahern dev = nhi->fib_nhc.nhc_dev; 322156450ec6SPetr Machata if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx)) 3222ab84be7eSDavid Ahern return true; 3223ab84be7eSDavid Ahern 322456450ec6SPetr Machata if (filter->master_idx) { 3225ab84be7eSDavid Ahern struct net_device *master; 3226ab84be7eSDavid Ahern 3227ab84be7eSDavid Ahern if (!dev) 3228ab84be7eSDavid Ahern return true; 3229ab84be7eSDavid Ahern 3230ab84be7eSDavid Ahern master = netdev_master_upper_dev_get((struct net_device *)dev); 323156450ec6SPetr Machata if (!master || master->ifindex != filter->master_idx) 3232ab84be7eSDavid Ahern return true; 3233ab84be7eSDavid Ahern } 3234ab84be7eSDavid Ahern 3235ab84be7eSDavid Ahern return false; 3236ab84be7eSDavid Ahern } 3237ab84be7eSDavid Ahern 3238b9ebea12SPetr Machata static int 
__nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb, 323956450ec6SPetr Machata struct nh_dump_filter *filter, 3240b9ebea12SPetr Machata struct netlink_ext_ack *extack) 3241ab84be7eSDavid Ahern { 3242ab84be7eSDavid Ahern struct nhmsg *nhm; 3243ab84be7eSDavid Ahern u32 idx; 3244ab84be7eSDavid Ahern 324544551bffSPetr Machata if (tb[NHA_OIF]) { 324644551bffSPetr Machata idx = nla_get_u32(tb[NHA_OIF]); 3247ab84be7eSDavid Ahern if (idx > INT_MAX) { 3248ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid device index"); 3249ab84be7eSDavid Ahern return -EINVAL; 3250ab84be7eSDavid Ahern } 325156450ec6SPetr Machata filter->dev_idx = idx; 325244551bffSPetr Machata } 325344551bffSPetr Machata if (tb[NHA_MASTER]) { 325444551bffSPetr Machata idx = nla_get_u32(tb[NHA_MASTER]); 3255ab84be7eSDavid Ahern if (idx > INT_MAX) { 3256ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid master device index"); 3257ab84be7eSDavid Ahern return -EINVAL; 3258ab84be7eSDavid Ahern } 325956450ec6SPetr Machata filter->master_idx = idx; 3260ab84be7eSDavid Ahern } 326156450ec6SPetr Machata filter->group_filter = nla_get_flag(tb[NHA_GROUPS]); 326256450ec6SPetr Machata filter->fdb_filter = nla_get_flag(tb[NHA_FDB]); 3263ab84be7eSDavid Ahern 3264ab84be7eSDavid Ahern nhm = nlmsg_data(nlh); 3265ab84be7eSDavid Ahern if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { 3266ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request"); 3267ab84be7eSDavid Ahern return -EINVAL; 3268ab84be7eSDavid Ahern } 3269ab84be7eSDavid Ahern 3270a207eab1SPetr Machata if (tb[NHA_OP_FLAGS]) 3271a207eab1SPetr Machata filter->op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); 3272a207eab1SPetr Machata else 3273a207eab1SPetr Machata filter->op_flags = 0; 3274a207eab1SPetr Machata 3275ab84be7eSDavid Ahern return 0; 3276ab84be7eSDavid Ahern } 3277ab84be7eSDavid Ahern 3278b9ebea12SPetr Machata static int nh_valid_dump_req(const struct nlmsghdr *nlh, 3279b9ebea12SPetr 
Machata struct nh_dump_filter *filter, 3280b9ebea12SPetr Machata struct netlink_callback *cb) 3281b9ebea12SPetr Machata { 32822118f939SPetr Machata struct nlattr *tb[NHA_MAX + 1]; 3283b9ebea12SPetr Machata int err; 3284b9ebea12SPetr Machata 32852118f939SPetr Machata err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, 3286b9ebea12SPetr Machata rtm_nh_policy_dump, cb->extack); 3287b9ebea12SPetr Machata if (err < 0) 3288b9ebea12SPetr Machata return err; 3289b9ebea12SPetr Machata 3290b9ebea12SPetr Machata return __nh_valid_dump_req(nlh, tb, filter, cb->extack); 3291b9ebea12SPetr Machata } 3292b9ebea12SPetr Machata 3293a6fbbaa6SPetr Machata struct rtm_dump_nh_ctx { 3294a6fbbaa6SPetr Machata u32 idx; 3295a6fbbaa6SPetr Machata }; 3296a6fbbaa6SPetr Machata 3297a6fbbaa6SPetr Machata static struct rtm_dump_nh_ctx * 3298a6fbbaa6SPetr Machata rtm_dump_nh_ctx(struct netlink_callback *cb) 3299a6fbbaa6SPetr Machata { 3300a6fbbaa6SPetr Machata struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx; 3301a6fbbaa6SPetr Machata 3302a6fbbaa6SPetr Machata BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); 3303a6fbbaa6SPetr Machata return ctx; 3304a6fbbaa6SPetr Machata } 3305a6fbbaa6SPetr Machata 3306cbee1807SPetr Machata static int rtm_dump_walk_nexthops(struct sk_buff *skb, 3307cbee1807SPetr Machata struct netlink_callback *cb, 3308cbee1807SPetr Machata struct rb_root *root, 3309cbee1807SPetr Machata struct rtm_dump_nh_ctx *ctx, 3310e948217dSPetr Machata int (*nh_cb)(struct sk_buff *skb, 3311e948217dSPetr Machata struct netlink_callback *cb, 3312e948217dSPetr Machata struct nexthop *nh, void *data), 3313e948217dSPetr Machata void *data) 3314ab84be7eSDavid Ahern { 3315ab84be7eSDavid Ahern struct rb_node *node; 33169e46fb65SIdo Schimmel int s_idx; 3317ab84be7eSDavid Ahern int err; 3318ab84be7eSDavid Ahern 3319a6fbbaa6SPetr Machata s_idx = ctx->idx; 3320ab84be7eSDavid Ahern for (node = rb_first(root); node; node = rb_next(node)) { 3321ab84be7eSDavid Ahern struct nexthop *nh; 3322ab84be7eSDavid 
Ahern 3323ab84be7eSDavid Ahern nh = rb_entry(node, struct nexthop, rb_node); 33249e46fb65SIdo Schimmel if (nh->id < s_idx) 33259e46fb65SIdo Schimmel continue; 33269e46fb65SIdo Schimmel 33279e46fb65SIdo Schimmel ctx->idx = nh->id; 3328e948217dSPetr Machata err = nh_cb(skb, cb, nh, data); 3329e948217dSPetr Machata if (err) 3330cbee1807SPetr Machata return err; 3331cbee1807SPetr Machata } 3332cbee1807SPetr Machata 3333cbee1807SPetr Machata return 0; 3334cbee1807SPetr Machata } 3335cbee1807SPetr Machata 3336e948217dSPetr Machata static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb, 3337e948217dSPetr Machata struct nexthop *nh, void *data) 3338e948217dSPetr Machata { 3339e948217dSPetr Machata struct nhmsg *nhm = nlmsg_data(cb->nlh); 3340e948217dSPetr Machata struct nh_dump_filter *filter = data; 3341e948217dSPetr Machata 3342e948217dSPetr Machata if (nh_dump_filtered(nh, filter, nhm->nh_family)) 3343e948217dSPetr Machata return 0; 3344e948217dSPetr Machata 3345e948217dSPetr Machata return nh_fill_node(skb, nh, RTM_NEWNEXTHOP, 3346e948217dSPetr Machata NETLINK_CB(cb->skb).portid, 3347*95fedd76SIdo Schimmel cb->nlh->nlmsg_seq, NLM_F_MULTI, filter->op_flags); 3348e948217dSPetr Machata } 3349e948217dSPetr Machata 3350cbee1807SPetr Machata /* rtnl */ 3351cbee1807SPetr Machata static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) 3352cbee1807SPetr Machata { 3353cbee1807SPetr Machata struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); 3354cbee1807SPetr Machata struct net *net = sock_net(skb->sk); 3355cbee1807SPetr Machata struct rb_root *root = &net->nexthop.rb_root; 3356cbee1807SPetr Machata struct nh_dump_filter filter = {}; 3357cbee1807SPetr Machata int err; 3358cbee1807SPetr Machata 3359cbee1807SPetr Machata err = nh_valid_dump_req(cb->nlh, &filter, cb); 3360cbee1807SPetr Machata if (err < 0) 3361cbee1807SPetr Machata return err; 3362cbee1807SPetr Machata 3363e948217dSPetr Machata err = rtm_dump_walk_nexthops(skb, cb, root, 
ctx, 3364e948217dSPetr Machata &rtm_dump_nexthop_cb, &filter); 3365ab84be7eSDavid Ahern if (err < 0) { 3366ab84be7eSDavid Ahern if (likely(skb->len)) 3367913f60caSIdo Schimmel err = skb->len; 3368ab84be7eSDavid Ahern } 3369ab84be7eSDavid Ahern 3370ab84be7eSDavid Ahern cb->seq = net->nexthop.seq; 3371ab84be7eSDavid Ahern nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 3372ab84be7eSDavid Ahern return err; 3373ab84be7eSDavid Ahern } 3374ab84be7eSDavid Ahern 33758a1bbabbSPetr Machata static struct nexthop * 33768a1bbabbSPetr Machata nexthop_find_group_resilient(struct net *net, u32 id, 33778a1bbabbSPetr Machata struct netlink_ext_ack *extack) 33788a1bbabbSPetr Machata { 33798a1bbabbSPetr Machata struct nh_group *nhg; 33808a1bbabbSPetr Machata struct nexthop *nh; 33818a1bbabbSPetr Machata 33828a1bbabbSPetr Machata nh = nexthop_find_by_id(net, id); 33838a1bbabbSPetr Machata if (!nh) 33848a1bbabbSPetr Machata return ERR_PTR(-ENOENT); 33858a1bbabbSPetr Machata 33868a1bbabbSPetr Machata if (!nh->is_group) { 33878a1bbabbSPetr Machata NL_SET_ERR_MSG(extack, "Not a nexthop group"); 33888a1bbabbSPetr Machata return ERR_PTR(-EINVAL); 33898a1bbabbSPetr Machata } 33908a1bbabbSPetr Machata 33918a1bbabbSPetr Machata nhg = rtnl_dereference(nh->nh_grp); 33928a1bbabbSPetr Machata if (!nhg->resilient) { 33938a1bbabbSPetr Machata NL_SET_ERR_MSG(extack, "Nexthop group not of type resilient"); 33948a1bbabbSPetr Machata return ERR_PTR(-EINVAL); 33958a1bbabbSPetr Machata } 33968a1bbabbSPetr Machata 33978a1bbabbSPetr Machata return nh; 33988a1bbabbSPetr Machata } 33998a1bbabbSPetr Machata 34008a1bbabbSPetr Machata static int nh_valid_dump_nhid(struct nlattr *attr, u32 *nh_id_p, 34018a1bbabbSPetr Machata struct netlink_ext_ack *extack) 34028a1bbabbSPetr Machata { 34038a1bbabbSPetr Machata u32 idx; 34048a1bbabbSPetr Machata 34058a1bbabbSPetr Machata if (attr) { 34068a1bbabbSPetr Machata idx = nla_get_u32(attr); 34078a1bbabbSPetr Machata if (!idx) { 34088a1bbabbSPetr Machata NL_SET_ERR_MSG(extack, 
"Invalid nexthop id"); 34098a1bbabbSPetr Machata return -EINVAL; 34108a1bbabbSPetr Machata } 34118a1bbabbSPetr Machata *nh_id_p = idx; 34128a1bbabbSPetr Machata } else { 34138a1bbabbSPetr Machata *nh_id_p = 0; 34148a1bbabbSPetr Machata } 34158a1bbabbSPetr Machata 34168a1bbabbSPetr Machata return 0; 34178a1bbabbSPetr Machata } 34188a1bbabbSPetr Machata 34198a1bbabbSPetr Machata static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh, 34208a1bbabbSPetr Machata struct nh_dump_filter *filter, 34218a1bbabbSPetr Machata struct netlink_callback *cb) 34228a1bbabbSPetr Machata { 34238a1bbabbSPetr Machata struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)]; 34242118f939SPetr Machata struct nlattr *tb[NHA_MAX + 1]; 34258a1bbabbSPetr Machata int err; 34268a1bbabbSPetr Machata 34272118f939SPetr Machata err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, 34288a1bbabbSPetr Machata rtm_nh_policy_dump_bucket, NULL); 34298a1bbabbSPetr Machata if (err < 0) 34308a1bbabbSPetr Machata return err; 34318a1bbabbSPetr Machata 34328a1bbabbSPetr Machata err = nh_valid_dump_nhid(tb[NHA_ID], &filter->nh_id, cb->extack); 34338a1bbabbSPetr Machata if (err) 34348a1bbabbSPetr Machata return err; 34358a1bbabbSPetr Machata 34368a1bbabbSPetr Machata if (tb[NHA_RES_BUCKET]) { 34378a1bbabbSPetr Machata size_t max = ARRAY_SIZE(rtm_nh_res_bucket_policy_dump) - 1; 34388a1bbabbSPetr Machata 34398a1bbabbSPetr Machata err = nla_parse_nested(res_tb, max, 34408a1bbabbSPetr Machata tb[NHA_RES_BUCKET], 34418a1bbabbSPetr Machata rtm_nh_res_bucket_policy_dump, 34428a1bbabbSPetr Machata cb->extack); 34438a1bbabbSPetr Machata if (err < 0) 34448a1bbabbSPetr Machata return err; 34458a1bbabbSPetr Machata 34468a1bbabbSPetr Machata err = nh_valid_dump_nhid(res_tb[NHA_RES_BUCKET_NH_ID], 34478a1bbabbSPetr Machata &filter->res_bucket_nh_id, 34488a1bbabbSPetr Machata cb->extack); 34498a1bbabbSPetr Machata if (err) 34508a1bbabbSPetr Machata return err; 34518a1bbabbSPetr Machata } 34528a1bbabbSPetr 
Machata 34538a1bbabbSPetr Machata return __nh_valid_dump_req(nlh, tb, filter, cb->extack); 34548a1bbabbSPetr Machata } 34558a1bbabbSPetr Machata 34568a1bbabbSPetr Machata struct rtm_dump_res_bucket_ctx { 34578a1bbabbSPetr Machata struct rtm_dump_nh_ctx nh; 34588a1bbabbSPetr Machata u16 bucket_index; 34598a1bbabbSPetr Machata }; 34608a1bbabbSPetr Machata 34618a1bbabbSPetr Machata static struct rtm_dump_res_bucket_ctx * 34628a1bbabbSPetr Machata rtm_dump_res_bucket_ctx(struct netlink_callback *cb) 34638a1bbabbSPetr Machata { 34648a1bbabbSPetr Machata struct rtm_dump_res_bucket_ctx *ctx = (void *)cb->ctx; 34658a1bbabbSPetr Machata 34668a1bbabbSPetr Machata BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); 34678a1bbabbSPetr Machata return ctx; 34688a1bbabbSPetr Machata } 34698a1bbabbSPetr Machata 34708a1bbabbSPetr Machata struct rtm_dump_nexthop_bucket_data { 34718a1bbabbSPetr Machata struct rtm_dump_res_bucket_ctx *ctx; 34728a1bbabbSPetr Machata struct nh_dump_filter filter; 34738a1bbabbSPetr Machata }; 34748a1bbabbSPetr Machata 34758a1bbabbSPetr Machata static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb, 34768a1bbabbSPetr Machata struct netlink_callback *cb, 34778a1bbabbSPetr Machata struct nexthop *nh, 34788a1bbabbSPetr Machata struct rtm_dump_nexthop_bucket_data *dd) 34798a1bbabbSPetr Machata { 34808a1bbabbSPetr Machata u32 portid = NETLINK_CB(cb->skb).portid; 34818a1bbabbSPetr Machata struct nhmsg *nhm = nlmsg_data(cb->nlh); 34828a1bbabbSPetr Machata struct nh_res_table *res_table; 34838a1bbabbSPetr Machata struct nh_group *nhg; 34848a1bbabbSPetr Machata u16 bucket_index; 34858a1bbabbSPetr Machata int err; 34868a1bbabbSPetr Machata 34878a1bbabbSPetr Machata nhg = rtnl_dereference(nh->nh_grp); 34888a1bbabbSPetr Machata res_table = rtnl_dereference(nhg->res_table); 34898a1bbabbSPetr Machata for (bucket_index = dd->ctx->bucket_index; 34908a1bbabbSPetr Machata bucket_index < res_table->num_nh_buckets; 34918a1bbabbSPetr Machata bucket_index++) { 34928a1bbabbSPetr 
Machata struct nh_res_bucket *bucket; 34938a1bbabbSPetr Machata struct nh_grp_entry *nhge; 34948a1bbabbSPetr Machata 34958a1bbabbSPetr Machata bucket = &res_table->nh_buckets[bucket_index]; 34968a1bbabbSPetr Machata nhge = rtnl_dereference(bucket->nh_entry); 34978a1bbabbSPetr Machata if (nh_dump_filtered(nhge->nh, &dd->filter, nhm->nh_family)) 34988a1bbabbSPetr Machata continue; 34998a1bbabbSPetr Machata 35008a1bbabbSPetr Machata if (dd->filter.res_bucket_nh_id && 35018a1bbabbSPetr Machata dd->filter.res_bucket_nh_id != nhge->nh->id) 35028a1bbabbSPetr Machata continue; 35038a1bbabbSPetr Machata 3504f10d3d9dSIdo Schimmel dd->ctx->bucket_index = bucket_index; 35058a1bbabbSPetr Machata err = nh_fill_res_bucket(skb, nh, bucket, bucket_index, 35068a1bbabbSPetr Machata RTM_NEWNEXTHOPBUCKET, portid, 35078a1bbabbSPetr Machata cb->nlh->nlmsg_seq, NLM_F_MULTI, 35088a1bbabbSPetr Machata cb->extack); 3509f10d3d9dSIdo Schimmel if (err) 3510f10d3d9dSIdo Schimmel return err; 35118a1bbabbSPetr Machata } 35128a1bbabbSPetr Machata 3513f10d3d9dSIdo Schimmel dd->ctx->bucket_index = 0; 35148a1bbabbSPetr Machata 3515f10d3d9dSIdo Schimmel return 0; 35168a1bbabbSPetr Machata } 35178a1bbabbSPetr Machata 35188a1bbabbSPetr Machata static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb, 35198a1bbabbSPetr Machata struct netlink_callback *cb, 35208a1bbabbSPetr Machata struct nexthop *nh, void *data) 35218a1bbabbSPetr Machata { 35228a1bbabbSPetr Machata struct rtm_dump_nexthop_bucket_data *dd = data; 35238a1bbabbSPetr Machata struct nh_group *nhg; 35248a1bbabbSPetr Machata 35258a1bbabbSPetr Machata if (!nh->is_group) 35268a1bbabbSPetr Machata return 0; 35278a1bbabbSPetr Machata 35288a1bbabbSPetr Machata nhg = rtnl_dereference(nh->nh_grp); 35298a1bbabbSPetr Machata if (!nhg->resilient) 35308a1bbabbSPetr Machata return 0; 35318a1bbabbSPetr Machata 35328a1bbabbSPetr Machata return rtm_dump_nexthop_bucket_nh(skb, cb, nh, dd); 35338a1bbabbSPetr Machata } 35348a1bbabbSPetr Machata 35358a1bbabbSPetr 
Machata /* rtnl */ 35368a1bbabbSPetr Machata static int rtm_dump_nexthop_bucket(struct sk_buff *skb, 35378a1bbabbSPetr Machata struct netlink_callback *cb) 35388a1bbabbSPetr Machata { 35398a1bbabbSPetr Machata struct rtm_dump_res_bucket_ctx *ctx = rtm_dump_res_bucket_ctx(cb); 35408a1bbabbSPetr Machata struct rtm_dump_nexthop_bucket_data dd = { .ctx = ctx }; 35418a1bbabbSPetr Machata struct net *net = sock_net(skb->sk); 35428a1bbabbSPetr Machata struct nexthop *nh; 35438a1bbabbSPetr Machata int err; 35448a1bbabbSPetr Machata 35458a1bbabbSPetr Machata err = nh_valid_dump_bucket_req(cb->nlh, &dd.filter, cb); 35468a1bbabbSPetr Machata if (err) 35478a1bbabbSPetr Machata return err; 35488a1bbabbSPetr Machata 35498a1bbabbSPetr Machata if (dd.filter.nh_id) { 35508a1bbabbSPetr Machata nh = nexthop_find_group_resilient(net, dd.filter.nh_id, 35518a1bbabbSPetr Machata cb->extack); 35528a1bbabbSPetr Machata if (IS_ERR(nh)) 35538a1bbabbSPetr Machata return PTR_ERR(nh); 35548a1bbabbSPetr Machata err = rtm_dump_nexthop_bucket_nh(skb, cb, nh, &dd); 35558a1bbabbSPetr Machata } else { 35568a1bbabbSPetr Machata struct rb_root *root = &net->nexthop.rb_root; 35578a1bbabbSPetr Machata 35588a1bbabbSPetr Machata err = rtm_dump_walk_nexthops(skb, cb, root, &ctx->nh, 35598a1bbabbSPetr Machata &rtm_dump_nexthop_bucket_cb, &dd); 35608a1bbabbSPetr Machata } 35618a1bbabbSPetr Machata 35628a1bbabbSPetr Machata if (err < 0) { 35638a1bbabbSPetr Machata if (likely(skb->len)) 35648743aeffSIdo Schimmel err = skb->len; 35658a1bbabbSPetr Machata } 35668a1bbabbSPetr Machata 35678a1bbabbSPetr Machata cb->seq = net->nexthop.seq; 35688a1bbabbSPetr Machata nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 35698a1bbabbSPetr Machata return err; 35708a1bbabbSPetr Machata } 35718a1bbabbSPetr Machata 3572187d4c6bSPetr Machata static int nh_valid_get_bucket_req_res_bucket(struct nlattr *res, 3573187d4c6bSPetr Machata u16 *bucket_index, 3574187d4c6bSPetr Machata struct netlink_ext_ack *extack) 3575187d4c6bSPetr Machata 
{ 3576187d4c6bSPetr Machata struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_get)]; 3577187d4c6bSPetr Machata int err; 3578187d4c6bSPetr Machata 3579187d4c6bSPetr Machata err = nla_parse_nested(tb, ARRAY_SIZE(rtm_nh_res_bucket_policy_get) - 1, 3580187d4c6bSPetr Machata res, rtm_nh_res_bucket_policy_get, extack); 3581187d4c6bSPetr Machata if (err < 0) 3582187d4c6bSPetr Machata return err; 3583187d4c6bSPetr Machata 3584187d4c6bSPetr Machata if (!tb[NHA_RES_BUCKET_INDEX]) { 3585187d4c6bSPetr Machata NL_SET_ERR_MSG(extack, "Bucket index is missing"); 3586187d4c6bSPetr Machata return -EINVAL; 3587187d4c6bSPetr Machata } 3588187d4c6bSPetr Machata 3589187d4c6bSPetr Machata *bucket_index = nla_get_u16(tb[NHA_RES_BUCKET_INDEX]); 3590187d4c6bSPetr Machata return 0; 3591187d4c6bSPetr Machata } 3592187d4c6bSPetr Machata 3593187d4c6bSPetr Machata static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, 3594187d4c6bSPetr Machata u32 *id, u16 *bucket_index, 3595187d4c6bSPetr Machata struct netlink_ext_ack *extack) 3596187d4c6bSPetr Machata { 35972118f939SPetr Machata struct nlattr *tb[NHA_MAX + 1]; 3598a207eab1SPetr Machata u32 op_flags; 3599187d4c6bSPetr Machata int err; 3600187d4c6bSPetr Machata 36012118f939SPetr Machata err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, 3602187d4c6bSPetr Machata rtm_nh_policy_get_bucket, extack); 3603187d4c6bSPetr Machata if (err < 0) 3604187d4c6bSPetr Machata return err; 3605187d4c6bSPetr Machata 3606a207eab1SPetr Machata err = nh_valid_get_del_req(nlh, tb, id, &op_flags, extack); 3607187d4c6bSPetr Machata if (err) 3608187d4c6bSPetr Machata return err; 3609187d4c6bSPetr Machata 3610187d4c6bSPetr Machata if (!tb[NHA_RES_BUCKET]) { 3611187d4c6bSPetr Machata NL_SET_ERR_MSG(extack, "Bucket information is missing"); 3612187d4c6bSPetr Machata return -EINVAL; 3613187d4c6bSPetr Machata } 3614187d4c6bSPetr Machata 3615187d4c6bSPetr Machata err = nh_valid_get_bucket_req_res_bucket(tb[NHA_RES_BUCKET], 3616187d4c6bSPetr Machata 
bucket_index, extack); 3617187d4c6bSPetr Machata if (err) 3618187d4c6bSPetr Machata return err; 3619187d4c6bSPetr Machata 3620187d4c6bSPetr Machata return 0; 3621187d4c6bSPetr Machata } 3622187d4c6bSPetr Machata 3623187d4c6bSPetr Machata /* rtnl */ 3624187d4c6bSPetr Machata static int rtm_get_nexthop_bucket(struct sk_buff *in_skb, struct nlmsghdr *nlh, 3625187d4c6bSPetr Machata struct netlink_ext_ack *extack) 3626187d4c6bSPetr Machata { 3627187d4c6bSPetr Machata struct net *net = sock_net(in_skb->sk); 3628187d4c6bSPetr Machata struct nh_res_table *res_table; 3629187d4c6bSPetr Machata struct sk_buff *skb = NULL; 3630187d4c6bSPetr Machata struct nh_group *nhg; 3631187d4c6bSPetr Machata struct nexthop *nh; 3632187d4c6bSPetr Machata u16 bucket_index; 3633187d4c6bSPetr Machata int err; 3634187d4c6bSPetr Machata u32 id; 3635187d4c6bSPetr Machata 3636187d4c6bSPetr Machata err = nh_valid_get_bucket_req(nlh, &id, &bucket_index, extack); 3637187d4c6bSPetr Machata if (err) 3638187d4c6bSPetr Machata return err; 3639187d4c6bSPetr Machata 3640187d4c6bSPetr Machata nh = nexthop_find_group_resilient(net, id, extack); 3641187d4c6bSPetr Machata if (IS_ERR(nh)) 3642187d4c6bSPetr Machata return PTR_ERR(nh); 3643187d4c6bSPetr Machata 3644187d4c6bSPetr Machata nhg = rtnl_dereference(nh->nh_grp); 3645187d4c6bSPetr Machata res_table = rtnl_dereference(nhg->res_table); 3646187d4c6bSPetr Machata if (bucket_index >= res_table->num_nh_buckets) { 3647187d4c6bSPetr Machata NL_SET_ERR_MSG(extack, "Bucket index out of bounds"); 3648187d4c6bSPetr Machata return -ENOENT; 3649187d4c6bSPetr Machata } 3650187d4c6bSPetr Machata 3651187d4c6bSPetr Machata skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 3652187d4c6bSPetr Machata if (!skb) 3653187d4c6bSPetr Machata return -ENOBUFS; 3654187d4c6bSPetr Machata 3655187d4c6bSPetr Machata err = nh_fill_res_bucket(skb, nh, &res_table->nh_buckets[bucket_index], 3656187d4c6bSPetr Machata bucket_index, RTM_NEWNEXTHOPBUCKET, 3657187d4c6bSPetr Machata 
NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 3658187d4c6bSPetr Machata 0, extack); 3659187d4c6bSPetr Machata if (err < 0) { 3660187d4c6bSPetr Machata WARN_ON(err == -EMSGSIZE); 3661187d4c6bSPetr Machata goto errout_free; 3662187d4c6bSPetr Machata } 3663187d4c6bSPetr Machata 3664187d4c6bSPetr Machata return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 3665187d4c6bSPetr Machata 3666187d4c6bSPetr Machata errout_free: 3667187d4c6bSPetr Machata kfree_skb(skb); 3668187d4c6bSPetr Machata return err; 3669187d4c6bSPetr Machata } 3670187d4c6bSPetr Machata 3671597cfe4fSDavid Ahern static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu) 3672597cfe4fSDavid Ahern { 3673597cfe4fSDavid Ahern unsigned int hash = nh_dev_hashfn(dev->ifindex); 3674597cfe4fSDavid Ahern struct net *net = dev_net(dev); 3675597cfe4fSDavid Ahern struct hlist_head *head = &net->nexthop.devhash[hash]; 3676597cfe4fSDavid Ahern struct hlist_node *n; 3677597cfe4fSDavid Ahern struct nh_info *nhi; 3678597cfe4fSDavid Ahern 3679597cfe4fSDavid Ahern hlist_for_each_entry_safe(nhi, n, head, dev_hash) { 3680597cfe4fSDavid Ahern if (nhi->fib_nhc.nhc_dev == dev) { 3681597cfe4fSDavid Ahern if (nhi->family == AF_INET) 3682597cfe4fSDavid Ahern fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu, 3683597cfe4fSDavid Ahern orig_mtu); 3684597cfe4fSDavid Ahern } 3685597cfe4fSDavid Ahern } 3686597cfe4fSDavid Ahern } 3687597cfe4fSDavid Ahern 3688597cfe4fSDavid Ahern /* rtnl */ 3689597cfe4fSDavid Ahern static int nh_netdev_event(struct notifier_block *this, 3690597cfe4fSDavid Ahern unsigned long event, void *ptr) 3691597cfe4fSDavid Ahern { 3692597cfe4fSDavid Ahern struct net_device *dev = netdev_notifier_info_to_dev(ptr); 3693597cfe4fSDavid Ahern struct netdev_notifier_info_ext *info_ext; 3694597cfe4fSDavid Ahern 3695597cfe4fSDavid Ahern switch (event) { 3696597cfe4fSDavid Ahern case NETDEV_DOWN: 3697597cfe4fSDavid Ahern case NETDEV_UNREGISTER: 369876c03bf8SIdo Schimmel nexthop_flush_dev(dev, event); 3699597cfe4fSDavid Ahern 
break; 3700597cfe4fSDavid Ahern case NETDEV_CHANGE: 3701597cfe4fSDavid Ahern if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP))) 370276c03bf8SIdo Schimmel nexthop_flush_dev(dev, event); 3703597cfe4fSDavid Ahern break; 3704597cfe4fSDavid Ahern case NETDEV_CHANGEMTU: 3705597cfe4fSDavid Ahern info_ext = ptr; 3706597cfe4fSDavid Ahern nexthop_sync_mtu(dev, info_ext->ext.mtu); 3707597cfe4fSDavid Ahern rt_cache_flush(dev_net(dev)); 3708597cfe4fSDavid Ahern break; 3709597cfe4fSDavid Ahern } 3710597cfe4fSDavid Ahern return NOTIFY_DONE; 3711597cfe4fSDavid Ahern } 3712597cfe4fSDavid Ahern 3713597cfe4fSDavid Ahern static struct notifier_block nh_netdev_notifier = { 3714597cfe4fSDavid Ahern .notifier_call = nh_netdev_event, 3715597cfe4fSDavid Ahern }; 3716597cfe4fSDavid Ahern 3717975ff7f3SIdo Schimmel static int nexthops_dump(struct net *net, struct notifier_block *nb, 37183106a084SIdo Schimmel enum nexthop_event_type event_type, 3719975ff7f3SIdo Schimmel struct netlink_ext_ack *extack) 3720975ff7f3SIdo Schimmel { 3721975ff7f3SIdo Schimmel struct rb_root *root = &net->nexthop.rb_root; 3722975ff7f3SIdo Schimmel struct rb_node *node; 3723975ff7f3SIdo Schimmel int err = 0; 3724975ff7f3SIdo Schimmel 3725975ff7f3SIdo Schimmel for (node = rb_first(root); node; node = rb_next(node)) { 3726975ff7f3SIdo Schimmel struct nexthop *nh; 3727975ff7f3SIdo Schimmel 3728975ff7f3SIdo Schimmel nh = rb_entry(node, struct nexthop, rb_node); 37293106a084SIdo Schimmel err = call_nexthop_notifier(nb, net, event_type, nh, extack); 3730975ff7f3SIdo Schimmel if (err) 3731975ff7f3SIdo Schimmel break; 3732975ff7f3SIdo Schimmel } 3733975ff7f3SIdo Schimmel 3734975ff7f3SIdo Schimmel return err; 3735975ff7f3SIdo Schimmel } 3736975ff7f3SIdo Schimmel 3737ce7e9c8aSIdo Schimmel int register_nexthop_notifier(struct net *net, struct notifier_block *nb, 3738ce7e9c8aSIdo Schimmel struct netlink_ext_ack *extack) 37398590ceedSRoopa Prabhu { 3740975ff7f3SIdo Schimmel int err; 3741975ff7f3SIdo Schimmel 
3742975ff7f3SIdo Schimmel rtnl_lock(); 37433106a084SIdo Schimmel err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack); 3744975ff7f3SIdo Schimmel if (err) 3745975ff7f3SIdo Schimmel goto unlock; 3746975ff7f3SIdo Schimmel err = blocking_notifier_chain_register(&net->nexthop.notifier_chain, 374780690ec6SIdo Schimmel nb); 3748975ff7f3SIdo Schimmel unlock: 3749975ff7f3SIdo Schimmel rtnl_unlock(); 3750975ff7f3SIdo Schimmel return err; 37518590ceedSRoopa Prabhu } 37528590ceedSRoopa Prabhu EXPORT_SYMBOL(register_nexthop_notifier); 37538590ceedSRoopa Prabhu 375470f16ea2SEric Dumazet int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) 375570f16ea2SEric Dumazet { 375670f16ea2SEric Dumazet int err; 375770f16ea2SEric Dumazet 375870f16ea2SEric Dumazet err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, 375970f16ea2SEric Dumazet nb); 376070f16ea2SEric Dumazet if (!err) 376170f16ea2SEric Dumazet nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); 376270f16ea2SEric Dumazet return err; 376370f16ea2SEric Dumazet } 376470f16ea2SEric Dumazet EXPORT_SYMBOL(__unregister_nexthop_notifier); 376570f16ea2SEric Dumazet 37668590ceedSRoopa Prabhu int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) 37678590ceedSRoopa Prabhu { 37683106a084SIdo Schimmel int err; 37693106a084SIdo Schimmel 37703106a084SIdo Schimmel rtnl_lock(); 377170f16ea2SEric Dumazet err = __unregister_nexthop_notifier(net, nb); 37723106a084SIdo Schimmel rtnl_unlock(); 37733106a084SIdo Schimmel return err; 37748590ceedSRoopa Prabhu } 37758590ceedSRoopa Prabhu EXPORT_SYMBOL(unregister_nexthop_notifier); 37768590ceedSRoopa Prabhu 3777e95f2592SIdo Schimmel void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap) 3778e95f2592SIdo Schimmel { 3779e95f2592SIdo Schimmel struct nexthop *nexthop; 3780e95f2592SIdo Schimmel 3781e95f2592SIdo Schimmel rcu_read_lock(); 3782e95f2592SIdo Schimmel 3783e95f2592SIdo Schimmel nexthop = nexthop_find_by_id(net, 
id); 3784e95f2592SIdo Schimmel if (!nexthop) 3785e95f2592SIdo Schimmel goto out; 3786e95f2592SIdo Schimmel 3787e95f2592SIdo Schimmel nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); 3788e95f2592SIdo Schimmel if (offload) 3789e95f2592SIdo Schimmel nexthop->nh_flags |= RTNH_F_OFFLOAD; 3790e95f2592SIdo Schimmel if (trap) 3791e95f2592SIdo Schimmel nexthop->nh_flags |= RTNH_F_TRAP; 3792e95f2592SIdo Schimmel 3793e95f2592SIdo Schimmel out: 3794e95f2592SIdo Schimmel rcu_read_unlock(); 3795e95f2592SIdo Schimmel } 3796e95f2592SIdo Schimmel EXPORT_SYMBOL(nexthop_set_hw_flags); 3797e95f2592SIdo Schimmel 379856ad5ba3SIdo Schimmel void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, 379956ad5ba3SIdo Schimmel bool offload, bool trap) 380056ad5ba3SIdo Schimmel { 380156ad5ba3SIdo Schimmel struct nh_res_table *res_table; 380256ad5ba3SIdo Schimmel struct nh_res_bucket *bucket; 380356ad5ba3SIdo Schimmel struct nexthop *nexthop; 380456ad5ba3SIdo Schimmel struct nh_group *nhg; 380556ad5ba3SIdo Schimmel 380656ad5ba3SIdo Schimmel rcu_read_lock(); 380756ad5ba3SIdo Schimmel 380856ad5ba3SIdo Schimmel nexthop = nexthop_find_by_id(net, id); 380956ad5ba3SIdo Schimmel if (!nexthop || !nexthop->is_group) 381056ad5ba3SIdo Schimmel goto out; 381156ad5ba3SIdo Schimmel 381256ad5ba3SIdo Schimmel nhg = rcu_dereference(nexthop->nh_grp); 381356ad5ba3SIdo Schimmel if (!nhg->resilient) 381456ad5ba3SIdo Schimmel goto out; 381556ad5ba3SIdo Schimmel 381656ad5ba3SIdo Schimmel if (bucket_index >= nhg->res_table->num_nh_buckets) 381756ad5ba3SIdo Schimmel goto out; 381856ad5ba3SIdo Schimmel 381956ad5ba3SIdo Schimmel res_table = rcu_dereference(nhg->res_table); 382056ad5ba3SIdo Schimmel bucket = &res_table->nh_buckets[bucket_index]; 382156ad5ba3SIdo Schimmel bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); 382256ad5ba3SIdo Schimmel if (offload) 382356ad5ba3SIdo Schimmel bucket->nh_flags |= RTNH_F_OFFLOAD; 382456ad5ba3SIdo Schimmel if (trap) 382556ad5ba3SIdo Schimmel bucket->nh_flags 
|= RTNH_F_TRAP; 382656ad5ba3SIdo Schimmel 382756ad5ba3SIdo Schimmel out: 382856ad5ba3SIdo Schimmel rcu_read_unlock(); 382956ad5ba3SIdo Schimmel } 383056ad5ba3SIdo Schimmel EXPORT_SYMBOL(nexthop_bucket_set_hw_flags); 383156ad5ba3SIdo Schimmel 3832cfc15c1dSIdo Schimmel void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, 3833cfc15c1dSIdo Schimmel unsigned long *activity) 3834cfc15c1dSIdo Schimmel { 3835cfc15c1dSIdo Schimmel struct nh_res_table *res_table; 3836cfc15c1dSIdo Schimmel struct nexthop *nexthop; 3837cfc15c1dSIdo Schimmel struct nh_group *nhg; 3838cfc15c1dSIdo Schimmel u16 i; 3839cfc15c1dSIdo Schimmel 3840cfc15c1dSIdo Schimmel rcu_read_lock(); 3841cfc15c1dSIdo Schimmel 3842cfc15c1dSIdo Schimmel nexthop = nexthop_find_by_id(net, id); 3843cfc15c1dSIdo Schimmel if (!nexthop || !nexthop->is_group) 3844cfc15c1dSIdo Schimmel goto out; 3845cfc15c1dSIdo Schimmel 3846cfc15c1dSIdo Schimmel nhg = rcu_dereference(nexthop->nh_grp); 3847cfc15c1dSIdo Schimmel if (!nhg->resilient) 3848cfc15c1dSIdo Schimmel goto out; 3849cfc15c1dSIdo Schimmel 3850cfc15c1dSIdo Schimmel /* Instead of silently ignoring some buckets, demand that the sizes 3851cfc15c1dSIdo Schimmel * be the same. 
3852cfc15c1dSIdo Schimmel */ 3853cfc15c1dSIdo Schimmel res_table = rcu_dereference(nhg->res_table); 3854cfc15c1dSIdo Schimmel if (num_buckets != res_table->num_nh_buckets) 3855cfc15c1dSIdo Schimmel goto out; 3856cfc15c1dSIdo Schimmel 3857cfc15c1dSIdo Schimmel for (i = 0; i < num_buckets; i++) { 3858cfc15c1dSIdo Schimmel if (test_bit(i, activity)) 3859cfc15c1dSIdo Schimmel nh_res_bucket_set_busy(&res_table->nh_buckets[i]); 3860cfc15c1dSIdo Schimmel } 3861cfc15c1dSIdo Schimmel 3862cfc15c1dSIdo Schimmel out: 3863cfc15c1dSIdo Schimmel rcu_read_unlock(); 3864cfc15c1dSIdo Schimmel } 3865cfc15c1dSIdo Schimmel EXPORT_SYMBOL(nexthop_res_grp_activity_update); 3866cfc15c1dSIdo Schimmel 3867a7ec2512SEric Dumazet static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list, 3868a7ec2512SEric Dumazet struct list_head *dev_to_kill) 3869ab84be7eSDavid Ahern { 3870fea7b201SEric Dumazet struct net *net; 3871fea7b201SEric Dumazet 3872a7ec2512SEric Dumazet ASSERT_RTNL(); 3873a7ec2512SEric Dumazet list_for_each_entry(net, net_list, exit_list) 3874ab84be7eSDavid Ahern flush_all_nexthops(net); 3875ab84be7eSDavid Ahern } 3876a7ec2512SEric Dumazet 3877a7ec2512SEric Dumazet static void __net_exit nexthop_net_exit(struct net *net) 3878a7ec2512SEric Dumazet { 3879a7ec2512SEric Dumazet kfree(net->nexthop.devhash); 3880a7ec2512SEric Dumazet net->nexthop.devhash = NULL; 3881fea7b201SEric Dumazet } 3882ab84be7eSDavid Ahern 3883ab84be7eSDavid Ahern static int __net_init nexthop_net_init(struct net *net) 3884ab84be7eSDavid Ahern { 3885597cfe4fSDavid Ahern size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE; 3886597cfe4fSDavid Ahern 3887ab84be7eSDavid Ahern net->nexthop.rb_root = RB_ROOT; 3888597cfe4fSDavid Ahern net->nexthop.devhash = kzalloc(sz, GFP_KERNEL); 3889597cfe4fSDavid Ahern if (!net->nexthop.devhash) 3890597cfe4fSDavid Ahern return -ENOMEM; 389180690ec6SIdo Schimmel BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain); 3892ab84be7eSDavid Ahern 3893ab84be7eSDavid 
Ahern return 0; 3894ab84be7eSDavid Ahern } 3895ab84be7eSDavid Ahern 3896ab84be7eSDavid Ahern static struct pernet_operations nexthop_net_ops = { 3897ab84be7eSDavid Ahern .init = nexthop_net_init, 3898a7ec2512SEric Dumazet .exit = nexthop_net_exit, 3899a7ec2512SEric Dumazet .exit_batch_rtnl = nexthop_net_exit_batch_rtnl, 3900ab84be7eSDavid Ahern }; 3901ab84be7eSDavid Ahern 3902ab84be7eSDavid Ahern static int __init nexthop_init(void) 3903ab84be7eSDavid Ahern { 3904ab84be7eSDavid Ahern register_pernet_subsys(&nexthop_net_ops); 3905ab84be7eSDavid Ahern 3906597cfe4fSDavid Ahern register_netdevice_notifier(&nh_netdev_notifier); 3907597cfe4fSDavid Ahern 3908ab84be7eSDavid Ahern rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); 3909ab84be7eSDavid Ahern rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0); 3910ab84be7eSDavid Ahern rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop, 3911ab84be7eSDavid Ahern rtm_dump_nexthop, 0); 3912ab84be7eSDavid Ahern 3913ab84be7eSDavid Ahern rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); 3914ab84be7eSDavid Ahern rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); 3915ab84be7eSDavid Ahern 3916ab84be7eSDavid Ahern rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); 3917ab84be7eSDavid Ahern rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); 3918ab84be7eSDavid Ahern 3919187d4c6bSPetr Machata rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket, 39208a1bbabbSPetr Machata rtm_dump_nexthop_bucket, 0); 39218a1bbabbSPetr Machata 3922ab84be7eSDavid Ahern return 0; 3923ab84be7eSDavid Ahern } 3924ab84be7eSDavid Ahern subsys_initcall(nexthop_init); 3925