1ab84be7eSDavid Ahern // SPDX-License-Identifier: GPL-2.0 2ab84be7eSDavid Ahern /* Generic nexthop implementation 3ab84be7eSDavid Ahern * 4ab84be7eSDavid Ahern * Copyright (c) 2017-19 Cumulus Networks 5ab84be7eSDavid Ahern * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> 6ab84be7eSDavid Ahern */ 7ab84be7eSDavid Ahern 8ab84be7eSDavid Ahern #include <linux/nexthop.h> 9ab84be7eSDavid Ahern #include <linux/rtnetlink.h> 10ab84be7eSDavid Ahern #include <linux/slab.h> 11430a0491SDavid Ahern #include <net/arp.h> 1253010f99SDavid Ahern #include <net/ipv6_stubs.h> 13b513bd03SDavid Ahern #include <net/lwtunnel.h> 14430a0491SDavid Ahern #include <net/ndisc.h> 15ab84be7eSDavid Ahern #include <net/nexthop.h> 16597cfe4fSDavid Ahern #include <net/route.h> 17ab84be7eSDavid Ahern #include <net/sock.h> 18ab84be7eSDavid Ahern 19430a0491SDavid Ahern static void remove_nexthop(struct net *net, struct nexthop *nh, 20430a0491SDavid Ahern struct nl_info *nlinfo); 21430a0491SDavid Ahern 22597cfe4fSDavid Ahern #define NH_DEV_HASHBITS 8 23597cfe4fSDavid Ahern #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) 24597cfe4fSDavid Ahern 25643d0878SPetr Machata static const struct nla_policy rtm_nh_policy_new[] = { 26ab84be7eSDavid Ahern [NHA_ID] = { .type = NLA_U32 }, 27ab84be7eSDavid Ahern [NHA_GROUP] = { .type = NLA_BINARY }, 28ab84be7eSDavid Ahern [NHA_GROUP_TYPE] = { .type = NLA_U16 }, 29ab84be7eSDavid Ahern [NHA_BLACKHOLE] = { .type = NLA_FLAG }, 30ab84be7eSDavid Ahern [NHA_OIF] = { .type = NLA_U32 }, 31ab84be7eSDavid Ahern [NHA_GATEWAY] = { .type = NLA_BINARY }, 32ab84be7eSDavid Ahern [NHA_ENCAP_TYPE] = { .type = NLA_U16 }, 33ab84be7eSDavid Ahern [NHA_ENCAP] = { .type = NLA_NESTED }, 3438428d68SRoopa Prabhu [NHA_FDB] = { .type = NLA_FLAG }, 35ab84be7eSDavid Ahern }; 36ab84be7eSDavid Ahern 3760f5ad5eSPetr Machata static const struct nla_policy rtm_nh_policy_get[] = { 3860f5ad5eSPetr Machata [NHA_ID] = { .type = NLA_U32 }, 3960f5ad5eSPetr Machata }; 4060f5ad5eSPetr Machata 
4144551bffSPetr Machata static const struct nla_policy rtm_nh_policy_dump[] = { 4244551bffSPetr Machata [NHA_OIF] = { .type = NLA_U32 }, 4344551bffSPetr Machata [NHA_GROUPS] = { .type = NLA_FLAG }, 4444551bffSPetr Machata [NHA_MASTER] = { .type = NLA_U32 }, 4544551bffSPetr Machata [NHA_FDB] = { .type = NLA_FLAG }, 4644551bffSPetr Machata }; 4744551bffSPetr Machata 485ca474f2SIdo Schimmel static bool nexthop_notifiers_is_empty(struct net *net) 495ca474f2SIdo Schimmel { 505ca474f2SIdo Schimmel return !net->nexthop.notifier_chain.head; 515ca474f2SIdo Schimmel } 525ca474f2SIdo Schimmel 535ca474f2SIdo Schimmel static void 545ca474f2SIdo Schimmel __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, 5596a85625SPetr Machata const struct nh_info *nhi) 565ca474f2SIdo Schimmel { 575ca474f2SIdo Schimmel nh_info->dev = nhi->fib_nhc.nhc_dev; 585ca474f2SIdo Schimmel nh_info->gw_family = nhi->fib_nhc.nhc_gw_family; 595ca474f2SIdo Schimmel if (nh_info->gw_family == AF_INET) 605ca474f2SIdo Schimmel nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4; 615ca474f2SIdo Schimmel else if (nh_info->gw_family == AF_INET6) 625ca474f2SIdo Schimmel nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; 635ca474f2SIdo Schimmel 645ca474f2SIdo Schimmel nh_info->is_reject = nhi->reject_nh; 655ca474f2SIdo Schimmel nh_info->is_fdb = nhi->fdb_nh; 665ca474f2SIdo Schimmel nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; 675ca474f2SIdo Schimmel } 685ca474f2SIdo Schimmel 695ca474f2SIdo Schimmel static int nh_notifier_single_info_init(struct nh_notifier_info *info, 705ca474f2SIdo Schimmel const struct nexthop *nh) 715ca474f2SIdo Schimmel { 7296a85625SPetr Machata struct nh_info *nhi = rtnl_dereference(nh->nh_info); 7396a85625SPetr Machata 7409ad6becSIdo Schimmel info->type = NH_NOTIFIER_INFO_TYPE_SINGLE; 755ca474f2SIdo Schimmel info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); 765ca474f2SIdo Schimmel if (!info->nh) 775ca474f2SIdo Schimmel return -ENOMEM; 785ca474f2SIdo Schimmel 7996a85625SPetr Machata 
__nh_notifier_single_info_init(info->nh, nhi); 805ca474f2SIdo Schimmel 815ca474f2SIdo Schimmel return 0; 825ca474f2SIdo Schimmel } 835ca474f2SIdo Schimmel 845ca474f2SIdo Schimmel static void nh_notifier_single_info_fini(struct nh_notifier_info *info) 855ca474f2SIdo Schimmel { 865ca474f2SIdo Schimmel kfree(info->nh); 875ca474f2SIdo Schimmel } 885ca474f2SIdo Schimmel 89da230501SPetr Machata static int nh_notifier_mp_info_init(struct nh_notifier_info *info, 90da230501SPetr Machata struct nh_group *nhg) 915ca474f2SIdo Schimmel { 925ca474f2SIdo Schimmel u16 num_nh = nhg->num_nh; 935ca474f2SIdo Schimmel int i; 945ca474f2SIdo Schimmel 9509ad6becSIdo Schimmel info->type = NH_NOTIFIER_INFO_TYPE_GRP; 965ca474f2SIdo Schimmel info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), 975ca474f2SIdo Schimmel GFP_KERNEL); 985ca474f2SIdo Schimmel if (!info->nh_grp) 995ca474f2SIdo Schimmel return -ENOMEM; 1005ca474f2SIdo Schimmel 1015ca474f2SIdo Schimmel info->nh_grp->num_nh = num_nh; 1025ca474f2SIdo Schimmel info->nh_grp->is_fdb = nhg->fdb_nh; 1035ca474f2SIdo Schimmel 1045ca474f2SIdo Schimmel for (i = 0; i < num_nh; i++) { 1055ca474f2SIdo Schimmel struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 10696a85625SPetr Machata struct nh_info *nhi; 1075ca474f2SIdo Schimmel 10896a85625SPetr Machata nhi = rtnl_dereference(nhge->nh->nh_info); 1095ca474f2SIdo Schimmel info->nh_grp->nh_entries[i].id = nhge->nh->id; 1105ca474f2SIdo Schimmel info->nh_grp->nh_entries[i].weight = nhge->weight; 1115ca474f2SIdo Schimmel __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, 11296a85625SPetr Machata nhi); 1135ca474f2SIdo Schimmel } 1145ca474f2SIdo Schimmel 1155ca474f2SIdo Schimmel return 0; 1165ca474f2SIdo Schimmel } 1175ca474f2SIdo Schimmel 1187c37c7e0SPetr Machata static int nh_notifier_res_table_info_init(struct nh_notifier_info *info, 1197c37c7e0SPetr Machata struct nh_group *nhg) 1207c37c7e0SPetr Machata { 1217c37c7e0SPetr Machata struct nh_res_table *res_table = 
rtnl_dereference(nhg->res_table); 1227c37c7e0SPetr Machata u16 num_nh_buckets = res_table->num_nh_buckets; 1237c37c7e0SPetr Machata unsigned long size; 1247c37c7e0SPetr Machata u16 i; 1257c37c7e0SPetr Machata 1267c37c7e0SPetr Machata info->type = NH_NOTIFIER_INFO_TYPE_RES_TABLE; 1277c37c7e0SPetr Machata size = struct_size(info->nh_res_table, nhs, num_nh_buckets); 1287c37c7e0SPetr Machata info->nh_res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | 1297c37c7e0SPetr Machata __GFP_NOWARN); 1307c37c7e0SPetr Machata if (!info->nh_res_table) 1317c37c7e0SPetr Machata return -ENOMEM; 1327c37c7e0SPetr Machata 1337c37c7e0SPetr Machata info->nh_res_table->num_nh_buckets = num_nh_buckets; 1347c37c7e0SPetr Machata 1357c37c7e0SPetr Machata for (i = 0; i < num_nh_buckets; i++) { 1367c37c7e0SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; 1377c37c7e0SPetr Machata struct nh_grp_entry *nhge; 1387c37c7e0SPetr Machata struct nh_info *nhi; 1397c37c7e0SPetr Machata 1407c37c7e0SPetr Machata nhge = rtnl_dereference(bucket->nh_entry); 1417c37c7e0SPetr Machata nhi = rtnl_dereference(nhge->nh->nh_info); 1427c37c7e0SPetr Machata __nh_notifier_single_info_init(&info->nh_res_table->nhs[i], 1437c37c7e0SPetr Machata nhi); 1447c37c7e0SPetr Machata } 1457c37c7e0SPetr Machata 1467c37c7e0SPetr Machata return 0; 1477c37c7e0SPetr Machata } 1487c37c7e0SPetr Machata 149da230501SPetr Machata static int nh_notifier_grp_info_init(struct nh_notifier_info *info, 150da230501SPetr Machata const struct nexthop *nh) 1515ca474f2SIdo Schimmel { 152da230501SPetr Machata struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 153da230501SPetr Machata 154da230501SPetr Machata if (nhg->mpath) 155da230501SPetr Machata return nh_notifier_mp_info_init(info, nhg); 1567c37c7e0SPetr Machata else if (nhg->resilient) 1577c37c7e0SPetr Machata return nh_notifier_res_table_info_init(info, nhg); 158da230501SPetr Machata return -EINVAL; 159da230501SPetr Machata } 160da230501SPetr Machata 161da230501SPetr 
Machata static void nh_notifier_grp_info_fini(struct nh_notifier_info *info, 162da230501SPetr Machata const struct nexthop *nh) 163da230501SPetr Machata { 164da230501SPetr Machata struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 165da230501SPetr Machata 166da230501SPetr Machata if (nhg->mpath) 1675ca474f2SIdo Schimmel kfree(info->nh_grp); 1687c37c7e0SPetr Machata else if (nhg->resilient) 1697c37c7e0SPetr Machata vfree(info->nh_res_table); 1705ca474f2SIdo Schimmel } 1715ca474f2SIdo Schimmel 1725ca474f2SIdo Schimmel static int nh_notifier_info_init(struct nh_notifier_info *info, 1735ca474f2SIdo Schimmel const struct nexthop *nh) 1745ca474f2SIdo Schimmel { 1755ca474f2SIdo Schimmel info->id = nh->id; 1765ca474f2SIdo Schimmel 17709ad6becSIdo Schimmel if (nh->is_group) 1785ca474f2SIdo Schimmel return nh_notifier_grp_info_init(info, nh); 1795ca474f2SIdo Schimmel else 1805ca474f2SIdo Schimmel return nh_notifier_single_info_init(info, nh); 1815ca474f2SIdo Schimmel } 1825ca474f2SIdo Schimmel 18309ad6becSIdo Schimmel static void nh_notifier_info_fini(struct nh_notifier_info *info, 18409ad6becSIdo Schimmel const struct nexthop *nh) 1855ca474f2SIdo Schimmel { 18609ad6becSIdo Schimmel if (nh->is_group) 187da230501SPetr Machata nh_notifier_grp_info_fini(info, nh); 1885ca474f2SIdo Schimmel else 1895ca474f2SIdo Schimmel nh_notifier_single_info_fini(info); 1905ca474f2SIdo Schimmel } 1915ca474f2SIdo Schimmel 1928590ceedSRoopa Prabhu static int call_nexthop_notifiers(struct net *net, 193d8e79f1dSNathan Chancellor enum nexthop_event_type event_type, 1943578d53dSIdo Schimmel struct nexthop *nh, 1953578d53dSIdo Schimmel struct netlink_ext_ack *extack) 1968590ceedSRoopa Prabhu { 1975ca474f2SIdo Schimmel struct nh_notifier_info info = { 1985ca474f2SIdo Schimmel .net = net, 1995ca474f2SIdo Schimmel .extack = extack, 2005ca474f2SIdo Schimmel }; 2018590ceedSRoopa Prabhu int err; 2028590ceedSRoopa Prabhu 2035ca474f2SIdo Schimmel ASSERT_RTNL(); 2045ca474f2SIdo Schimmel 2055ca474f2SIdo 
Schimmel if (nexthop_notifiers_is_empty(net)) 2065ca474f2SIdo Schimmel return 0; 2075ca474f2SIdo Schimmel 2085ca474f2SIdo Schimmel err = nh_notifier_info_init(&info, nh); 2095ca474f2SIdo Schimmel if (err) { 2105ca474f2SIdo Schimmel NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); 2115ca474f2SIdo Schimmel return err; 2125ca474f2SIdo Schimmel } 2135ca474f2SIdo Schimmel 21480690ec6SIdo Schimmel err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, 2151ec69d18SIdo Schimmel event_type, &info); 21609ad6becSIdo Schimmel nh_notifier_info_fini(&info, nh); 2175ca474f2SIdo Schimmel 2188590ceedSRoopa Prabhu return notifier_to_errno(err); 2198590ceedSRoopa Prabhu } 2208590ceedSRoopa Prabhu 2217c37c7e0SPetr Machata static int 2227c37c7e0SPetr Machata nh_notifier_res_bucket_idle_timer_get(const struct nh_notifier_info *info, 2237c37c7e0SPetr Machata bool force, unsigned int *p_idle_timer_ms) 2247c37c7e0SPetr Machata { 2257c37c7e0SPetr Machata struct nh_res_table *res_table; 2267c37c7e0SPetr Machata struct nh_group *nhg; 2277c37c7e0SPetr Machata struct nexthop *nh; 2287c37c7e0SPetr Machata int err = 0; 2297c37c7e0SPetr Machata 2307c37c7e0SPetr Machata /* When 'force' is false, nexthop bucket replacement is performed 2317c37c7e0SPetr Machata * because the bucket was deemed to be idle. In this case, capable 2327c37c7e0SPetr Machata * listeners can choose to perform an atomic replacement: The bucket is 2337c37c7e0SPetr Machata * only replaced if it is inactive. However, if the idle timer interval 2347c37c7e0SPetr Machata * is smaller than the interval in which a listener is querying 2357c37c7e0SPetr Machata * buckets' activity from the device, then atomic replacement should 2367c37c7e0SPetr Machata * not be tried. Pass the idle timer value to listeners, so that they 2377c37c7e0SPetr Machata * could determine which type of replacement to perform. 
2387c37c7e0SPetr Machata */ 2397c37c7e0SPetr Machata if (force) { 2407c37c7e0SPetr Machata *p_idle_timer_ms = 0; 2417c37c7e0SPetr Machata return 0; 2427c37c7e0SPetr Machata } 2437c37c7e0SPetr Machata 2447c37c7e0SPetr Machata rcu_read_lock(); 2457c37c7e0SPetr Machata 2467c37c7e0SPetr Machata nh = nexthop_find_by_id(info->net, info->id); 2477c37c7e0SPetr Machata if (!nh) { 2487c37c7e0SPetr Machata err = -EINVAL; 2497c37c7e0SPetr Machata goto out; 2507c37c7e0SPetr Machata } 2517c37c7e0SPetr Machata 2527c37c7e0SPetr Machata nhg = rcu_dereference(nh->nh_grp); 2537c37c7e0SPetr Machata res_table = rcu_dereference(nhg->res_table); 2547c37c7e0SPetr Machata *p_idle_timer_ms = jiffies_to_msecs(res_table->idle_timer); 2557c37c7e0SPetr Machata 2567c37c7e0SPetr Machata out: 2577c37c7e0SPetr Machata rcu_read_unlock(); 2587c37c7e0SPetr Machata 2597c37c7e0SPetr Machata return err; 2607c37c7e0SPetr Machata } 2617c37c7e0SPetr Machata 2627c37c7e0SPetr Machata static int nh_notifier_res_bucket_info_init(struct nh_notifier_info *info, 2637c37c7e0SPetr Machata u16 bucket_index, bool force, 2647c37c7e0SPetr Machata struct nh_info *oldi, 2657c37c7e0SPetr Machata struct nh_info *newi) 2667c37c7e0SPetr Machata { 2677c37c7e0SPetr Machata unsigned int idle_timer_ms; 2687c37c7e0SPetr Machata int err; 2697c37c7e0SPetr Machata 2707c37c7e0SPetr Machata err = nh_notifier_res_bucket_idle_timer_get(info, force, 2717c37c7e0SPetr Machata &idle_timer_ms); 2727c37c7e0SPetr Machata if (err) 2737c37c7e0SPetr Machata return err; 2747c37c7e0SPetr Machata 2757c37c7e0SPetr Machata info->type = NH_NOTIFIER_INFO_TYPE_RES_BUCKET; 2767c37c7e0SPetr Machata info->nh_res_bucket = kzalloc(sizeof(*info->nh_res_bucket), 2777c37c7e0SPetr Machata GFP_KERNEL); 2787c37c7e0SPetr Machata if (!info->nh_res_bucket) 2797c37c7e0SPetr Machata return -ENOMEM; 2807c37c7e0SPetr Machata 2817c37c7e0SPetr Machata info->nh_res_bucket->bucket_index = bucket_index; 2827c37c7e0SPetr Machata info->nh_res_bucket->idle_timer_ms = 
idle_timer_ms; 2837c37c7e0SPetr Machata info->nh_res_bucket->force = force; 2847c37c7e0SPetr Machata __nh_notifier_single_info_init(&info->nh_res_bucket->old_nh, oldi); 2857c37c7e0SPetr Machata __nh_notifier_single_info_init(&info->nh_res_bucket->new_nh, newi); 2867c37c7e0SPetr Machata return 0; 2877c37c7e0SPetr Machata } 2887c37c7e0SPetr Machata 2897c37c7e0SPetr Machata static void nh_notifier_res_bucket_info_fini(struct nh_notifier_info *info) 2907c37c7e0SPetr Machata { 2917c37c7e0SPetr Machata kfree(info->nh_res_bucket); 2927c37c7e0SPetr Machata } 2937c37c7e0SPetr Machata 2947c37c7e0SPetr Machata static int __call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id, 2957c37c7e0SPetr Machata u16 bucket_index, bool force, 2967c37c7e0SPetr Machata struct nh_info *oldi, 2977c37c7e0SPetr Machata struct nh_info *newi, 2987c37c7e0SPetr Machata struct netlink_ext_ack *extack) 2997c37c7e0SPetr Machata { 3007c37c7e0SPetr Machata struct nh_notifier_info info = { 3017c37c7e0SPetr Machata .net = net, 3027c37c7e0SPetr Machata .extack = extack, 3037c37c7e0SPetr Machata .id = nhg_id, 3047c37c7e0SPetr Machata }; 3057c37c7e0SPetr Machata int err; 3067c37c7e0SPetr Machata 3077c37c7e0SPetr Machata if (nexthop_notifiers_is_empty(net)) 3087c37c7e0SPetr Machata return 0; 3097c37c7e0SPetr Machata 3107c37c7e0SPetr Machata err = nh_notifier_res_bucket_info_init(&info, bucket_index, force, 3117c37c7e0SPetr Machata oldi, newi); 3127c37c7e0SPetr Machata if (err) 3137c37c7e0SPetr Machata return err; 3147c37c7e0SPetr Machata 3157c37c7e0SPetr Machata err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, 3167c37c7e0SPetr Machata NEXTHOP_EVENT_BUCKET_REPLACE, &info); 3177c37c7e0SPetr Machata nh_notifier_res_bucket_info_fini(&info); 3187c37c7e0SPetr Machata 3197c37c7e0SPetr Machata return notifier_to_errno(err); 3207c37c7e0SPetr Machata } 3217c37c7e0SPetr Machata 322283a72a5SPetr Machata /* There are three users of RES_TABLE, and NHs etc. 
 * referenced from there:
 *
 * 1) a collection of callbacks for NH maintenance. This operates under
 *    RTNL,
 * 2) the delayed work that gradually balances the resilient table,
 * 3) and nexthop_select_path(), operating under RCU.
 *
 * Both the delayed work and the RTNL block are writers, and need to
 * maintain mutual exclusion. Since there are only two and well-known
 * writers for each table, the RTNL code can make sure it has exclusive
 * access thus:
 *
 * - Have the DW operate without locking;
 * - synchronously cancel the DW;
 * - do the writing;
 * - if the write was not actually a delete, call upkeep, which schedules
 *   DW again if necessary.
 *
 * The functions that are always called from the RTNL context use
 * rtnl_dereference(). The functions that can also be called from the DW do
 * a raw dereference and rely on the above mutual exclusion scheme.
343283a72a5SPetr Machata */ 344283a72a5SPetr Machata #define nh_res_dereference(p) (rcu_dereference_raw(p)) 345283a72a5SPetr Machata 3467c37c7e0SPetr Machata static int call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id, 3477c37c7e0SPetr Machata u16 bucket_index, bool force, 3487c37c7e0SPetr Machata struct nexthop *old_nh, 3497c37c7e0SPetr Machata struct nexthop *new_nh, 3507c37c7e0SPetr Machata struct netlink_ext_ack *extack) 3517c37c7e0SPetr Machata { 3527c37c7e0SPetr Machata struct nh_info *oldi = nh_res_dereference(old_nh->nh_info); 3537c37c7e0SPetr Machata struct nh_info *newi = nh_res_dereference(new_nh->nh_info); 3547c37c7e0SPetr Machata 3557c37c7e0SPetr Machata return __call_nexthop_res_bucket_notifiers(net, nhg_id, bucket_index, 3567c37c7e0SPetr Machata force, oldi, newi, extack); 3577c37c7e0SPetr Machata } 3587c37c7e0SPetr Machata 3597c37c7e0SPetr Machata static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh, 3607c37c7e0SPetr Machata struct netlink_ext_ack *extack) 3617c37c7e0SPetr Machata { 3627c37c7e0SPetr Machata struct nh_notifier_info info = { 3637c37c7e0SPetr Machata .net = net, 3647c37c7e0SPetr Machata .extack = extack, 3657c37c7e0SPetr Machata }; 3667c37c7e0SPetr Machata struct nh_group *nhg; 3677c37c7e0SPetr Machata int err; 3687c37c7e0SPetr Machata 3697c37c7e0SPetr Machata ASSERT_RTNL(); 3707c37c7e0SPetr Machata 3717c37c7e0SPetr Machata if (nexthop_notifiers_is_empty(net)) 3727c37c7e0SPetr Machata return 0; 3737c37c7e0SPetr Machata 3747c37c7e0SPetr Machata /* At this point, the nexthop buckets are still not populated. Only 3757c37c7e0SPetr Machata * emit a notification with the logical nexthops, so that a listener 3767c37c7e0SPetr Machata * could potentially veto it in case of unsupported configuration. 
3777c37c7e0SPetr Machata */ 3787c37c7e0SPetr Machata nhg = rtnl_dereference(nh->nh_grp); 3797c37c7e0SPetr Machata err = nh_notifier_mp_info_init(&info, nhg); 3807c37c7e0SPetr Machata if (err) { 3817c37c7e0SPetr Machata NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); 3827c37c7e0SPetr Machata return err; 3837c37c7e0SPetr Machata } 3847c37c7e0SPetr Machata 3857c37c7e0SPetr Machata err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, 3867c37c7e0SPetr Machata NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, 3877c37c7e0SPetr Machata &info); 3887c37c7e0SPetr Machata kfree(info.nh_grp); 3897c37c7e0SPetr Machata 3907c37c7e0SPetr Machata return notifier_to_errno(err); 3917c37c7e0SPetr Machata } 3927c37c7e0SPetr Machata 393975ff7f3SIdo Schimmel static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, 394975ff7f3SIdo Schimmel enum nexthop_event_type event_type, 395975ff7f3SIdo Schimmel struct nexthop *nh, 396975ff7f3SIdo Schimmel struct netlink_ext_ack *extack) 397975ff7f3SIdo Schimmel { 398975ff7f3SIdo Schimmel struct nh_notifier_info info = { 399975ff7f3SIdo Schimmel .net = net, 400975ff7f3SIdo Schimmel .extack = extack, 401975ff7f3SIdo Schimmel }; 402975ff7f3SIdo Schimmel int err; 403975ff7f3SIdo Schimmel 404975ff7f3SIdo Schimmel err = nh_notifier_info_init(&info, nh); 405975ff7f3SIdo Schimmel if (err) 406975ff7f3SIdo Schimmel return err; 407975ff7f3SIdo Schimmel 408975ff7f3SIdo Schimmel err = nb->notifier_call(nb, event_type, &info); 40909ad6becSIdo Schimmel nh_notifier_info_fini(&info, nh); 410975ff7f3SIdo Schimmel 411975ff7f3SIdo Schimmel return notifier_to_errno(err); 412975ff7f3SIdo Schimmel } 413975ff7f3SIdo Schimmel 414597cfe4fSDavid Ahern static unsigned int nh_dev_hashfn(unsigned int val) 415597cfe4fSDavid Ahern { 416597cfe4fSDavid Ahern unsigned int mask = NH_DEV_HASHSIZE - 1; 417597cfe4fSDavid Ahern 418597cfe4fSDavid Ahern return (val ^ 419597cfe4fSDavid Ahern (val >> NH_DEV_HASHBITS) ^ 420597cfe4fSDavid Ahern (val >> 
(NH_DEV_HASHBITS * 2))) & mask; 421597cfe4fSDavid Ahern } 422597cfe4fSDavid Ahern 423597cfe4fSDavid Ahern static void nexthop_devhash_add(struct net *net, struct nh_info *nhi) 424597cfe4fSDavid Ahern { 425597cfe4fSDavid Ahern struct net_device *dev = nhi->fib_nhc.nhc_dev; 426597cfe4fSDavid Ahern struct hlist_head *head; 427597cfe4fSDavid Ahern unsigned int hash; 428597cfe4fSDavid Ahern 429597cfe4fSDavid Ahern WARN_ON(!dev); 430597cfe4fSDavid Ahern 431597cfe4fSDavid Ahern hash = nh_dev_hashfn(dev->ifindex); 432597cfe4fSDavid Ahern head = &net->nexthop.devhash[hash]; 433597cfe4fSDavid Ahern hlist_add_head(&nhi->dev_hash, head); 434597cfe4fSDavid Ahern } 435597cfe4fSDavid Ahern 4365d1f0f09SDavid Ahern static void nexthop_free_group(struct nexthop *nh) 437ab84be7eSDavid Ahern { 438430a0491SDavid Ahern struct nh_group *nhg; 439430a0491SDavid Ahern int i; 440430a0491SDavid Ahern 441430a0491SDavid Ahern nhg = rcu_dereference_raw(nh->nh_grp); 44290f33bffSNikolay Aleksandrov for (i = 0; i < nhg->num_nh; ++i) { 44390f33bffSNikolay Aleksandrov struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 444430a0491SDavid Ahern 44590f33bffSNikolay Aleksandrov WARN_ON(!list_empty(&nhge->nh_list)); 44690f33bffSNikolay Aleksandrov nexthop_put(nhge->nh); 44790f33bffSNikolay Aleksandrov } 44890f33bffSNikolay Aleksandrov 44990f33bffSNikolay Aleksandrov WARN_ON(nhg->spare == nhg); 45090f33bffSNikolay Aleksandrov 451283a72a5SPetr Machata if (nhg->resilient) 452283a72a5SPetr Machata vfree(rcu_dereference_raw(nhg->res_table)); 453283a72a5SPetr Machata 45490f33bffSNikolay Aleksandrov kfree(nhg->spare); 455430a0491SDavid Ahern kfree(nhg); 456430a0491SDavid Ahern } 457430a0491SDavid Ahern 458430a0491SDavid Ahern static void nexthop_free_single(struct nexthop *nh) 459430a0491SDavid Ahern { 460ab84be7eSDavid Ahern struct nh_info *nhi; 461ab84be7eSDavid Ahern 462ab84be7eSDavid Ahern nhi = rcu_dereference_raw(nh->nh_info); 463597cfe4fSDavid Ahern switch (nhi->family) { 464597cfe4fSDavid Ahern case 
AF_INET: 465597cfe4fSDavid Ahern fib_nh_release(nh->net, &nhi->fib_nh); 466597cfe4fSDavid Ahern break; 46753010f99SDavid Ahern case AF_INET6: 46853010f99SDavid Ahern ipv6_stub->fib6_nh_release(&nhi->fib6_nh); 46953010f99SDavid Ahern break; 470597cfe4fSDavid Ahern } 471ab84be7eSDavid Ahern kfree(nhi); 472430a0491SDavid Ahern } 473430a0491SDavid Ahern 474430a0491SDavid Ahern void nexthop_free_rcu(struct rcu_head *head) 475430a0491SDavid Ahern { 476430a0491SDavid Ahern struct nexthop *nh = container_of(head, struct nexthop, rcu); 477430a0491SDavid Ahern 478430a0491SDavid Ahern if (nh->is_group) 4795d1f0f09SDavid Ahern nexthop_free_group(nh); 480430a0491SDavid Ahern else 481430a0491SDavid Ahern nexthop_free_single(nh); 482ab84be7eSDavid Ahern 483ab84be7eSDavid Ahern kfree(nh); 484ab84be7eSDavid Ahern } 485ab84be7eSDavid Ahern EXPORT_SYMBOL_GPL(nexthop_free_rcu); 486ab84be7eSDavid Ahern 487ab84be7eSDavid Ahern static struct nexthop *nexthop_alloc(void) 488ab84be7eSDavid Ahern { 489ab84be7eSDavid Ahern struct nexthop *nh; 490ab84be7eSDavid Ahern 491ab84be7eSDavid Ahern nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL); 492430a0491SDavid Ahern if (nh) { 4934c7e8084SDavid Ahern INIT_LIST_HEAD(&nh->fi_list); 494f88d8ea6SDavid Ahern INIT_LIST_HEAD(&nh->f6i_list); 495430a0491SDavid Ahern INIT_LIST_HEAD(&nh->grp_list); 49638428d68SRoopa Prabhu INIT_LIST_HEAD(&nh->fdb_list); 497430a0491SDavid Ahern } 498ab84be7eSDavid Ahern return nh; 499ab84be7eSDavid Ahern } 500ab84be7eSDavid Ahern 501430a0491SDavid Ahern static struct nh_group *nexthop_grp_alloc(u16 num_nh) 502430a0491SDavid Ahern { 503430a0491SDavid Ahern struct nh_group *nhg; 504430a0491SDavid Ahern 505d7d49dc7SIdo Schimmel nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL); 506430a0491SDavid Ahern if (nhg) 507430a0491SDavid Ahern nhg->num_nh = num_nh; 508430a0491SDavid Ahern 509430a0491SDavid Ahern return nhg; 510430a0491SDavid Ahern } 511430a0491SDavid Ahern 512283a72a5SPetr Machata static void 
nh_res_table_upkeep_dw(struct work_struct *work); 513283a72a5SPetr Machata 514283a72a5SPetr Machata static struct nh_res_table * 515283a72a5SPetr Machata nexthop_res_table_alloc(struct net *net, u32 nhg_id, struct nh_config *cfg) 516283a72a5SPetr Machata { 517283a72a5SPetr Machata const u16 num_nh_buckets = cfg->nh_grp_res_num_buckets; 518283a72a5SPetr Machata struct nh_res_table *res_table; 519283a72a5SPetr Machata unsigned long size; 520283a72a5SPetr Machata 521283a72a5SPetr Machata size = struct_size(res_table, nh_buckets, num_nh_buckets); 522283a72a5SPetr Machata res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN); 523283a72a5SPetr Machata if (!res_table) 524283a72a5SPetr Machata return NULL; 525283a72a5SPetr Machata 526283a72a5SPetr Machata res_table->net = net; 527283a72a5SPetr Machata res_table->nhg_id = nhg_id; 528283a72a5SPetr Machata INIT_DELAYED_WORK(&res_table->upkeep_dw, &nh_res_table_upkeep_dw); 529283a72a5SPetr Machata INIT_LIST_HEAD(&res_table->uw_nh_entries); 530283a72a5SPetr Machata res_table->idle_timer = cfg->nh_grp_res_idle_timer; 531283a72a5SPetr Machata res_table->unbalanced_timer = cfg->nh_grp_res_unbalanced_timer; 532283a72a5SPetr Machata res_table->num_nh_buckets = num_nh_buckets; 533283a72a5SPetr Machata return res_table; 534283a72a5SPetr Machata } 535283a72a5SPetr Machata 536ab84be7eSDavid Ahern static void nh_base_seq_inc(struct net *net) 537ab84be7eSDavid Ahern { 538ab84be7eSDavid Ahern while (++net->nexthop.seq == 0) 539ab84be7eSDavid Ahern ; 540ab84be7eSDavid Ahern } 541ab84be7eSDavid Ahern 542ab84be7eSDavid Ahern /* no reference taken; rcu lock or rtnl must be held */ 543ab84be7eSDavid Ahern struct nexthop *nexthop_find_by_id(struct net *net, u32 id) 544ab84be7eSDavid Ahern { 545ab84be7eSDavid Ahern struct rb_node **pp, *parent = NULL, *next; 546ab84be7eSDavid Ahern 547ab84be7eSDavid Ahern pp = &net->nexthop.rb_root.rb_node; 548ab84be7eSDavid Ahern while (1) { 549ab84be7eSDavid Ahern struct nexthop *nh; 
550ab84be7eSDavid Ahern 551ab84be7eSDavid Ahern next = rcu_dereference_raw(*pp); 552ab84be7eSDavid Ahern if (!next) 553ab84be7eSDavid Ahern break; 554ab84be7eSDavid Ahern parent = next; 555ab84be7eSDavid Ahern 556ab84be7eSDavid Ahern nh = rb_entry(parent, struct nexthop, rb_node); 557ab84be7eSDavid Ahern if (id < nh->id) 558ab84be7eSDavid Ahern pp = &next->rb_left; 559ab84be7eSDavid Ahern else if (id > nh->id) 560ab84be7eSDavid Ahern pp = &next->rb_right; 561ab84be7eSDavid Ahern else 562ab84be7eSDavid Ahern return nh; 563ab84be7eSDavid Ahern } 564ab84be7eSDavid Ahern return NULL; 565ab84be7eSDavid Ahern } 566ab84be7eSDavid Ahern EXPORT_SYMBOL_GPL(nexthop_find_by_id); 567ab84be7eSDavid Ahern 568ab84be7eSDavid Ahern /* used for auto id allocation; called with rtnl held */ 569ab84be7eSDavid Ahern static u32 nh_find_unused_id(struct net *net) 570ab84be7eSDavid Ahern { 571ab84be7eSDavid Ahern u32 id_start = net->nexthop.last_id_allocated; 572ab84be7eSDavid Ahern 573ab84be7eSDavid Ahern while (1) { 574ab84be7eSDavid Ahern net->nexthop.last_id_allocated++; 575ab84be7eSDavid Ahern if (net->nexthop.last_id_allocated == id_start) 576ab84be7eSDavid Ahern break; 577ab84be7eSDavid Ahern 578ab84be7eSDavid Ahern if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated)) 579ab84be7eSDavid Ahern return net->nexthop.last_id_allocated; 580ab84be7eSDavid Ahern } 581ab84be7eSDavid Ahern return 0; 582ab84be7eSDavid Ahern } 583ab84be7eSDavid Ahern 584283a72a5SPetr Machata static void nh_res_time_set_deadline(unsigned long next_time, 585283a72a5SPetr Machata unsigned long *deadline) 586283a72a5SPetr Machata { 587283a72a5SPetr Machata if (time_before(next_time, *deadline)) 588283a72a5SPetr Machata *deadline = next_time; 589283a72a5SPetr Machata } 590283a72a5SPetr Machata 591430a0491SDavid Ahern static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) 592430a0491SDavid Ahern { 593430a0491SDavid Ahern struct nexthop_grp *p; 594430a0491SDavid Ahern size_t len = nhg->num_nh * 
sizeof(*p); 595430a0491SDavid Ahern struct nlattr *nla; 596430a0491SDavid Ahern u16 group_type = 0; 597430a0491SDavid Ahern int i; 598430a0491SDavid Ahern 599430a0491SDavid Ahern if (nhg->mpath) 600430a0491SDavid Ahern group_type = NEXTHOP_GRP_TYPE_MPATH; 601430a0491SDavid Ahern 602430a0491SDavid Ahern if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type)) 603430a0491SDavid Ahern goto nla_put_failure; 604430a0491SDavid Ahern 605430a0491SDavid Ahern nla = nla_reserve(skb, NHA_GROUP, len); 606430a0491SDavid Ahern if (!nla) 607430a0491SDavid Ahern goto nla_put_failure; 608430a0491SDavid Ahern 609430a0491SDavid Ahern p = nla_data(nla); 610430a0491SDavid Ahern for (i = 0; i < nhg->num_nh; ++i) { 611430a0491SDavid Ahern p->id = nhg->nh_entries[i].nh->id; 612430a0491SDavid Ahern p->weight = nhg->nh_entries[i].weight - 1; 613430a0491SDavid Ahern p += 1; 614430a0491SDavid Ahern } 615430a0491SDavid Ahern 616430a0491SDavid Ahern return 0; 617430a0491SDavid Ahern 618430a0491SDavid Ahern nla_put_failure: 619430a0491SDavid Ahern return -EMSGSIZE; 620430a0491SDavid Ahern } 621430a0491SDavid Ahern 622ab84be7eSDavid Ahern static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, 623ab84be7eSDavid Ahern int event, u32 portid, u32 seq, unsigned int nlflags) 624ab84be7eSDavid Ahern { 62553010f99SDavid Ahern struct fib6_nh *fib6_nh; 626597cfe4fSDavid Ahern struct fib_nh *fib_nh; 627ab84be7eSDavid Ahern struct nlmsghdr *nlh; 628ab84be7eSDavid Ahern struct nh_info *nhi; 629ab84be7eSDavid Ahern struct nhmsg *nhm; 630ab84be7eSDavid Ahern 631ab84be7eSDavid Ahern nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags); 632ab84be7eSDavid Ahern if (!nlh) 633ab84be7eSDavid Ahern return -EMSGSIZE; 634ab84be7eSDavid Ahern 635ab84be7eSDavid Ahern nhm = nlmsg_data(nlh); 636ab84be7eSDavid Ahern nhm->nh_family = AF_UNSPEC; 637ab84be7eSDavid Ahern nhm->nh_flags = nh->nh_flags; 638ab84be7eSDavid Ahern nhm->nh_protocol = nh->protocol; 639ab84be7eSDavid Ahern nhm->nh_scope = 0; 
640ab84be7eSDavid Ahern nhm->resvd = 0; 641ab84be7eSDavid Ahern 642ab84be7eSDavid Ahern if (nla_put_u32(skb, NHA_ID, nh->id)) 643ab84be7eSDavid Ahern goto nla_put_failure; 644ab84be7eSDavid Ahern 645430a0491SDavid Ahern if (nh->is_group) { 646430a0491SDavid Ahern struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 647430a0491SDavid Ahern 648ce9ac056SDavid Ahern if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB)) 649ce9ac056SDavid Ahern goto nla_put_failure; 650430a0491SDavid Ahern if (nla_put_nh_group(skb, nhg)) 651430a0491SDavid Ahern goto nla_put_failure; 652430a0491SDavid Ahern goto out; 653430a0491SDavid Ahern } 654430a0491SDavid Ahern 655ab84be7eSDavid Ahern nhi = rtnl_dereference(nh->nh_info); 656ab84be7eSDavid Ahern nhm->nh_family = nhi->family; 657ab84be7eSDavid Ahern if (nhi->reject_nh) { 658ab84be7eSDavid Ahern if (nla_put_flag(skb, NHA_BLACKHOLE)) 659ab84be7eSDavid Ahern goto nla_put_failure; 660ab84be7eSDavid Ahern goto out; 661ce9ac056SDavid Ahern } else if (nhi->fdb_nh) { 662ce9ac056SDavid Ahern if (nla_put_flag(skb, NHA_FDB)) 663ce9ac056SDavid Ahern goto nla_put_failure; 664ce9ac056SDavid Ahern } else { 665597cfe4fSDavid Ahern const struct net_device *dev; 666597cfe4fSDavid Ahern 667597cfe4fSDavid Ahern dev = nhi->fib_nhc.nhc_dev; 668597cfe4fSDavid Ahern if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex)) 669597cfe4fSDavid Ahern goto nla_put_failure; 670597cfe4fSDavid Ahern } 671597cfe4fSDavid Ahern 672597cfe4fSDavid Ahern nhm->nh_scope = nhi->fib_nhc.nhc_scope; 673597cfe4fSDavid Ahern switch (nhi->family) { 674597cfe4fSDavid Ahern case AF_INET: 675597cfe4fSDavid Ahern fib_nh = &nhi->fib_nh; 676597cfe4fSDavid Ahern if (fib_nh->fib_nh_gw_family && 67733d80996SIdo Schimmel nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4)) 678597cfe4fSDavid Ahern goto nla_put_failure; 679597cfe4fSDavid Ahern break; 68053010f99SDavid Ahern 68153010f99SDavid Ahern case AF_INET6: 68253010f99SDavid Ahern fib6_nh = &nhi->fib6_nh; 68353010f99SDavid Ahern if 
(fib6_nh->fib_nh_gw_family && 68453010f99SDavid Ahern nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6)) 68553010f99SDavid Ahern goto nla_put_failure; 68653010f99SDavid Ahern break; 687ab84be7eSDavid Ahern } 688ab84be7eSDavid Ahern 689b513bd03SDavid Ahern if (nhi->fib_nhc.nhc_lwtstate && 690b513bd03SDavid Ahern lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate, 691b513bd03SDavid Ahern NHA_ENCAP, NHA_ENCAP_TYPE) < 0) 692b513bd03SDavid Ahern goto nla_put_failure; 693b513bd03SDavid Ahern 694ab84be7eSDavid Ahern out: 695ab84be7eSDavid Ahern nlmsg_end(skb, nlh); 696ab84be7eSDavid Ahern return 0; 697ab84be7eSDavid Ahern 698ab84be7eSDavid Ahern nla_put_failure: 699d69100b8SStephen Worley nlmsg_cancel(skb, nlh); 700ab84be7eSDavid Ahern return -EMSGSIZE; 701ab84be7eSDavid Ahern } 702ab84be7eSDavid Ahern 703430a0491SDavid Ahern static size_t nh_nlmsg_size_grp(struct nexthop *nh) 704430a0491SDavid Ahern { 705430a0491SDavid Ahern struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 706430a0491SDavid Ahern size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh; 707430a0491SDavid Ahern 708430a0491SDavid Ahern return nla_total_size(sz) + 709430a0491SDavid Ahern nla_total_size(2); /* NHA_GROUP_TYPE */ 710430a0491SDavid Ahern } 711430a0491SDavid Ahern 712430a0491SDavid Ahern static size_t nh_nlmsg_size_single(struct nexthop *nh) 713ab84be7eSDavid Ahern { 714597cfe4fSDavid Ahern struct nh_info *nhi = rtnl_dereference(nh->nh_info); 715430a0491SDavid Ahern size_t sz; 716ab84be7eSDavid Ahern 717ab84be7eSDavid Ahern /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE 718ab84be7eSDavid Ahern * are mutually exclusive 719ab84be7eSDavid Ahern */ 720430a0491SDavid Ahern sz = nla_total_size(4); /* NHA_OIF */ 721ab84be7eSDavid Ahern 722597cfe4fSDavid Ahern switch (nhi->family) { 723597cfe4fSDavid Ahern case AF_INET: 724597cfe4fSDavid Ahern if (nhi->fib_nh.fib_nh_gw_family) 725597cfe4fSDavid Ahern sz += nla_total_size(4); /* NHA_GATEWAY */ 726597cfe4fSDavid Ahern break; 72753010f99SDavid 
Ahern 72853010f99SDavid Ahern case AF_INET6: 72953010f99SDavid Ahern /* NHA_GATEWAY */ 73053010f99SDavid Ahern if (nhi->fib6_nh.fib_nh_gw_family) 73153010f99SDavid Ahern sz += nla_total_size(sizeof(const struct in6_addr)); 73253010f99SDavid Ahern break; 733597cfe4fSDavid Ahern } 734597cfe4fSDavid Ahern 735b513bd03SDavid Ahern if (nhi->fib_nhc.nhc_lwtstate) { 736b513bd03SDavid Ahern sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate); 737b513bd03SDavid Ahern sz += nla_total_size(2); /* NHA_ENCAP_TYPE */ 738b513bd03SDavid Ahern } 739b513bd03SDavid Ahern 740ab84be7eSDavid Ahern return sz; 741ab84be7eSDavid Ahern } 742ab84be7eSDavid Ahern 743430a0491SDavid Ahern static size_t nh_nlmsg_size(struct nexthop *nh) 744430a0491SDavid Ahern { 745f9e95555SStephen Worley size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg)); 746f9e95555SStephen Worley 747f9e95555SStephen Worley sz += nla_total_size(4); /* NHA_ID */ 748430a0491SDavid Ahern 749430a0491SDavid Ahern if (nh->is_group) 750430a0491SDavid Ahern sz += nh_nlmsg_size_grp(nh); 751430a0491SDavid Ahern else 752430a0491SDavid Ahern sz += nh_nlmsg_size_single(nh); 753430a0491SDavid Ahern 754430a0491SDavid Ahern return sz; 755430a0491SDavid Ahern } 756430a0491SDavid Ahern 757ab84be7eSDavid Ahern static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) 758ab84be7eSDavid Ahern { 759ab84be7eSDavid Ahern unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0; 760ab84be7eSDavid Ahern u32 seq = info->nlh ? 
info->nlh->nlmsg_seq : 0; 761ab84be7eSDavid Ahern struct sk_buff *skb; 762ab84be7eSDavid Ahern int err = -ENOBUFS; 763ab84be7eSDavid Ahern 764ab84be7eSDavid Ahern skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any()); 765ab84be7eSDavid Ahern if (!skb) 766ab84be7eSDavid Ahern goto errout; 767ab84be7eSDavid Ahern 768ab84be7eSDavid Ahern err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags); 769ab84be7eSDavid Ahern if (err < 0) { 770ab84be7eSDavid Ahern /* -EMSGSIZE implies BUG in nh_nlmsg_size() */ 771ab84be7eSDavid Ahern WARN_ON(err == -EMSGSIZE); 772ab84be7eSDavid Ahern kfree_skb(skb); 773ab84be7eSDavid Ahern goto errout; 774ab84be7eSDavid Ahern } 775ab84be7eSDavid Ahern 776ab84be7eSDavid Ahern rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP, 777ab84be7eSDavid Ahern info->nlh, gfp_any()); 778ab84be7eSDavid Ahern return; 779ab84be7eSDavid Ahern errout: 780ab84be7eSDavid Ahern if (err < 0) 781ab84be7eSDavid Ahern rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err); 782ab84be7eSDavid Ahern } 783ab84be7eSDavid Ahern 784283a72a5SPetr Machata static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket) 785283a72a5SPetr Machata { 786283a72a5SPetr Machata return (unsigned long)atomic_long_read(&bucket->used_time); 787283a72a5SPetr Machata } 788283a72a5SPetr Machata 789283a72a5SPetr Machata static unsigned long 790283a72a5SPetr Machata nh_res_bucket_idle_point(const struct nh_res_table *res_table, 791283a72a5SPetr Machata const struct nh_res_bucket *bucket, 792283a72a5SPetr Machata unsigned long now) 793283a72a5SPetr Machata { 794283a72a5SPetr Machata unsigned long time = nh_res_bucket_used_time(bucket); 795283a72a5SPetr Machata 796283a72a5SPetr Machata /* Bucket was not used since it was migrated. The idle time is now. 
*/ 797283a72a5SPetr Machata if (time == bucket->migrated_time) 798283a72a5SPetr Machata return now; 799283a72a5SPetr Machata 800283a72a5SPetr Machata return time + res_table->idle_timer; 801283a72a5SPetr Machata } 802283a72a5SPetr Machata 803283a72a5SPetr Machata static unsigned long 804283a72a5SPetr Machata nh_res_table_unb_point(const struct nh_res_table *res_table) 805283a72a5SPetr Machata { 806283a72a5SPetr Machata return res_table->unbalanced_since + res_table->unbalanced_timer; 807283a72a5SPetr Machata } 808283a72a5SPetr Machata 809283a72a5SPetr Machata static void nh_res_bucket_set_idle(const struct nh_res_table *res_table, 810283a72a5SPetr Machata struct nh_res_bucket *bucket) 811283a72a5SPetr Machata { 812283a72a5SPetr Machata unsigned long now = jiffies; 813283a72a5SPetr Machata 814283a72a5SPetr Machata atomic_long_set(&bucket->used_time, (long)now); 815283a72a5SPetr Machata bucket->migrated_time = now; 816283a72a5SPetr Machata } 817283a72a5SPetr Machata 818283a72a5SPetr Machata static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket) 819283a72a5SPetr Machata { 820283a72a5SPetr Machata atomic_long_set(&bucket->used_time, (long)jiffies); 821283a72a5SPetr Machata } 822283a72a5SPetr Machata 823430a0491SDavid Ahern static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, 824ce9ac056SDavid Ahern bool *is_fdb, struct netlink_ext_ack *extack) 825597cfe4fSDavid Ahern { 826430a0491SDavid Ahern if (nh->is_group) { 827430a0491SDavid Ahern struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 828430a0491SDavid Ahern 829283a72a5SPetr Machata /* Nesting groups within groups is not supported. 
*/ 830430a0491SDavid Ahern if (nhg->mpath) { 831430a0491SDavid Ahern NL_SET_ERR_MSG(extack, 832430a0491SDavid Ahern "Multipath group can not be a nexthop within a group"); 833430a0491SDavid Ahern return false; 834430a0491SDavid Ahern } 835283a72a5SPetr Machata if (nhg->resilient) { 836283a72a5SPetr Machata NL_SET_ERR_MSG(extack, 837283a72a5SPetr Machata "Resilient group can not be a nexthop within a group"); 838283a72a5SPetr Machata return false; 839283a72a5SPetr Machata } 840ce9ac056SDavid Ahern *is_fdb = nhg->fdb_nh; 841430a0491SDavid Ahern } else { 842430a0491SDavid Ahern struct nh_info *nhi = rtnl_dereference(nh->nh_info); 843430a0491SDavid Ahern 844430a0491SDavid Ahern if (nhi->reject_nh && npaths > 1) { 845430a0491SDavid Ahern NL_SET_ERR_MSG(extack, 846430a0491SDavid Ahern "Blackhole nexthop can not be used in a group with more than 1 path"); 847430a0491SDavid Ahern return false; 848430a0491SDavid Ahern } 849ce9ac056SDavid Ahern *is_fdb = nhi->fdb_nh; 850430a0491SDavid Ahern } 851430a0491SDavid Ahern 852430a0491SDavid Ahern return true; 853430a0491SDavid Ahern } 854430a0491SDavid Ahern 85538428d68SRoopa Prabhu static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family, 85638428d68SRoopa Prabhu struct netlink_ext_ack *extack) 85738428d68SRoopa Prabhu { 85838428d68SRoopa Prabhu struct nh_info *nhi; 85938428d68SRoopa Prabhu 860ce9ac056SDavid Ahern nhi = rtnl_dereference(nh->nh_info); 861ce9ac056SDavid Ahern 862ce9ac056SDavid Ahern if (!nhi->fdb_nh) { 86338428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops"); 86438428d68SRoopa Prabhu return -EINVAL; 86538428d68SRoopa Prabhu } 86638428d68SRoopa Prabhu 86738428d68SRoopa Prabhu if (*nh_family == AF_UNSPEC) { 86838428d68SRoopa Prabhu *nh_family = nhi->family; 86938428d68SRoopa Prabhu } else if (*nh_family != nhi->family) { 87038428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops"); 87138428d68SRoopa Prabhu return -EINVAL; 
87238428d68SRoopa Prabhu } 87338428d68SRoopa Prabhu 87438428d68SRoopa Prabhu return 0; 87538428d68SRoopa Prabhu } 87638428d68SRoopa Prabhu 877643d0878SPetr Machata static int nh_check_attr_group(struct net *net, 878643d0878SPetr Machata struct nlattr *tb[], size_t tb_size, 879430a0491SDavid Ahern struct netlink_ext_ack *extack) 880430a0491SDavid Ahern { 881430a0491SDavid Ahern unsigned int len = nla_len(tb[NHA_GROUP]); 88238428d68SRoopa Prabhu u8 nh_family = AF_UNSPEC; 883430a0491SDavid Ahern struct nexthop_grp *nhg; 884430a0491SDavid Ahern unsigned int i, j; 88538428d68SRoopa Prabhu u8 nhg_fdb = 0; 886430a0491SDavid Ahern 887eeaac363SNikolay Aleksandrov if (!len || len & (sizeof(struct nexthop_grp) - 1)) { 888430a0491SDavid Ahern NL_SET_ERR_MSG(extack, 889430a0491SDavid Ahern "Invalid length for nexthop group attribute"); 890430a0491SDavid Ahern return -EINVAL; 891430a0491SDavid Ahern } 892430a0491SDavid Ahern 893430a0491SDavid Ahern /* convert len to number of nexthop ids */ 894430a0491SDavid Ahern len /= sizeof(*nhg); 895430a0491SDavid Ahern 896430a0491SDavid Ahern nhg = nla_data(tb[NHA_GROUP]); 897430a0491SDavid Ahern for (i = 0; i < len; ++i) { 898430a0491SDavid Ahern if (nhg[i].resvd1 || nhg[i].resvd2) { 899430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0"); 900430a0491SDavid Ahern return -EINVAL; 901430a0491SDavid Ahern } 902430a0491SDavid Ahern if (nhg[i].weight > 254) { 903430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid value for weight"); 904430a0491SDavid Ahern return -EINVAL; 905430a0491SDavid Ahern } 906430a0491SDavid Ahern for (j = i + 1; j < len; ++j) { 907430a0491SDavid Ahern if (nhg[i].id == nhg[j].id) { 908430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group"); 909430a0491SDavid Ahern return -EINVAL; 910430a0491SDavid Ahern } 911430a0491SDavid Ahern } 912430a0491SDavid Ahern } 913430a0491SDavid Ahern 91438428d68SRoopa Prabhu if (tb[NHA_FDB]) 91538428d68SRoopa Prabhu 
nhg_fdb = 1; 916430a0491SDavid Ahern nhg = nla_data(tb[NHA_GROUP]); 917430a0491SDavid Ahern for (i = 0; i < len; ++i) { 918430a0491SDavid Ahern struct nexthop *nh; 919ce9ac056SDavid Ahern bool is_fdb_nh; 920430a0491SDavid Ahern 921430a0491SDavid Ahern nh = nexthop_find_by_id(net, nhg[i].id); 922430a0491SDavid Ahern if (!nh) { 923430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid nexthop id"); 924430a0491SDavid Ahern return -EINVAL; 925430a0491SDavid Ahern } 926ce9ac056SDavid Ahern if (!valid_group_nh(nh, len, &is_fdb_nh, extack)) 927430a0491SDavid Ahern return -EINVAL; 92838428d68SRoopa Prabhu 92938428d68SRoopa Prabhu if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack)) 93038428d68SRoopa Prabhu return -EINVAL; 93138428d68SRoopa Prabhu 932ce9ac056SDavid Ahern if (!nhg_fdb && is_fdb_nh) { 93338428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); 93438428d68SRoopa Prabhu return -EINVAL; 93538428d68SRoopa Prabhu } 936430a0491SDavid Ahern } 937643d0878SPetr Machata for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) { 938430a0491SDavid Ahern if (!tb[i]) 939430a0491SDavid Ahern continue; 940b19218b2SPetr Machata if (i == NHA_FDB) 94138428d68SRoopa Prabhu continue; 942430a0491SDavid Ahern NL_SET_ERR_MSG(extack, 943430a0491SDavid Ahern "No other attributes can be set in nexthop groups"); 944430a0491SDavid Ahern return -EINVAL; 945430a0491SDavid Ahern } 946430a0491SDavid Ahern 947430a0491SDavid Ahern return 0; 948430a0491SDavid Ahern } 949430a0491SDavid Ahern 950430a0491SDavid Ahern static bool ipv6_good_nh(const struct fib6_nh *nh) 951430a0491SDavid Ahern { 952430a0491SDavid Ahern int state = NUD_REACHABLE; 953430a0491SDavid Ahern struct neighbour *n; 954430a0491SDavid Ahern 955430a0491SDavid Ahern rcu_read_lock_bh(); 956430a0491SDavid Ahern 957430a0491SDavid Ahern n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); 958430a0491SDavid Ahern if (n) 959430a0491SDavid Ahern state = n->nud_state; 
960430a0491SDavid Ahern 961430a0491SDavid Ahern rcu_read_unlock_bh(); 962430a0491SDavid Ahern 963430a0491SDavid Ahern return !!(state & NUD_VALID); 964430a0491SDavid Ahern } 965430a0491SDavid Ahern 966430a0491SDavid Ahern static bool ipv4_good_nh(const struct fib_nh *nh) 967430a0491SDavid Ahern { 968430a0491SDavid Ahern int state = NUD_REACHABLE; 969430a0491SDavid Ahern struct neighbour *n; 970430a0491SDavid Ahern 971430a0491SDavid Ahern rcu_read_lock_bh(); 972430a0491SDavid Ahern 973430a0491SDavid Ahern n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, 974430a0491SDavid Ahern (__force u32)nh->fib_nh_gw4); 975430a0491SDavid Ahern if (n) 976430a0491SDavid Ahern state = n->nud_state; 977430a0491SDavid Ahern 978430a0491SDavid Ahern rcu_read_unlock_bh(); 979430a0491SDavid Ahern 980430a0491SDavid Ahern return !!(state & NUD_VALID); 981430a0491SDavid Ahern } 982430a0491SDavid Ahern 98379bc55e3SPetr Machata static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash) 984430a0491SDavid Ahern { 985430a0491SDavid Ahern struct nexthop *rc = NULL; 986430a0491SDavid Ahern int i; 987430a0491SDavid Ahern 988430a0491SDavid Ahern for (i = 0; i < nhg->num_nh; ++i) { 989430a0491SDavid Ahern struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 990430a0491SDavid Ahern struct nh_info *nhi; 991430a0491SDavid Ahern 992b9bae61bSPetr Machata if (hash > atomic_read(&nhge->mpath.upper_bound)) 993430a0491SDavid Ahern continue; 994430a0491SDavid Ahern 995ce9ac056SDavid Ahern nhi = rcu_dereference(nhge->nh->nh_info); 996ce9ac056SDavid Ahern if (nhi->fdb_nh) 99738428d68SRoopa Prabhu return nhge->nh; 99838428d68SRoopa Prabhu 999430a0491SDavid Ahern /* nexthops always check if it is good and does 1000430a0491SDavid Ahern * not rely on a sysctl for this behavior 1001430a0491SDavid Ahern */ 1002430a0491SDavid Ahern switch (nhi->family) { 1003430a0491SDavid Ahern case AF_INET: 1004430a0491SDavid Ahern if (ipv4_good_nh(&nhi->fib_nh)) 1005430a0491SDavid Ahern return nhge->nh; 1006430a0491SDavid 
Ahern break; 1007430a0491SDavid Ahern case AF_INET6: 1008430a0491SDavid Ahern if (ipv6_good_nh(&nhi->fib6_nh)) 1009430a0491SDavid Ahern return nhge->nh; 1010430a0491SDavid Ahern break; 1011430a0491SDavid Ahern } 1012430a0491SDavid Ahern 1013430a0491SDavid Ahern if (!rc) 1014430a0491SDavid Ahern rc = nhge->nh; 1015430a0491SDavid Ahern } 1016430a0491SDavid Ahern 1017430a0491SDavid Ahern return rc; 1018430a0491SDavid Ahern } 101979bc55e3SPetr Machata 1020283a72a5SPetr Machata static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash) 1021283a72a5SPetr Machata { 1022283a72a5SPetr Machata struct nh_res_table *res_table = rcu_dereference(nhg->res_table); 1023283a72a5SPetr Machata u16 bucket_index = hash % res_table->num_nh_buckets; 1024283a72a5SPetr Machata struct nh_res_bucket *bucket; 1025283a72a5SPetr Machata struct nh_grp_entry *nhge; 1026283a72a5SPetr Machata 1027283a72a5SPetr Machata /* nexthop_select_path() is expected to return a non-NULL value, so 1028283a72a5SPetr Machata * skip protocol validation and just hand out whatever there is. 
1029283a72a5SPetr Machata */ 1030283a72a5SPetr Machata bucket = &res_table->nh_buckets[bucket_index]; 1031283a72a5SPetr Machata nh_res_bucket_set_busy(bucket); 1032283a72a5SPetr Machata nhge = rcu_dereference(bucket->nh_entry); 1033283a72a5SPetr Machata return nhge->nh; 1034283a72a5SPetr Machata } 1035283a72a5SPetr Machata 103679bc55e3SPetr Machata struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) 103779bc55e3SPetr Machata { 103879bc55e3SPetr Machata struct nh_group *nhg; 103979bc55e3SPetr Machata 104079bc55e3SPetr Machata if (!nh->is_group) 104179bc55e3SPetr Machata return nh; 104279bc55e3SPetr Machata 104379bc55e3SPetr Machata nhg = rcu_dereference(nh->nh_grp); 104479bc55e3SPetr Machata if (nhg->mpath) 104579bc55e3SPetr Machata return nexthop_select_path_mp(nhg, hash); 1046283a72a5SPetr Machata else if (nhg->resilient) 1047283a72a5SPetr Machata return nexthop_select_path_res(nhg, hash); 104879bc55e3SPetr Machata 104979bc55e3SPetr Machata /* Unreachable. */ 105079bc55e3SPetr Machata return NULL; 105179bc55e3SPetr Machata } 1052430a0491SDavid Ahern EXPORT_SYMBOL_GPL(nexthop_select_path); 1053430a0491SDavid Ahern 1054f88c9aa1SDavid Ahern int nexthop_for_each_fib6_nh(struct nexthop *nh, 1055f88c9aa1SDavid Ahern int (*cb)(struct fib6_nh *nh, void *arg), 1056f88c9aa1SDavid Ahern void *arg) 1057f88c9aa1SDavid Ahern { 1058f88c9aa1SDavid Ahern struct nh_info *nhi; 1059f88c9aa1SDavid Ahern int err; 1060f88c9aa1SDavid Ahern 1061f88c9aa1SDavid Ahern if (nh->is_group) { 1062f88c9aa1SDavid Ahern struct nh_group *nhg; 1063f88c9aa1SDavid Ahern int i; 1064f88c9aa1SDavid Ahern 1065f88c9aa1SDavid Ahern nhg = rcu_dereference_rtnl(nh->nh_grp); 1066f88c9aa1SDavid Ahern for (i = 0; i < nhg->num_nh; i++) { 1067f88c9aa1SDavid Ahern struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1068f88c9aa1SDavid Ahern 1069f88c9aa1SDavid Ahern nhi = rcu_dereference_rtnl(nhge->nh->nh_info); 1070f88c9aa1SDavid Ahern err = cb(&nhi->fib6_nh, arg); 1071f88c9aa1SDavid Ahern if (err) 
1072f88c9aa1SDavid Ahern return err; 1073f88c9aa1SDavid Ahern } 1074f88c9aa1SDavid Ahern } else { 1075f88c9aa1SDavid Ahern nhi = rcu_dereference_rtnl(nh->nh_info); 1076f88c9aa1SDavid Ahern err = cb(&nhi->fib6_nh, arg); 1077f88c9aa1SDavid Ahern if (err) 1078f88c9aa1SDavid Ahern return err; 1079f88c9aa1SDavid Ahern } 1080f88c9aa1SDavid Ahern 1081f88c9aa1SDavid Ahern return 0; 1082f88c9aa1SDavid Ahern } 1083f88c9aa1SDavid Ahern EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh); 1084f88c9aa1SDavid Ahern 10857bf4796dSDavid Ahern static int check_src_addr(const struct in6_addr *saddr, 10867bf4796dSDavid Ahern struct netlink_ext_ack *extack) 10877bf4796dSDavid Ahern { 10887bf4796dSDavid Ahern if (!ipv6_addr_any(saddr)) { 10897bf4796dSDavid Ahern NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects"); 10907bf4796dSDavid Ahern return -EINVAL; 10917bf4796dSDavid Ahern } 10927bf4796dSDavid Ahern return 0; 10937bf4796dSDavid Ahern } 10947bf4796dSDavid Ahern 1095f88d8ea6SDavid Ahern int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, 1096f88d8ea6SDavid Ahern struct netlink_ext_ack *extack) 1097f88d8ea6SDavid Ahern { 1098f88d8ea6SDavid Ahern struct nh_info *nhi; 1099ce9ac056SDavid Ahern bool is_fdb_nh; 110038428d68SRoopa Prabhu 1101f88d8ea6SDavid Ahern /* fib6_src is unique to a fib6_info and limits the ability to cache 1102f88d8ea6SDavid Ahern * routes in fib6_nh within a nexthop that is potentially shared 1103f88d8ea6SDavid Ahern * across multiple fib entries. If the config wants to use source 1104f88d8ea6SDavid Ahern * routing it can not use nexthop objects. mlxsw also does not allow 1105f88d8ea6SDavid Ahern * fib6_src on routes. 
1106f88d8ea6SDavid Ahern */ 11077bf4796dSDavid Ahern if (cfg && check_src_addr(&cfg->fc_src, extack) < 0) 1108f88d8ea6SDavid Ahern return -EINVAL; 1109f88d8ea6SDavid Ahern 1110f88d8ea6SDavid Ahern if (nh->is_group) { 1111f88d8ea6SDavid Ahern struct nh_group *nhg; 1112f88d8ea6SDavid Ahern 1113f88d8ea6SDavid Ahern nhg = rtnl_dereference(nh->nh_grp); 1114f88d8ea6SDavid Ahern if (nhg->has_v4) 1115f88d8ea6SDavid Ahern goto no_v4_nh; 1116ce9ac056SDavid Ahern is_fdb_nh = nhg->fdb_nh; 1117f88d8ea6SDavid Ahern } else { 1118f88d8ea6SDavid Ahern nhi = rtnl_dereference(nh->nh_info); 1119f88d8ea6SDavid Ahern if (nhi->family == AF_INET) 1120f88d8ea6SDavid Ahern goto no_v4_nh; 1121ce9ac056SDavid Ahern is_fdb_nh = nhi->fdb_nh; 1122ce9ac056SDavid Ahern } 1123ce9ac056SDavid Ahern 1124ce9ac056SDavid Ahern if (is_fdb_nh) { 1125ce9ac056SDavid Ahern NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); 1126ce9ac056SDavid Ahern return -EINVAL; 1127f88d8ea6SDavid Ahern } 1128f88d8ea6SDavid Ahern 1129f88d8ea6SDavid Ahern return 0; 1130f88d8ea6SDavid Ahern no_v4_nh: 1131f88d8ea6SDavid Ahern NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop"); 1132f88d8ea6SDavid Ahern return -EINVAL; 1133f88d8ea6SDavid Ahern } 1134f88d8ea6SDavid Ahern EXPORT_SYMBOL_GPL(fib6_check_nexthop); 1135f88d8ea6SDavid Ahern 11367bf4796dSDavid Ahern /* if existing nexthop has ipv6 routes linked to it, need 11377bf4796dSDavid Ahern * to verify this new spec works with ipv6 11387bf4796dSDavid Ahern */ 11397bf4796dSDavid Ahern static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new, 11407bf4796dSDavid Ahern struct netlink_ext_ack *extack) 11417bf4796dSDavid Ahern { 11427bf4796dSDavid Ahern struct fib6_info *f6i; 11437bf4796dSDavid Ahern 11447bf4796dSDavid Ahern if (list_empty(&old->f6i_list)) 11457bf4796dSDavid Ahern return 0; 11467bf4796dSDavid Ahern 11477bf4796dSDavid Ahern list_for_each_entry(f6i, &old->f6i_list, nh_list) { 11487bf4796dSDavid Ahern if 
(check_src_addr(&f6i->fib6_src.addr, extack) < 0) 11497bf4796dSDavid Ahern return -EINVAL; 11507bf4796dSDavid Ahern } 11517bf4796dSDavid Ahern 11527bf4796dSDavid Ahern return fib6_check_nexthop(new, NULL, extack); 11537bf4796dSDavid Ahern } 11547bf4796dSDavid Ahern 1155ce9ac056SDavid Ahern static int nexthop_check_scope(struct nh_info *nhi, u8 scope, 11564c7e8084SDavid Ahern struct netlink_ext_ack *extack) 11574c7e8084SDavid Ahern { 11584c7e8084SDavid Ahern if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) { 11594c7e8084SDavid Ahern NL_SET_ERR_MSG(extack, 11604c7e8084SDavid Ahern "Route with host scope can not have a gateway"); 11614c7e8084SDavid Ahern return -EINVAL; 11624c7e8084SDavid Ahern } 11634c7e8084SDavid Ahern 11644c7e8084SDavid Ahern if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) { 11654c7e8084SDavid Ahern NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop"); 11664c7e8084SDavid Ahern return -EINVAL; 11674c7e8084SDavid Ahern } 11684c7e8084SDavid Ahern 11694c7e8084SDavid Ahern return 0; 11704c7e8084SDavid Ahern } 11714c7e8084SDavid Ahern 11724c7e8084SDavid Ahern /* Invoked by fib add code to verify nexthop by id is ok with 11734c7e8084SDavid Ahern * config for prefix; parts of fib_check_nh not done when nexthop 11744c7e8084SDavid Ahern * object is used. 
11754c7e8084SDavid Ahern */ 11764c7e8084SDavid Ahern int fib_check_nexthop(struct nexthop *nh, u8 scope, 11774c7e8084SDavid Ahern struct netlink_ext_ack *extack) 11784c7e8084SDavid Ahern { 1179ce9ac056SDavid Ahern struct nh_info *nhi; 11804c7e8084SDavid Ahern int err = 0; 11814c7e8084SDavid Ahern 1182ce9ac056SDavid Ahern if (nh->is_group) { 1183ce9ac056SDavid Ahern struct nh_group *nhg; 1184ce9ac056SDavid Ahern 1185ce9ac056SDavid Ahern nhg = rtnl_dereference(nh->nh_grp); 1186ce9ac056SDavid Ahern if (nhg->fdb_nh) { 118738428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); 118838428d68SRoopa Prabhu err = -EINVAL; 118938428d68SRoopa Prabhu goto out; 119038428d68SRoopa Prabhu } 119138428d68SRoopa Prabhu 11924c7e8084SDavid Ahern if (scope == RT_SCOPE_HOST) { 11934c7e8084SDavid Ahern NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); 11944c7e8084SDavid Ahern err = -EINVAL; 11954c7e8084SDavid Ahern goto out; 11964c7e8084SDavid Ahern } 11974c7e8084SDavid Ahern 11984c7e8084SDavid Ahern /* all nexthops in a group have the same scope */ 1199ce9ac056SDavid Ahern nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info); 1200ce9ac056SDavid Ahern err = nexthop_check_scope(nhi, scope, extack); 12014c7e8084SDavid Ahern } else { 1202ce9ac056SDavid Ahern nhi = rtnl_dereference(nh->nh_info); 1203ce9ac056SDavid Ahern if (nhi->fdb_nh) { 1204ce9ac056SDavid Ahern NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); 1205ce9ac056SDavid Ahern err = -EINVAL; 1206ce9ac056SDavid Ahern goto out; 12074c7e8084SDavid Ahern } 1208ce9ac056SDavid Ahern err = nexthop_check_scope(nhi, scope, extack); 1209ce9ac056SDavid Ahern } 1210ce9ac056SDavid Ahern 12114c7e8084SDavid Ahern out: 12124c7e8084SDavid Ahern return err; 12134c7e8084SDavid Ahern } 12144c7e8084SDavid Ahern 12157bf4796dSDavid Ahern static int fib_check_nh_list(struct nexthop *old, struct nexthop *new, 12167bf4796dSDavid Ahern struct netlink_ext_ack *extack) 12177bf4796dSDavid 
Ahern { 12187bf4796dSDavid Ahern struct fib_info *fi; 12197bf4796dSDavid Ahern 12207bf4796dSDavid Ahern list_for_each_entry(fi, &old->fi_list, nh_list) { 12217bf4796dSDavid Ahern int err; 12227bf4796dSDavid Ahern 12237bf4796dSDavid Ahern err = fib_check_nexthop(new, fi->fib_scope, extack); 12247bf4796dSDavid Ahern if (err) 12257bf4796dSDavid Ahern return err; 12267bf4796dSDavid Ahern } 12277bf4796dSDavid Ahern return 0; 12287bf4796dSDavid Ahern } 12297bf4796dSDavid Ahern 1230283a72a5SPetr Machata static bool nh_res_nhge_is_balanced(const struct nh_grp_entry *nhge) 1231283a72a5SPetr Machata { 1232283a72a5SPetr Machata return nhge->res.count_buckets == nhge->res.wants_buckets; 1233283a72a5SPetr Machata } 1234283a72a5SPetr Machata 1235283a72a5SPetr Machata static bool nh_res_nhge_is_ow(const struct nh_grp_entry *nhge) 1236283a72a5SPetr Machata { 1237283a72a5SPetr Machata return nhge->res.count_buckets > nhge->res.wants_buckets; 1238283a72a5SPetr Machata } 1239283a72a5SPetr Machata 1240283a72a5SPetr Machata static bool nh_res_nhge_is_uw(const struct nh_grp_entry *nhge) 1241283a72a5SPetr Machata { 1242283a72a5SPetr Machata return nhge->res.count_buckets < nhge->res.wants_buckets; 1243283a72a5SPetr Machata } 1244283a72a5SPetr Machata 1245283a72a5SPetr Machata static bool nh_res_table_is_balanced(const struct nh_res_table *res_table) 1246283a72a5SPetr Machata { 1247283a72a5SPetr Machata return list_empty(&res_table->uw_nh_entries); 1248283a72a5SPetr Machata } 1249283a72a5SPetr Machata 1250283a72a5SPetr Machata static void nh_res_bucket_unset_nh(struct nh_res_bucket *bucket) 1251283a72a5SPetr Machata { 1252283a72a5SPetr Machata struct nh_grp_entry *nhge; 1253283a72a5SPetr Machata 1254283a72a5SPetr Machata if (bucket->occupied) { 1255283a72a5SPetr Machata nhge = nh_res_dereference(bucket->nh_entry); 1256283a72a5SPetr Machata nhge->res.count_buckets--; 1257283a72a5SPetr Machata bucket->occupied = false; 1258283a72a5SPetr Machata } 1259283a72a5SPetr Machata } 
1260283a72a5SPetr Machata 1261283a72a5SPetr Machata static void nh_res_bucket_set_nh(struct nh_res_bucket *bucket, 1262283a72a5SPetr Machata struct nh_grp_entry *nhge) 1263283a72a5SPetr Machata { 1264283a72a5SPetr Machata nh_res_bucket_unset_nh(bucket); 1265283a72a5SPetr Machata 1266283a72a5SPetr Machata bucket->occupied = true; 1267283a72a5SPetr Machata rcu_assign_pointer(bucket->nh_entry, nhge); 1268283a72a5SPetr Machata nhge->res.count_buckets++; 1269283a72a5SPetr Machata } 1270283a72a5SPetr Machata 1271283a72a5SPetr Machata static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table, 1272283a72a5SPetr Machata struct nh_res_bucket *bucket, 1273283a72a5SPetr Machata unsigned long *deadline, bool *force) 1274283a72a5SPetr Machata { 1275283a72a5SPetr Machata unsigned long now = jiffies; 1276283a72a5SPetr Machata struct nh_grp_entry *nhge; 1277283a72a5SPetr Machata unsigned long idle_point; 1278283a72a5SPetr Machata 1279283a72a5SPetr Machata if (!bucket->occupied) { 1280283a72a5SPetr Machata /* The bucket is not occupied, its NHGE pointer is either 1281283a72a5SPetr Machata * NULL or obsolete. We _have to_ migrate: set force. 1282283a72a5SPetr Machata */ 1283283a72a5SPetr Machata *force = true; 1284283a72a5SPetr Machata return true; 1285283a72a5SPetr Machata } 1286283a72a5SPetr Machata 1287283a72a5SPetr Machata nhge = nh_res_dereference(bucket->nh_entry); 1288283a72a5SPetr Machata 1289283a72a5SPetr Machata /* If the bucket is populated by an underweight or balanced 1290283a72a5SPetr Machata * nexthop, do not migrate. 1291283a72a5SPetr Machata */ 1292283a72a5SPetr Machata if (!nh_res_nhge_is_ow(nhge)) 1293283a72a5SPetr Machata return false; 1294283a72a5SPetr Machata 1295283a72a5SPetr Machata /* At this point we know that the bucket is populated with an 1296283a72a5SPetr Machata * overweight nexthop. It needs to be migrated to a new nexthop if 1297283a72a5SPetr Machata * the idle timer of unbalanced timer expired. 
1298283a72a5SPetr Machata */ 1299283a72a5SPetr Machata 1300283a72a5SPetr Machata idle_point = nh_res_bucket_idle_point(res_table, bucket, now); 1301283a72a5SPetr Machata if (time_after_eq(now, idle_point)) { 1302283a72a5SPetr Machata /* The bucket is idle. We _can_ migrate: unset force. */ 1303283a72a5SPetr Machata *force = false; 1304283a72a5SPetr Machata return true; 1305283a72a5SPetr Machata } 1306283a72a5SPetr Machata 1307283a72a5SPetr Machata /* Unbalanced timer of 0 means "never force". */ 1308283a72a5SPetr Machata if (res_table->unbalanced_timer) { 1309283a72a5SPetr Machata unsigned long unb_point; 1310283a72a5SPetr Machata 1311283a72a5SPetr Machata unb_point = nh_res_table_unb_point(res_table); 1312283a72a5SPetr Machata if (time_after(now, unb_point)) { 1313283a72a5SPetr Machata /* The bucket is not idle, but the unbalanced timer 1314283a72a5SPetr Machata * expired. We _can_ migrate, but set force anyway, 1315283a72a5SPetr Machata * so that drivers know to ignore activity reports 1316283a72a5SPetr Machata * from the HW. 
1317283a72a5SPetr Machata */ 1318283a72a5SPetr Machata *force = true; 1319283a72a5SPetr Machata return true; 1320283a72a5SPetr Machata } 1321283a72a5SPetr Machata 1322283a72a5SPetr Machata nh_res_time_set_deadline(unb_point, deadline); 1323283a72a5SPetr Machata } 1324283a72a5SPetr Machata 1325283a72a5SPetr Machata nh_res_time_set_deadline(idle_point, deadline); 1326283a72a5SPetr Machata return false; 1327283a72a5SPetr Machata } 1328283a72a5SPetr Machata 1329283a72a5SPetr Machata static bool nh_res_bucket_migrate(struct nh_res_table *res_table, 13307c37c7e0SPetr Machata u16 bucket_index, bool notify, bool force) 1331283a72a5SPetr Machata { 1332283a72a5SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index]; 1333283a72a5SPetr Machata struct nh_grp_entry *new_nhge; 13347c37c7e0SPetr Machata struct netlink_ext_ack extack; 13357c37c7e0SPetr Machata int err; 1336283a72a5SPetr Machata 1337283a72a5SPetr Machata new_nhge = list_first_entry_or_null(&res_table->uw_nh_entries, 1338283a72a5SPetr Machata struct nh_grp_entry, 1339283a72a5SPetr Machata res.uw_nh_entry); 1340283a72a5SPetr Machata if (WARN_ON_ONCE(!new_nhge)) 1341283a72a5SPetr Machata /* If this function is called, "bucket" is either not 1342283a72a5SPetr Machata * occupied, or it belongs to a next hop that is 1343283a72a5SPetr Machata * overweight. In either case, there ought to be a 1344283a72a5SPetr Machata * corresponding underweight next hop. 
1345283a72a5SPetr Machata */ 1346283a72a5SPetr Machata return false; 1347283a72a5SPetr Machata 13487c37c7e0SPetr Machata if (notify) { 13497c37c7e0SPetr Machata struct nh_grp_entry *old_nhge; 13507c37c7e0SPetr Machata 13517c37c7e0SPetr Machata old_nhge = nh_res_dereference(bucket->nh_entry); 13527c37c7e0SPetr Machata err = call_nexthop_res_bucket_notifiers(res_table->net, 13537c37c7e0SPetr Machata res_table->nhg_id, 13547c37c7e0SPetr Machata bucket_index, force, 13557c37c7e0SPetr Machata old_nhge->nh, 13567c37c7e0SPetr Machata new_nhge->nh, &extack); 13577c37c7e0SPetr Machata if (err) { 13587c37c7e0SPetr Machata pr_err_ratelimited("%s\n", extack._msg); 13597c37c7e0SPetr Machata if (!force) 13607c37c7e0SPetr Machata return false; 13617c37c7e0SPetr Machata /* It is not possible to veto a forced replacement, so 13627c37c7e0SPetr Machata * just clear the hardware flags from the nexthop 13637c37c7e0SPetr Machata * bucket to indicate to user space that this bucket is 13647c37c7e0SPetr Machata * not correctly populated in hardware. 
13657c37c7e0SPetr Machata */ 13667c37c7e0SPetr Machata bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); 13677c37c7e0SPetr Machata } 13687c37c7e0SPetr Machata } 13697c37c7e0SPetr Machata 1370283a72a5SPetr Machata nh_res_bucket_set_nh(bucket, new_nhge); 1371283a72a5SPetr Machata nh_res_bucket_set_idle(res_table, bucket); 1372283a72a5SPetr Machata 1373283a72a5SPetr Machata if (nh_res_nhge_is_balanced(new_nhge)) 1374283a72a5SPetr Machata list_del(&new_nhge->res.uw_nh_entry); 1375283a72a5SPetr Machata return true; 1376283a72a5SPetr Machata } 1377283a72a5SPetr Machata 1378283a72a5SPetr Machata #define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2) 1379283a72a5SPetr Machata 13807c37c7e0SPetr Machata static void nh_res_table_upkeep(struct nh_res_table *res_table, bool notify) 1381283a72a5SPetr Machata { 1382283a72a5SPetr Machata unsigned long now = jiffies; 1383283a72a5SPetr Machata unsigned long deadline; 1384283a72a5SPetr Machata u16 i; 1385283a72a5SPetr Machata 1386283a72a5SPetr Machata /* Deadline is the next time that upkeep should be run. It is the 1387283a72a5SPetr Machata * earliest time at which one of the buckets might be migrated. 1388283a72a5SPetr Machata * Start at the most pessimistic estimate: either unbalanced_timer 1389283a72a5SPetr Machata * from now, or if there is none, idle_timer from now. For each 1390283a72a5SPetr Machata * encountered time point, call nh_res_time_set_deadline() to 1391283a72a5SPetr Machata * refine the estimate. 
1392283a72a5SPetr Machata */ 1393283a72a5SPetr Machata if (res_table->unbalanced_timer) 1394283a72a5SPetr Machata deadline = now + res_table->unbalanced_timer; 1395283a72a5SPetr Machata else 1396283a72a5SPetr Machata deadline = now + res_table->idle_timer; 1397283a72a5SPetr Machata 1398283a72a5SPetr Machata for (i = 0; i < res_table->num_nh_buckets; i++) { 1399283a72a5SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; 1400283a72a5SPetr Machata bool force; 1401283a72a5SPetr Machata 1402283a72a5SPetr Machata if (nh_res_bucket_should_migrate(res_table, bucket, 1403283a72a5SPetr Machata &deadline, &force)) { 14047c37c7e0SPetr Machata if (!nh_res_bucket_migrate(res_table, i, notify, 14057c37c7e0SPetr Machata force)) { 1406283a72a5SPetr Machata unsigned long idle_point; 1407283a72a5SPetr Machata 1408283a72a5SPetr Machata /* A driver can override the migration 1409283a72a5SPetr Machata * decision if the HW reports that the 1410283a72a5SPetr Machata * bucket is actually not idle. Therefore 1411283a72a5SPetr Machata * remark the bucket as busy again and 1412283a72a5SPetr Machata * update the deadline. 1413283a72a5SPetr Machata */ 1414283a72a5SPetr Machata nh_res_bucket_set_busy(bucket); 1415283a72a5SPetr Machata idle_point = nh_res_bucket_idle_point(res_table, 1416283a72a5SPetr Machata bucket, 1417283a72a5SPetr Machata now); 1418283a72a5SPetr Machata nh_res_time_set_deadline(idle_point, &deadline); 1419283a72a5SPetr Machata } 1420283a72a5SPetr Machata } 1421283a72a5SPetr Machata } 1422283a72a5SPetr Machata 1423283a72a5SPetr Machata /* If the group is still unbalanced, schedule the next upkeep to 1424283a72a5SPetr Machata * either the deadline computed above, or the minimum deadline, 1425283a72a5SPetr Machata * whichever comes later. 
1426283a72a5SPetr Machata */ 1427283a72a5SPetr Machata if (!nh_res_table_is_balanced(res_table)) { 1428283a72a5SPetr Machata unsigned long now = jiffies; 1429283a72a5SPetr Machata unsigned long min_deadline; 1430283a72a5SPetr Machata 1431283a72a5SPetr Machata min_deadline = now + NH_RES_UPKEEP_DW_MINIMUM_INTERVAL; 1432283a72a5SPetr Machata if (time_before(deadline, min_deadline)) 1433283a72a5SPetr Machata deadline = min_deadline; 1434283a72a5SPetr Machata 1435283a72a5SPetr Machata queue_delayed_work(system_power_efficient_wq, 1436283a72a5SPetr Machata &res_table->upkeep_dw, deadline - now); 1437283a72a5SPetr Machata } 1438283a72a5SPetr Machata } 1439283a72a5SPetr Machata 1440283a72a5SPetr Machata static void nh_res_table_upkeep_dw(struct work_struct *work) 1441283a72a5SPetr Machata { 1442283a72a5SPetr Machata struct delayed_work *dw = to_delayed_work(work); 1443283a72a5SPetr Machata struct nh_res_table *res_table; 1444283a72a5SPetr Machata 1445283a72a5SPetr Machata res_table = container_of(dw, struct nh_res_table, upkeep_dw); 14467c37c7e0SPetr Machata nh_res_table_upkeep(res_table, true); 1447283a72a5SPetr Machata } 1448283a72a5SPetr Machata 1449283a72a5SPetr Machata static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table) 1450283a72a5SPetr Machata { 1451283a72a5SPetr Machata cancel_delayed_work_sync(&res_table->upkeep_dw); 1452283a72a5SPetr Machata } 1453283a72a5SPetr Machata 1454283a72a5SPetr Machata static void nh_res_group_rebalance(struct nh_group *nhg, 1455283a72a5SPetr Machata struct nh_res_table *res_table) 1456283a72a5SPetr Machata { 1457283a72a5SPetr Machata int prev_upper_bound = 0; 1458283a72a5SPetr Machata int total = 0; 1459283a72a5SPetr Machata int w = 0; 1460283a72a5SPetr Machata int i; 1461283a72a5SPetr Machata 1462283a72a5SPetr Machata INIT_LIST_HEAD(&res_table->uw_nh_entries); 1463283a72a5SPetr Machata 1464283a72a5SPetr Machata for (i = 0; i < nhg->num_nh; ++i) 1465283a72a5SPetr Machata total += nhg->nh_entries[i].weight; 
1466283a72a5SPetr Machata 1467283a72a5SPetr Machata for (i = 0; i < nhg->num_nh; ++i) { 1468283a72a5SPetr Machata struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1469283a72a5SPetr Machata int upper_bound; 1470283a72a5SPetr Machata 1471283a72a5SPetr Machata w += nhge->weight; 1472283a72a5SPetr Machata upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w, 1473283a72a5SPetr Machata total); 1474283a72a5SPetr Machata nhge->res.wants_buckets = upper_bound - prev_upper_bound; 1475283a72a5SPetr Machata prev_upper_bound = upper_bound; 1476283a72a5SPetr Machata 1477283a72a5SPetr Machata if (nh_res_nhge_is_uw(nhge)) { 1478283a72a5SPetr Machata if (list_empty(&res_table->uw_nh_entries)) 1479283a72a5SPetr Machata res_table->unbalanced_since = jiffies; 1480283a72a5SPetr Machata list_add(&nhge->res.uw_nh_entry, 1481283a72a5SPetr Machata &res_table->uw_nh_entries); 1482283a72a5SPetr Machata } 1483283a72a5SPetr Machata } 1484283a72a5SPetr Machata } 1485283a72a5SPetr Machata 1486283a72a5SPetr Machata /* Migrate buckets in res_table so that they reference NHGE's from NHG with 1487283a72a5SPetr Machata * the right NH ID. Set those buckets that do not have a corresponding NHGE 1488283a72a5SPetr Machata * entry in NHG as not occupied. 
1489283a72a5SPetr Machata */ 1490283a72a5SPetr Machata static void nh_res_table_migrate_buckets(struct nh_res_table *res_table, 1491283a72a5SPetr Machata struct nh_group *nhg) 1492283a72a5SPetr Machata { 1493283a72a5SPetr Machata u16 i; 1494283a72a5SPetr Machata 1495283a72a5SPetr Machata for (i = 0; i < res_table->num_nh_buckets; i++) { 1496283a72a5SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; 1497283a72a5SPetr Machata u32 id = rtnl_dereference(bucket->nh_entry)->nh->id; 1498283a72a5SPetr Machata bool found = false; 1499283a72a5SPetr Machata int j; 1500283a72a5SPetr Machata 1501283a72a5SPetr Machata for (j = 0; j < nhg->num_nh; j++) { 1502283a72a5SPetr Machata struct nh_grp_entry *nhge = &nhg->nh_entries[j]; 1503283a72a5SPetr Machata 1504283a72a5SPetr Machata if (nhge->nh->id == id) { 1505283a72a5SPetr Machata nh_res_bucket_set_nh(bucket, nhge); 1506283a72a5SPetr Machata found = true; 1507283a72a5SPetr Machata break; 1508283a72a5SPetr Machata } 1509283a72a5SPetr Machata } 1510283a72a5SPetr Machata 1511283a72a5SPetr Machata if (!found) 1512283a72a5SPetr Machata nh_res_bucket_unset_nh(bucket); 1513283a72a5SPetr Machata } 1514283a72a5SPetr Machata } 1515283a72a5SPetr Machata 1516283a72a5SPetr Machata static void replace_nexthop_grp_res(struct nh_group *oldg, 1517283a72a5SPetr Machata struct nh_group *newg) 1518283a72a5SPetr Machata { 1519283a72a5SPetr Machata /* For NH group replacement, the new NHG might only have a stub 1520283a72a5SPetr Machata * hash table with 0 buckets, because the number of buckets was not 1521283a72a5SPetr Machata * specified. For NH removal, oldg and newg both reference the same 1522283a72a5SPetr Machata * res_table. So in any case, in the following, we want to work 1523283a72a5SPetr Machata * with oldg->res_table. 
1524283a72a5SPetr Machata */ 1525283a72a5SPetr Machata struct nh_res_table *old_res_table = rtnl_dereference(oldg->res_table); 1526283a72a5SPetr Machata unsigned long prev_unbalanced_since = old_res_table->unbalanced_since; 1527283a72a5SPetr Machata bool prev_has_uw = !list_empty(&old_res_table->uw_nh_entries); 1528283a72a5SPetr Machata 1529283a72a5SPetr Machata nh_res_table_cancel_upkeep(old_res_table); 1530283a72a5SPetr Machata nh_res_table_migrate_buckets(old_res_table, newg); 1531283a72a5SPetr Machata nh_res_group_rebalance(newg, old_res_table); 1532283a72a5SPetr Machata if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries)) 1533283a72a5SPetr Machata old_res_table->unbalanced_since = prev_unbalanced_since; 15347c37c7e0SPetr Machata nh_res_table_upkeep(old_res_table, true); 1535283a72a5SPetr Machata } 1536283a72a5SPetr Machata 1537283a72a5SPetr Machata static void nh_mp_group_rebalance(struct nh_group *nhg) 1538430a0491SDavid Ahern { 1539430a0491SDavid Ahern int total = 0; 1540430a0491SDavid Ahern int w = 0; 1541430a0491SDavid Ahern int i; 1542430a0491SDavid Ahern 1543430a0491SDavid Ahern for (i = 0; i < nhg->num_nh; ++i) 1544430a0491SDavid Ahern total += nhg->nh_entries[i].weight; 1545430a0491SDavid Ahern 1546430a0491SDavid Ahern for (i = 0; i < nhg->num_nh; ++i) { 1547430a0491SDavid Ahern struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1548430a0491SDavid Ahern int upper_bound; 1549430a0491SDavid Ahern 1550430a0491SDavid Ahern w += nhge->weight; 1551430a0491SDavid Ahern upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; 1552b9bae61bSPetr Machata atomic_set(&nhge->mpath.upper_bound, upper_bound); 1553430a0491SDavid Ahern } 1554430a0491SDavid Ahern } 1555430a0491SDavid Ahern 1556ac21753aSDavid Ahern static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, 1557430a0491SDavid Ahern struct nl_info *nlinfo) 1558430a0491SDavid Ahern { 155990f33bffSNikolay Aleksandrov struct nh_grp_entry *nhges, *new_nhges; 1560ac21753aSDavid 
Ahern struct nexthop *nhp = nhge->nh_parent; 1561833a1065SIdo Schimmel struct netlink_ext_ack extack; 1562430a0491SDavid Ahern struct nexthop *nh = nhge->nh; 156390f33bffSNikolay Aleksandrov struct nh_group *nhg, *newg; 1564833a1065SIdo Schimmel int i, j, err; 1565430a0491SDavid Ahern 1566430a0491SDavid Ahern WARN_ON(!nh); 1567430a0491SDavid Ahern 1568ac21753aSDavid Ahern nhg = rtnl_dereference(nhp->nh_grp); 156990f33bffSNikolay Aleksandrov newg = nhg->spare; 1570430a0491SDavid Ahern 157190f33bffSNikolay Aleksandrov /* last entry, keep it visible and remove the parent */ 157290f33bffSNikolay Aleksandrov if (nhg->num_nh == 1) { 157390f33bffSNikolay Aleksandrov remove_nexthop(net, nhp, nlinfo); 1574430a0491SDavid Ahern return; 157590f33bffSNikolay Aleksandrov } 1576430a0491SDavid Ahern 1577863b2558SIdo Schimmel newg->has_v4 = false; 157890e1a9e2SPetr Machata newg->is_multipath = nhg->is_multipath; 157990f33bffSNikolay Aleksandrov newg->mpath = nhg->mpath; 1580283a72a5SPetr Machata newg->resilient = nhg->resilient; 1581ce9ac056SDavid Ahern newg->fdb_nh = nhg->fdb_nh; 158290f33bffSNikolay Aleksandrov newg->num_nh = nhg->num_nh; 1583430a0491SDavid Ahern 158490f33bffSNikolay Aleksandrov /* copy old entries to new except the one getting removed */ 158590f33bffSNikolay Aleksandrov nhges = nhg->nh_entries; 158690f33bffSNikolay Aleksandrov new_nhges = newg->nh_entries; 158790f33bffSNikolay Aleksandrov for (i = 0, j = 0; i < nhg->num_nh; ++i) { 1588863b2558SIdo Schimmel struct nh_info *nhi; 1589863b2558SIdo Schimmel 159090f33bffSNikolay Aleksandrov /* current nexthop getting removed */ 159190f33bffSNikolay Aleksandrov if (nhg->nh_entries[i].nh == nh) { 159290f33bffSNikolay Aleksandrov newg->num_nh--; 159390f33bffSNikolay Aleksandrov continue; 159490f33bffSNikolay Aleksandrov } 1595430a0491SDavid Ahern 1596863b2558SIdo Schimmel nhi = rtnl_dereference(nhges[i].nh->nh_info); 1597863b2558SIdo Schimmel if (nhi->family == AF_INET) 1598863b2558SIdo Schimmel newg->has_v4 = true; 
1599863b2558SIdo Schimmel 160090f33bffSNikolay Aleksandrov list_del(&nhges[i].nh_list); 160190f33bffSNikolay Aleksandrov new_nhges[j].nh_parent = nhges[i].nh_parent; 160290f33bffSNikolay Aleksandrov new_nhges[j].nh = nhges[i].nh; 160390f33bffSNikolay Aleksandrov new_nhges[j].weight = nhges[i].weight; 160490f33bffSNikolay Aleksandrov list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list); 160590f33bffSNikolay Aleksandrov j++; 160690f33bffSNikolay Aleksandrov } 160790f33bffSNikolay Aleksandrov 1608283a72a5SPetr Machata if (newg->mpath) 1609283a72a5SPetr Machata nh_mp_group_rebalance(newg); 1610283a72a5SPetr Machata else if (newg->resilient) 1611283a72a5SPetr Machata replace_nexthop_grp_res(nhg, newg); 1612283a72a5SPetr Machata 161390f33bffSNikolay Aleksandrov rcu_assign_pointer(nhp->nh_grp, newg); 161490f33bffSNikolay Aleksandrov 161590f33bffSNikolay Aleksandrov list_del(&nhge->nh_list); 161690f33bffSNikolay Aleksandrov nexthop_put(nhge->nh); 1617430a0491SDavid Ahern 16187c37c7e0SPetr Machata /* Removal of a NH from a resilient group is notified through 16197c37c7e0SPetr Machata * bucket notifications. 
16207c37c7e0SPetr Machata */ 16217c37c7e0SPetr Machata if (newg->mpath) { 16227c37c7e0SPetr Machata err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, 16237c37c7e0SPetr Machata &extack); 1624833a1065SIdo Schimmel if (err) 1625833a1065SIdo Schimmel pr_err("%s\n", extack._msg); 16267c37c7e0SPetr Machata } 1627833a1065SIdo Schimmel 1628430a0491SDavid Ahern if (nlinfo) 1629ac21753aSDavid Ahern nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); 1630430a0491SDavid Ahern } 1631430a0491SDavid Ahern 1632430a0491SDavid Ahern static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, 1633430a0491SDavid Ahern struct nl_info *nlinfo) 1634430a0491SDavid Ahern { 1635430a0491SDavid Ahern struct nh_grp_entry *nhge, *tmp; 1636430a0491SDavid Ahern 1637ac21753aSDavid Ahern list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) 1638ac21753aSDavid Ahern remove_nh_grp_entry(net, nhge, nlinfo); 1639430a0491SDavid Ahern 164090f33bffSNikolay Aleksandrov /* make sure all see the newly published array before releasing rtnl */ 1641df6afe2fSIdo Schimmel synchronize_net(); 1642430a0491SDavid Ahern } 1643430a0491SDavid Ahern 1644430a0491SDavid Ahern static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) 1645430a0491SDavid Ahern { 1646430a0491SDavid Ahern struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); 1647283a72a5SPetr Machata struct nh_res_table *res_table; 1648430a0491SDavid Ahern int i, num_nh = nhg->num_nh; 1649430a0491SDavid Ahern 1650430a0491SDavid Ahern for (i = 0; i < num_nh; ++i) { 1651430a0491SDavid Ahern struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 1652430a0491SDavid Ahern 1653430a0491SDavid Ahern if (WARN_ON(!nhge->nh)) 1654430a0491SDavid Ahern continue; 1655430a0491SDavid Ahern 165690f33bffSNikolay Aleksandrov list_del_init(&nhge->nh_list); 1657430a0491SDavid Ahern } 1658283a72a5SPetr Machata 1659283a72a5SPetr Machata if (nhg->resilient) { 1660283a72a5SPetr Machata res_table = rtnl_dereference(nhg->res_table); 
1661283a72a5SPetr Machata nh_res_table_cancel_upkeep(res_table); 1662283a72a5SPetr Machata } 1663430a0491SDavid Ahern } 1664430a0491SDavid Ahern 16657bf4796dSDavid Ahern /* not called for nexthop replace */ 16664c7e8084SDavid Ahern static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) 16674c7e8084SDavid Ahern { 1668f88d8ea6SDavid Ahern struct fib6_info *f6i, *tmp; 16694c7e8084SDavid Ahern bool do_flush = false; 16704c7e8084SDavid Ahern struct fib_info *fi; 16714c7e8084SDavid Ahern 16724c7e8084SDavid Ahern list_for_each_entry(fi, &nh->fi_list, nh_list) { 16734c7e8084SDavid Ahern fi->fib_flags |= RTNH_F_DEAD; 16744c7e8084SDavid Ahern do_flush = true; 16754c7e8084SDavid Ahern } 16764c7e8084SDavid Ahern if (do_flush) 16774c7e8084SDavid Ahern fib_flush(net); 1678f88d8ea6SDavid Ahern 1679f88d8ea6SDavid Ahern /* ip6_del_rt removes the entry from this list hence the _safe */ 1680f88d8ea6SDavid Ahern list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { 1681f88d8ea6SDavid Ahern /* __ip6_del_rt does a release, so do a hold here */ 1682f88d8ea6SDavid Ahern fib6_info_hold(f6i); 16834f80116dSRoopa Prabhu ipv6_stub->ip6_del_rt(net, f6i, 16844f80116dSRoopa Prabhu !net->ipv4.sysctl_nexthop_compat_mode); 1685f88d8ea6SDavid Ahern } 16864c7e8084SDavid Ahern } 16874c7e8084SDavid Ahern 1688430a0491SDavid Ahern static void __remove_nexthop(struct net *net, struct nexthop *nh, 1689430a0491SDavid Ahern struct nl_info *nlinfo) 1690430a0491SDavid Ahern { 16914c7e8084SDavid Ahern __remove_nexthop_fib(net, nh); 16924c7e8084SDavid Ahern 1693430a0491SDavid Ahern if (nh->is_group) { 1694430a0491SDavid Ahern remove_nexthop_group(nh, nlinfo); 1695430a0491SDavid Ahern } else { 1696597cfe4fSDavid Ahern struct nh_info *nhi; 1697597cfe4fSDavid Ahern 1698597cfe4fSDavid Ahern nhi = rtnl_dereference(nh->nh_info); 1699597cfe4fSDavid Ahern if (nhi->fib_nhc.nhc_dev) 1700597cfe4fSDavid Ahern hlist_del(&nhi->dev_hash); 1701430a0491SDavid Ahern 1702430a0491SDavid Ahern 
remove_nexthop_from_groups(net, nh, nlinfo); 1703430a0491SDavid Ahern } 1704597cfe4fSDavid Ahern } 1705597cfe4fSDavid Ahern 1706ab84be7eSDavid Ahern static void remove_nexthop(struct net *net, struct nexthop *nh, 1707430a0491SDavid Ahern struct nl_info *nlinfo) 1708ab84be7eSDavid Ahern { 17093578d53dSIdo Schimmel call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL); 17100695564bSIdo Schimmel 1711ab84be7eSDavid Ahern /* remove from the tree */ 1712ab84be7eSDavid Ahern rb_erase(&nh->rb_node, &net->nexthop.rb_root); 1713ab84be7eSDavid Ahern 1714ab84be7eSDavid Ahern if (nlinfo) 1715ab84be7eSDavid Ahern nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo); 1716ab84be7eSDavid Ahern 1717430a0491SDavid Ahern __remove_nexthop(net, nh, nlinfo); 1718ab84be7eSDavid Ahern nh_base_seq_inc(net); 1719ab84be7eSDavid Ahern 1720ab84be7eSDavid Ahern nexthop_put(nh); 1721ab84be7eSDavid Ahern } 1722ab84be7eSDavid Ahern 17237bf4796dSDavid Ahern /* if any FIB entries reference this nexthop, any dst entries 17247bf4796dSDavid Ahern * need to be regenerated 17257bf4796dSDavid Ahern */ 17267bf4796dSDavid Ahern static void nh_rt_cache_flush(struct net *net, struct nexthop *nh) 17277bf4796dSDavid Ahern { 17287bf4796dSDavid Ahern struct fib6_info *f6i; 17297bf4796dSDavid Ahern 17307bf4796dSDavid Ahern if (!list_empty(&nh->fi_list)) 17317bf4796dSDavid Ahern rt_cache_flush(net); 17327bf4796dSDavid Ahern 17337bf4796dSDavid Ahern list_for_each_entry(f6i, &nh->f6i_list, nh_list) 17347bf4796dSDavid Ahern ipv6_stub->fib6_update_sernum(net, f6i); 17357bf4796dSDavid Ahern } 17367bf4796dSDavid Ahern 17377bf4796dSDavid Ahern static int replace_nexthop_grp(struct net *net, struct nexthop *old, 1738597f48e4SPetr Machata struct nexthop *new, const struct nh_config *cfg, 17397bf4796dSDavid Ahern struct netlink_ext_ack *extack) 17407bf4796dSDavid Ahern { 1741283a72a5SPetr Machata struct nh_res_table *tmp_table = NULL; 1742283a72a5SPetr Machata struct nh_res_table *new_res_table; 1743283a72a5SPetr Machata struct 
nh_res_table *old_res_table; 17447bf4796dSDavid Ahern struct nh_group *oldg, *newg; 1745d144cc5fSIdo Schimmel int i, err; 17467bf4796dSDavid Ahern 17477bf4796dSDavid Ahern if (!new->is_group) { 17487bf4796dSDavid Ahern NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); 17497bf4796dSDavid Ahern return -EINVAL; 17507bf4796dSDavid Ahern } 17517bf4796dSDavid Ahern 17527bf4796dSDavid Ahern oldg = rtnl_dereference(old->nh_grp); 17537bf4796dSDavid Ahern newg = rtnl_dereference(new->nh_grp); 17547bf4796dSDavid Ahern 1755283a72a5SPetr Machata if (newg->mpath != oldg->mpath) { 1756283a72a5SPetr Machata NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type."); 1757283a72a5SPetr Machata return -EINVAL; 1758283a72a5SPetr Machata } 1759283a72a5SPetr Machata 1760283a72a5SPetr Machata if (newg->mpath) { 1761283a72a5SPetr Machata err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, 1762283a72a5SPetr Machata extack); 1763283a72a5SPetr Machata if (err) 1764283a72a5SPetr Machata return err; 1765283a72a5SPetr Machata } else if (newg->resilient) { 1766283a72a5SPetr Machata new_res_table = rtnl_dereference(newg->res_table); 1767283a72a5SPetr Machata old_res_table = rtnl_dereference(oldg->res_table); 1768283a72a5SPetr Machata 1769283a72a5SPetr Machata /* Accept if num_nh_buckets was not given, but if it was 1770283a72a5SPetr Machata * given, demand that the value be correct. 
1771283a72a5SPetr Machata */ 1772283a72a5SPetr Machata if (cfg->nh_grp_res_has_num_buckets && 1773283a72a5SPetr Machata cfg->nh_grp_res_num_buckets != 1774283a72a5SPetr Machata old_res_table->num_nh_buckets) { 1775283a72a5SPetr Machata NL_SET_ERR_MSG(extack, "Can not change number of buckets of a resilient nexthop group."); 1776283a72a5SPetr Machata return -EINVAL; 1777283a72a5SPetr Machata } 1778283a72a5SPetr Machata 17797c37c7e0SPetr Machata /* Emit a pre-replace notification so that listeners could veto 17807c37c7e0SPetr Machata * a potentially unsupported configuration. Otherwise, 17817c37c7e0SPetr Machata * individual bucket replacement notifications would need to be 17827c37c7e0SPetr Machata * vetoed, which is something that should only happen if the 17837c37c7e0SPetr Machata * bucket is currently active. 17847c37c7e0SPetr Machata */ 17857c37c7e0SPetr Machata err = call_nexthop_res_table_notifiers(net, new, extack); 17867c37c7e0SPetr Machata if (err) 17877c37c7e0SPetr Machata return err; 17887c37c7e0SPetr Machata 1789283a72a5SPetr Machata if (cfg->nh_grp_res_has_idle_timer) 1790283a72a5SPetr Machata old_res_table->idle_timer = cfg->nh_grp_res_idle_timer; 1791283a72a5SPetr Machata if (cfg->nh_grp_res_has_unbalanced_timer) 1792283a72a5SPetr Machata old_res_table->unbalanced_timer = 1793283a72a5SPetr Machata cfg->nh_grp_res_unbalanced_timer; 1794283a72a5SPetr Machata 1795283a72a5SPetr Machata replace_nexthop_grp_res(oldg, newg); 1796283a72a5SPetr Machata 1797283a72a5SPetr Machata tmp_table = new_res_table; 1798283a72a5SPetr Machata rcu_assign_pointer(newg->res_table, old_res_table); 1799283a72a5SPetr Machata rcu_assign_pointer(newg->spare->res_table, old_res_table); 1800283a72a5SPetr Machata } 1801283a72a5SPetr Machata 18027bf4796dSDavid Ahern /* update parents - used by nexthop code for cleanup */ 18037bf4796dSDavid Ahern for (i = 0; i < newg->num_nh; i++) 18047bf4796dSDavid Ahern newg->nh_entries[i].nh_parent = old; 18057bf4796dSDavid Ahern 18067bf4796dSDavid 
Ahern rcu_assign_pointer(old->nh_grp, newg); 18077bf4796dSDavid Ahern 1808283a72a5SPetr Machata if (newg->resilient) { 1809283a72a5SPetr Machata rcu_assign_pointer(oldg->res_table, tmp_table); 1810283a72a5SPetr Machata rcu_assign_pointer(oldg->spare->res_table, tmp_table); 1811283a72a5SPetr Machata } 1812283a72a5SPetr Machata 18137bf4796dSDavid Ahern for (i = 0; i < oldg->num_nh; i++) 18147bf4796dSDavid Ahern oldg->nh_entries[i].nh_parent = new; 18157bf4796dSDavid Ahern 18167bf4796dSDavid Ahern rcu_assign_pointer(new->nh_grp, oldg); 18177bf4796dSDavid Ahern 18187bf4796dSDavid Ahern return 0; 18197bf4796dSDavid Ahern } 18207bf4796dSDavid Ahern 1821885a3b15SIdo Schimmel static void nh_group_v4_update(struct nh_group *nhg) 1822885a3b15SIdo Schimmel { 1823885a3b15SIdo Schimmel struct nh_grp_entry *nhges; 1824885a3b15SIdo Schimmel bool has_v4 = false; 1825885a3b15SIdo Schimmel int i; 1826885a3b15SIdo Schimmel 1827885a3b15SIdo Schimmel nhges = nhg->nh_entries; 1828885a3b15SIdo Schimmel for (i = 0; i < nhg->num_nh; i++) { 1829885a3b15SIdo Schimmel struct nh_info *nhi; 1830885a3b15SIdo Schimmel 1831885a3b15SIdo Schimmel nhi = rtnl_dereference(nhges[i].nh->nh_info); 1832885a3b15SIdo Schimmel if (nhi->family == AF_INET) 1833885a3b15SIdo Schimmel has_v4 = true; 1834885a3b15SIdo Schimmel } 1835885a3b15SIdo Schimmel nhg->has_v4 = has_v4; 1836885a3b15SIdo Schimmel } 1837885a3b15SIdo Schimmel 18387c37c7e0SPetr Machata static int replace_nexthop_single_notify_res(struct net *net, 18397c37c7e0SPetr Machata struct nh_res_table *res_table, 18407c37c7e0SPetr Machata struct nexthop *old, 18417c37c7e0SPetr Machata struct nh_info *oldi, 18427c37c7e0SPetr Machata struct nh_info *newi, 18437c37c7e0SPetr Machata struct netlink_ext_ack *extack) 18447c37c7e0SPetr Machata { 18457c37c7e0SPetr Machata u32 nhg_id = res_table->nhg_id; 18467c37c7e0SPetr Machata int err; 18477c37c7e0SPetr Machata u16 i; 18487c37c7e0SPetr Machata 18497c37c7e0SPetr Machata for (i = 0; i < res_table->num_nh_buckets; 
i++) { 18507c37c7e0SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; 18517c37c7e0SPetr Machata struct nh_grp_entry *nhge; 18527c37c7e0SPetr Machata 18537c37c7e0SPetr Machata nhge = rtnl_dereference(bucket->nh_entry); 18547c37c7e0SPetr Machata if (nhge->nh == old) { 18557c37c7e0SPetr Machata err = __call_nexthop_res_bucket_notifiers(net, nhg_id, 18567c37c7e0SPetr Machata i, true, 18577c37c7e0SPetr Machata oldi, newi, 18587c37c7e0SPetr Machata extack); 18597c37c7e0SPetr Machata if (err) 18607c37c7e0SPetr Machata goto err_notify; 18617c37c7e0SPetr Machata } 18627c37c7e0SPetr Machata } 18637c37c7e0SPetr Machata 18647c37c7e0SPetr Machata return 0; 18657c37c7e0SPetr Machata 18667c37c7e0SPetr Machata err_notify: 18677c37c7e0SPetr Machata while (i-- > 0) { 18687c37c7e0SPetr Machata struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; 18697c37c7e0SPetr Machata struct nh_grp_entry *nhge; 18707c37c7e0SPetr Machata 18717c37c7e0SPetr Machata nhge = rtnl_dereference(bucket->nh_entry); 18727c37c7e0SPetr Machata if (nhge->nh == old) 18737c37c7e0SPetr Machata __call_nexthop_res_bucket_notifiers(net, nhg_id, i, 18747c37c7e0SPetr Machata true, newi, oldi, 18757c37c7e0SPetr Machata extack); 18767c37c7e0SPetr Machata } 18777c37c7e0SPetr Machata return err; 18787c37c7e0SPetr Machata } 18797c37c7e0SPetr Machata 18807c37c7e0SPetr Machata static int replace_nexthop_single_notify(struct net *net, 18817c37c7e0SPetr Machata struct nexthop *group_nh, 18827c37c7e0SPetr Machata struct nexthop *old, 18837c37c7e0SPetr Machata struct nh_info *oldi, 18847c37c7e0SPetr Machata struct nh_info *newi, 18857c37c7e0SPetr Machata struct netlink_ext_ack *extack) 18867c37c7e0SPetr Machata { 18877c37c7e0SPetr Machata struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp); 18887c37c7e0SPetr Machata struct nh_res_table *res_table; 18897c37c7e0SPetr Machata 18907c37c7e0SPetr Machata if (nhg->mpath) { 18917c37c7e0SPetr Machata return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, 
18927c37c7e0SPetr Machata group_nh, extack); 18937c37c7e0SPetr Machata } else if (nhg->resilient) { 18947c37c7e0SPetr Machata res_table = rtnl_dereference(nhg->res_table); 18957c37c7e0SPetr Machata return replace_nexthop_single_notify_res(net, res_table, 18967c37c7e0SPetr Machata old, oldi, newi, 18977c37c7e0SPetr Machata extack); 18987c37c7e0SPetr Machata } 18997c37c7e0SPetr Machata 19007c37c7e0SPetr Machata return -EINVAL; 19017c37c7e0SPetr Machata } 19027c37c7e0SPetr Machata 19037bf4796dSDavid Ahern static int replace_nexthop_single(struct net *net, struct nexthop *old, 19047bf4796dSDavid Ahern struct nexthop *new, 19057bf4796dSDavid Ahern struct netlink_ext_ack *extack) 19067bf4796dSDavid Ahern { 1907f17bc33dSIdo Schimmel u8 old_protocol, old_nh_flags; 19087bf4796dSDavid Ahern struct nh_info *oldi, *newi; 1909f17bc33dSIdo Schimmel struct nh_grp_entry *nhge; 19108c09c9f9SIdo Schimmel int err; 19117bf4796dSDavid Ahern 19127bf4796dSDavid Ahern if (new->is_group) { 19137bf4796dSDavid Ahern NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group."); 19147bf4796dSDavid Ahern return -EINVAL; 19157bf4796dSDavid Ahern } 19167bf4796dSDavid Ahern 19178c09c9f9SIdo Schimmel err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); 19188c09c9f9SIdo Schimmel if (err) 19198c09c9f9SIdo Schimmel return err; 19208c09c9f9SIdo Schimmel 19218c09c9f9SIdo Schimmel /* Hardware flags were set on 'old' as 'new' is not in the red-black 19228c09c9f9SIdo Schimmel * tree. Therefore, inherit the flags from 'old' to 'new'. 
19238c09c9f9SIdo Schimmel */ 19248c09c9f9SIdo Schimmel new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP); 19258c09c9f9SIdo Schimmel 19267bf4796dSDavid Ahern oldi = rtnl_dereference(old->nh_info); 19277bf4796dSDavid Ahern newi = rtnl_dereference(new->nh_info); 19287bf4796dSDavid Ahern 19297bf4796dSDavid Ahern newi->nh_parent = old; 19307bf4796dSDavid Ahern oldi->nh_parent = new; 19317bf4796dSDavid Ahern 1932f17bc33dSIdo Schimmel old_protocol = old->protocol; 1933f17bc33dSIdo Schimmel old_nh_flags = old->nh_flags; 1934f17bc33dSIdo Schimmel 19357bf4796dSDavid Ahern old->protocol = new->protocol; 19367bf4796dSDavid Ahern old->nh_flags = new->nh_flags; 19377bf4796dSDavid Ahern 19387bf4796dSDavid Ahern rcu_assign_pointer(old->nh_info, newi); 19397bf4796dSDavid Ahern rcu_assign_pointer(new->nh_info, oldi); 19407bf4796dSDavid Ahern 1941f17bc33dSIdo Schimmel /* Send a replace notification for all the groups using the nexthop. */ 1942f17bc33dSIdo Schimmel list_for_each_entry(nhge, &old->grp_list, nh_list) { 1943f17bc33dSIdo Schimmel struct nexthop *nhp = nhge->nh_parent; 1944f17bc33dSIdo Schimmel 19457c37c7e0SPetr Machata err = replace_nexthop_single_notify(net, nhp, old, oldi, newi, 1946f17bc33dSIdo Schimmel extack); 1947f17bc33dSIdo Schimmel if (err) 1948f17bc33dSIdo Schimmel goto err_notify; 1949f17bc33dSIdo Schimmel } 1950f17bc33dSIdo Schimmel 1951885a3b15SIdo Schimmel /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially 1952885a3b15SIdo Schimmel * update IPv4 indication in all the groups using the nexthop. 
1953885a3b15SIdo Schimmel */ 1954885a3b15SIdo Schimmel if (oldi->family == AF_INET && newi->family == AF_INET6) { 1955885a3b15SIdo Schimmel list_for_each_entry(nhge, &old->grp_list, nh_list) { 1956885a3b15SIdo Schimmel struct nexthop *nhp = nhge->nh_parent; 1957885a3b15SIdo Schimmel struct nh_group *nhg; 1958885a3b15SIdo Schimmel 1959885a3b15SIdo Schimmel nhg = rtnl_dereference(nhp->nh_grp); 1960885a3b15SIdo Schimmel nh_group_v4_update(nhg); 1961885a3b15SIdo Schimmel } 1962885a3b15SIdo Schimmel } 1963885a3b15SIdo Schimmel 19647bf4796dSDavid Ahern return 0; 1965f17bc33dSIdo Schimmel 1966f17bc33dSIdo Schimmel err_notify: 1967f17bc33dSIdo Schimmel rcu_assign_pointer(new->nh_info, newi); 1968f17bc33dSIdo Schimmel rcu_assign_pointer(old->nh_info, oldi); 1969f17bc33dSIdo Schimmel old->nh_flags = old_nh_flags; 1970f17bc33dSIdo Schimmel old->protocol = old_protocol; 1971f17bc33dSIdo Schimmel oldi->nh_parent = old; 1972f17bc33dSIdo Schimmel newi->nh_parent = new; 1973f17bc33dSIdo Schimmel list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) { 1974f17bc33dSIdo Schimmel struct nexthop *nhp = nhge->nh_parent; 1975f17bc33dSIdo Schimmel 19767c37c7e0SPetr Machata replace_nexthop_single_notify(net, nhp, old, newi, oldi, NULL); 1977f17bc33dSIdo Schimmel } 1978f17bc33dSIdo Schimmel call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack); 1979f17bc33dSIdo Schimmel return err; 19807bf4796dSDavid Ahern } 19817bf4796dSDavid Ahern 19827bf4796dSDavid Ahern static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, 19837bf4796dSDavid Ahern struct nl_info *info) 19847bf4796dSDavid Ahern { 19857bf4796dSDavid Ahern struct fib6_info *f6i; 19867bf4796dSDavid Ahern 19877bf4796dSDavid Ahern if (!list_empty(&nh->fi_list)) { 19887bf4796dSDavid Ahern struct fib_info *fi; 19897bf4796dSDavid Ahern 19907bf4796dSDavid Ahern /* expectation is a few fib_info per nexthop and then 19917bf4796dSDavid Ahern * a lot of routes per fib_info. 
So mark the fib_info 19927bf4796dSDavid Ahern * and then walk the fib tables once 19937bf4796dSDavid Ahern */ 19947bf4796dSDavid Ahern list_for_each_entry(fi, &nh->fi_list, nh_list) 19957bf4796dSDavid Ahern fi->nh_updated = true; 19967bf4796dSDavid Ahern 19977bf4796dSDavid Ahern fib_info_notify_update(net, info); 19987bf4796dSDavid Ahern 19997bf4796dSDavid Ahern list_for_each_entry(fi, &nh->fi_list, nh_list) 20007bf4796dSDavid Ahern fi->nh_updated = false; 20017bf4796dSDavid Ahern } 20027bf4796dSDavid Ahern 20037bf4796dSDavid Ahern list_for_each_entry(f6i, &nh->f6i_list, nh_list) 20047bf4796dSDavid Ahern ipv6_stub->fib6_rt_update(net, f6i, info); 20057bf4796dSDavid Ahern } 20067bf4796dSDavid Ahern 20077bf4796dSDavid Ahern /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries 20087bf4796dSDavid Ahern * linked to this nexthop and for all groups that the nexthop 20097bf4796dSDavid Ahern * is a member of 20107bf4796dSDavid Ahern */ 20117bf4796dSDavid Ahern static void nexthop_replace_notify(struct net *net, struct nexthop *nh, 20127bf4796dSDavid Ahern struct nl_info *info) 20137bf4796dSDavid Ahern { 20147bf4796dSDavid Ahern struct nh_grp_entry *nhge; 20157bf4796dSDavid Ahern 20167bf4796dSDavid Ahern __nexthop_replace_notify(net, nh, info); 20177bf4796dSDavid Ahern 20187bf4796dSDavid Ahern list_for_each_entry(nhge, &nh->grp_list, nh_list) 20197bf4796dSDavid Ahern __nexthop_replace_notify(net, nhge->nh_parent, info); 20207bf4796dSDavid Ahern } 20217bf4796dSDavid Ahern 2022ab84be7eSDavid Ahern static int replace_nexthop(struct net *net, struct nexthop *old, 2023597f48e4SPetr Machata struct nexthop *new, const struct nh_config *cfg, 2024597f48e4SPetr Machata struct netlink_ext_ack *extack) 2025ab84be7eSDavid Ahern { 20267bf4796dSDavid Ahern bool new_is_reject = false; 20277bf4796dSDavid Ahern struct nh_grp_entry *nhge; 20287bf4796dSDavid Ahern int err; 20297bf4796dSDavid Ahern 20307bf4796dSDavid Ahern /* check that existing FIB entries are ok with the 
20317bf4796dSDavid Ahern * new nexthop definition 20327bf4796dSDavid Ahern */ 20337bf4796dSDavid Ahern err = fib_check_nh_list(old, new, extack); 20347bf4796dSDavid Ahern if (err) 20357bf4796dSDavid Ahern return err; 20367bf4796dSDavid Ahern 20377bf4796dSDavid Ahern err = fib6_check_nh_list(old, new, extack); 20387bf4796dSDavid Ahern if (err) 20397bf4796dSDavid Ahern return err; 20407bf4796dSDavid Ahern 20417bf4796dSDavid Ahern if (!new->is_group) { 20427bf4796dSDavid Ahern struct nh_info *nhi = rtnl_dereference(new->nh_info); 20437bf4796dSDavid Ahern 20447bf4796dSDavid Ahern new_is_reject = nhi->reject_nh; 20457bf4796dSDavid Ahern } 20467bf4796dSDavid Ahern 20477bf4796dSDavid Ahern list_for_each_entry(nhge, &old->grp_list, nh_list) { 20487bf4796dSDavid Ahern /* if new nexthop is a blackhole, any groups using this 20497bf4796dSDavid Ahern * nexthop cannot have more than 1 path 20507bf4796dSDavid Ahern */ 20517bf4796dSDavid Ahern if (new_is_reject && 20527bf4796dSDavid Ahern nexthop_num_path(nhge->nh_parent) > 1) { 20537bf4796dSDavid Ahern NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path"); 20547bf4796dSDavid Ahern return -EINVAL; 20557bf4796dSDavid Ahern } 20567bf4796dSDavid Ahern 20577bf4796dSDavid Ahern err = fib_check_nh_list(nhge->nh_parent, new, extack); 20587bf4796dSDavid Ahern if (err) 20597bf4796dSDavid Ahern return err; 20607bf4796dSDavid Ahern 20617bf4796dSDavid Ahern err = fib6_check_nh_list(nhge->nh_parent, new, extack); 20627bf4796dSDavid Ahern if (err) 20637bf4796dSDavid Ahern return err; 20647bf4796dSDavid Ahern } 20657bf4796dSDavid Ahern 20667bf4796dSDavid Ahern if (old->is_group) 2067597f48e4SPetr Machata err = replace_nexthop_grp(net, old, new, cfg, extack); 20687bf4796dSDavid Ahern else 20697bf4796dSDavid Ahern err = replace_nexthop_single(net, old, new, extack); 20707bf4796dSDavid Ahern 20717bf4796dSDavid Ahern if (!err) { 20727bf4796dSDavid Ahern nh_rt_cache_flush(net, old); 20737bf4796dSDavid 
Ahern 20747bf4796dSDavid Ahern __remove_nexthop(net, new, NULL); 20757bf4796dSDavid Ahern nexthop_put(new); 20767bf4796dSDavid Ahern } 20777bf4796dSDavid Ahern 20787bf4796dSDavid Ahern return err; 2079ab84be7eSDavid Ahern } 2080ab84be7eSDavid Ahern 2081ab84be7eSDavid Ahern /* called with rtnl_lock held */ 2082ab84be7eSDavid Ahern static int insert_nexthop(struct net *net, struct nexthop *new_nh, 2083ab84be7eSDavid Ahern struct nh_config *cfg, struct netlink_ext_ack *extack) 2084ab84be7eSDavid Ahern { 2085ab84be7eSDavid Ahern struct rb_node **pp, *parent = NULL, *next; 2086ab84be7eSDavid Ahern struct rb_root *root = &net->nexthop.rb_root; 2087ab84be7eSDavid Ahern bool replace = !!(cfg->nlflags & NLM_F_REPLACE); 2088ab84be7eSDavid Ahern bool create = !!(cfg->nlflags & NLM_F_CREATE); 2089ab84be7eSDavid Ahern u32 new_id = new_nh->id; 20907bf4796dSDavid Ahern int replace_notify = 0; 2091ab84be7eSDavid Ahern int rc = -EEXIST; 2092ab84be7eSDavid Ahern 2093ab84be7eSDavid Ahern pp = &root->rb_node; 2094ab84be7eSDavid Ahern while (1) { 2095ab84be7eSDavid Ahern struct nexthop *nh; 2096ab84be7eSDavid Ahern 2097233c6378SIdo Schimmel next = *pp; 2098ab84be7eSDavid Ahern if (!next) 2099ab84be7eSDavid Ahern break; 2100ab84be7eSDavid Ahern 2101ab84be7eSDavid Ahern parent = next; 2102ab84be7eSDavid Ahern 2103ab84be7eSDavid Ahern nh = rb_entry(parent, struct nexthop, rb_node); 2104ab84be7eSDavid Ahern if (new_id < nh->id) { 2105ab84be7eSDavid Ahern pp = &next->rb_left; 2106ab84be7eSDavid Ahern } else if (new_id > nh->id) { 2107ab84be7eSDavid Ahern pp = &next->rb_right; 2108ab84be7eSDavid Ahern } else if (replace) { 2109597f48e4SPetr Machata rc = replace_nexthop(net, nh, new_nh, cfg, extack); 21107bf4796dSDavid Ahern if (!rc) { 2111ab84be7eSDavid Ahern new_nh = nh; /* send notification with old nh */ 21127bf4796dSDavid Ahern replace_notify = 1; 21137bf4796dSDavid Ahern } 2114ab84be7eSDavid Ahern goto out; 2115ab84be7eSDavid Ahern } else { 2116ab84be7eSDavid Ahern /* id already exists 
and not a replace */ 2117ab84be7eSDavid Ahern goto out; 2118ab84be7eSDavid Ahern } 2119ab84be7eSDavid Ahern } 2120ab84be7eSDavid Ahern 2121ab84be7eSDavid Ahern if (replace && !create) { 2122ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists"); 2123ab84be7eSDavid Ahern rc = -ENOENT; 2124ab84be7eSDavid Ahern goto out; 2125ab84be7eSDavid Ahern } 2126ab84be7eSDavid Ahern 2127283a72a5SPetr Machata if (new_nh->is_group) { 2128283a72a5SPetr Machata struct nh_group *nhg = rtnl_dereference(new_nh->nh_grp); 2129283a72a5SPetr Machata struct nh_res_table *res_table; 2130283a72a5SPetr Machata 2131283a72a5SPetr Machata if (nhg->resilient) { 2132283a72a5SPetr Machata res_table = rtnl_dereference(nhg->res_table); 2133283a72a5SPetr Machata 2134283a72a5SPetr Machata /* Not passing the number of buckets is OK when 2135283a72a5SPetr Machata * replacing, but not when creating a new group. 2136283a72a5SPetr Machata */ 2137283a72a5SPetr Machata if (!cfg->nh_grp_res_has_num_buckets) { 2138283a72a5SPetr Machata NL_SET_ERR_MSG(extack, "Number of buckets not specified for nexthop group insertion"); 2139283a72a5SPetr Machata rc = -EINVAL; 2140283a72a5SPetr Machata goto out; 2141283a72a5SPetr Machata } 2142283a72a5SPetr Machata 2143283a72a5SPetr Machata nh_res_group_rebalance(nhg, res_table); 21447c37c7e0SPetr Machata 21457c37c7e0SPetr Machata /* Do not send bucket notifications, we do full 21467c37c7e0SPetr Machata * notification below. 21477c37c7e0SPetr Machata */ 21487c37c7e0SPetr Machata nh_res_table_upkeep(res_table, false); 2149283a72a5SPetr Machata } 2150283a72a5SPetr Machata } 2151283a72a5SPetr Machata 2152ab84be7eSDavid Ahern rb_link_node_rcu(&new_nh->rb_node, parent, pp); 2153ab84be7eSDavid Ahern rb_insert_color(&new_nh->rb_node, root); 2154732d167bSIdo Schimmel 21557c37c7e0SPetr Machata /* The initial insertion is a full notification for mpath as well 21567c37c7e0SPetr Machata * as resilient groups. 
21577c37c7e0SPetr Machata */ 2158732d167bSIdo Schimmel rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack); 2159732d167bSIdo Schimmel if (rc) 2160732d167bSIdo Schimmel rb_erase(&new_nh->rb_node, &net->nexthop.rb_root); 2161732d167bSIdo Schimmel 2162ab84be7eSDavid Ahern out: 2163ab84be7eSDavid Ahern if (!rc) { 2164ab84be7eSDavid Ahern nh_base_seq_inc(net); 2165ab84be7eSDavid Ahern nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); 21664f80116dSRoopa Prabhu if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode) 21677bf4796dSDavid Ahern nexthop_replace_notify(net, new_nh, &cfg->nlinfo); 2168ab84be7eSDavid Ahern } 2169ab84be7eSDavid Ahern 2170ab84be7eSDavid Ahern return rc; 2171ab84be7eSDavid Ahern } 2172ab84be7eSDavid Ahern 2173597cfe4fSDavid Ahern /* rtnl */ 2174597cfe4fSDavid Ahern /* remove all nexthops tied to a device being deleted */ 217576c03bf8SIdo Schimmel static void nexthop_flush_dev(struct net_device *dev, unsigned long event) 2176597cfe4fSDavid Ahern { 2177597cfe4fSDavid Ahern unsigned int hash = nh_dev_hashfn(dev->ifindex); 2178597cfe4fSDavid Ahern struct net *net = dev_net(dev); 2179597cfe4fSDavid Ahern struct hlist_head *head = &net->nexthop.devhash[hash]; 2180597cfe4fSDavid Ahern struct hlist_node *n; 2181597cfe4fSDavid Ahern struct nh_info *nhi; 2182597cfe4fSDavid Ahern 2183597cfe4fSDavid Ahern hlist_for_each_entry_safe(nhi, n, head, dev_hash) { 2184597cfe4fSDavid Ahern if (nhi->fib_nhc.nhc_dev != dev) 2185597cfe4fSDavid Ahern continue; 2186597cfe4fSDavid Ahern 218776c03bf8SIdo Schimmel if (nhi->reject_nh && 218876c03bf8SIdo Schimmel (event == NETDEV_DOWN || event == NETDEV_CHANGE)) 218976c03bf8SIdo Schimmel continue; 219076c03bf8SIdo Schimmel 2191430a0491SDavid Ahern remove_nexthop(net, nhi->nh_parent, NULL); 2192597cfe4fSDavid Ahern } 2193597cfe4fSDavid Ahern } 2194597cfe4fSDavid Ahern 2195ab84be7eSDavid Ahern /* rtnl; called when net namespace is deleted */ 2196ab84be7eSDavid Ahern static void flush_all_nexthops(struct 
net *net) 2197ab84be7eSDavid Ahern { 2198ab84be7eSDavid Ahern struct rb_root *root = &net->nexthop.rb_root; 2199ab84be7eSDavid Ahern struct rb_node *node; 2200ab84be7eSDavid Ahern struct nexthop *nh; 2201ab84be7eSDavid Ahern 2202ab84be7eSDavid Ahern while ((node = rb_first(root))) { 2203ab84be7eSDavid Ahern nh = rb_entry(node, struct nexthop, rb_node); 2204430a0491SDavid Ahern remove_nexthop(net, nh, NULL); 2205ab84be7eSDavid Ahern cond_resched(); 2206ab84be7eSDavid Ahern } 2207ab84be7eSDavid Ahern } 2208ab84be7eSDavid Ahern 2209430a0491SDavid Ahern static struct nexthop *nexthop_create_group(struct net *net, 2210430a0491SDavid Ahern struct nh_config *cfg) 2211430a0491SDavid Ahern { 2212430a0491SDavid Ahern struct nlattr *grps_attr = cfg->nh_grp; 2213430a0491SDavid Ahern struct nexthop_grp *entry = nla_data(grps_attr); 221490f33bffSNikolay Aleksandrov u16 num_nh = nla_len(grps_attr) / sizeof(*entry); 2215430a0491SDavid Ahern struct nh_group *nhg; 2216430a0491SDavid Ahern struct nexthop *nh; 2217283a72a5SPetr Machata int err; 2218430a0491SDavid Ahern int i; 2219430a0491SDavid Ahern 2220eeaac363SNikolay Aleksandrov if (WARN_ON(!num_nh)) 2221eeaac363SNikolay Aleksandrov return ERR_PTR(-EINVAL); 2222eeaac363SNikolay Aleksandrov 2223430a0491SDavid Ahern nh = nexthop_alloc(); 2224430a0491SDavid Ahern if (!nh) 2225430a0491SDavid Ahern return ERR_PTR(-ENOMEM); 2226430a0491SDavid Ahern 2227430a0491SDavid Ahern nh->is_group = 1; 2228430a0491SDavid Ahern 222990f33bffSNikolay Aleksandrov nhg = nexthop_grp_alloc(num_nh); 2230430a0491SDavid Ahern if (!nhg) { 2231430a0491SDavid Ahern kfree(nh); 2232430a0491SDavid Ahern return ERR_PTR(-ENOMEM); 2233430a0491SDavid Ahern } 2234430a0491SDavid Ahern 223590f33bffSNikolay Aleksandrov /* spare group used for removals */ 223690f33bffSNikolay Aleksandrov nhg->spare = nexthop_grp_alloc(num_nh); 2237dafe2078SPatrick Eigensatz if (!nhg->spare) { 223890f33bffSNikolay Aleksandrov kfree(nhg); 223990f33bffSNikolay Aleksandrov kfree(nh); 
2240dafe2078SPatrick Eigensatz return ERR_PTR(-ENOMEM); 224190f33bffSNikolay Aleksandrov } 224290f33bffSNikolay Aleksandrov nhg->spare->spare = nhg; 224390f33bffSNikolay Aleksandrov 2244430a0491SDavid Ahern for (i = 0; i < nhg->num_nh; ++i) { 2245430a0491SDavid Ahern struct nexthop *nhe; 2246430a0491SDavid Ahern struct nh_info *nhi; 2247430a0491SDavid Ahern 2248430a0491SDavid Ahern nhe = nexthop_find_by_id(net, entry[i].id); 2249283a72a5SPetr Machata if (!nexthop_get(nhe)) { 2250283a72a5SPetr Machata err = -ENOENT; 2251430a0491SDavid Ahern goto out_no_nh; 2252283a72a5SPetr Machata } 2253430a0491SDavid Ahern 2254430a0491SDavid Ahern nhi = rtnl_dereference(nhe->nh_info); 2255430a0491SDavid Ahern if (nhi->family == AF_INET) 2256430a0491SDavid Ahern nhg->has_v4 = true; 2257430a0491SDavid Ahern 2258430a0491SDavid Ahern nhg->nh_entries[i].nh = nhe; 2259430a0491SDavid Ahern nhg->nh_entries[i].weight = entry[i].weight + 1; 2260430a0491SDavid Ahern list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list); 2261430a0491SDavid Ahern nhg->nh_entries[i].nh_parent = nh; 2262430a0491SDavid Ahern } 2263430a0491SDavid Ahern 226490e1a9e2SPetr Machata if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { 2265430a0491SDavid Ahern nhg->mpath = 1; 226690e1a9e2SPetr Machata nhg->is_multipath = true; 2267710ec562SIdo Schimmel } else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) { 2268283a72a5SPetr Machata struct nh_res_table *res_table; 2269283a72a5SPetr Machata 2270283a72a5SPetr Machata /* Bounce resilient groups for now. 
*/ 2271283a72a5SPetr Machata err = -EINVAL; 2272283a72a5SPetr Machata goto out_no_nh; 2273283a72a5SPetr Machata 2274283a72a5SPetr Machata res_table = nexthop_res_table_alloc(net, cfg->nh_id, cfg); 2275283a72a5SPetr Machata if (!res_table) { 2276283a72a5SPetr Machata err = -ENOMEM; 2277710ec562SIdo Schimmel goto out_no_nh; 227890e1a9e2SPetr Machata } 2279720ccd9aSPetr Machata 2280283a72a5SPetr Machata rcu_assign_pointer(nhg->spare->res_table, res_table); 2281283a72a5SPetr Machata rcu_assign_pointer(nhg->res_table, res_table); 2282283a72a5SPetr Machata nhg->resilient = true; 2283283a72a5SPetr Machata nhg->is_multipath = true; 2284283a72a5SPetr Machata } 2285283a72a5SPetr Machata 2286283a72a5SPetr Machata WARN_ON_ONCE(nhg->mpath + nhg->resilient != 1); 2287720ccd9aSPetr Machata 2288720ccd9aSPetr Machata if (nhg->mpath) 2289283a72a5SPetr Machata nh_mp_group_rebalance(nhg); 2290430a0491SDavid Ahern 229138428d68SRoopa Prabhu if (cfg->nh_fdb) 2292ce9ac056SDavid Ahern nhg->fdb_nh = 1; 229338428d68SRoopa Prabhu 2294430a0491SDavid Ahern rcu_assign_pointer(nh->nh_grp, nhg); 2295430a0491SDavid Ahern 2296430a0491SDavid Ahern return nh; 2297430a0491SDavid Ahern 2298430a0491SDavid Ahern out_no_nh: 22997b01e53eSIdo Schimmel for (i--; i >= 0; --i) { 23007b01e53eSIdo Schimmel list_del(&nhg->nh_entries[i].nh_list); 2301430a0491SDavid Ahern nexthop_put(nhg->nh_entries[i].nh); 23027b01e53eSIdo Schimmel } 2303430a0491SDavid Ahern 230490f33bffSNikolay Aleksandrov kfree(nhg->spare); 2305430a0491SDavid Ahern kfree(nhg); 2306430a0491SDavid Ahern kfree(nh); 2307430a0491SDavid Ahern 2308283a72a5SPetr Machata return ERR_PTR(err); 2309430a0491SDavid Ahern } 2310430a0491SDavid Ahern 2311597cfe4fSDavid Ahern static int nh_create_ipv4(struct net *net, struct nexthop *nh, 2312597cfe4fSDavid Ahern struct nh_info *nhi, struct nh_config *cfg, 2313597cfe4fSDavid Ahern struct netlink_ext_ack *extack) 2314597cfe4fSDavid Ahern { 2315597cfe4fSDavid Ahern struct fib_nh *fib_nh = &nhi->fib_nh; 
2316597cfe4fSDavid Ahern struct fib_config fib_cfg = { 2317597cfe4fSDavid Ahern .fc_oif = cfg->nh_ifindex, 2318597cfe4fSDavid Ahern .fc_gw4 = cfg->gw.ipv4, 2319597cfe4fSDavid Ahern .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0, 2320597cfe4fSDavid Ahern .fc_flags = cfg->nh_flags, 2321b513bd03SDavid Ahern .fc_encap = cfg->nh_encap, 2322b513bd03SDavid Ahern .fc_encap_type = cfg->nh_encap_type, 2323597cfe4fSDavid Ahern }; 232438428d68SRoopa Prabhu u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN); 2325c76c9925SColin Ian King int err; 2326597cfe4fSDavid Ahern 2327597cfe4fSDavid Ahern err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); 2328597cfe4fSDavid Ahern if (err) { 2329597cfe4fSDavid Ahern fib_nh_release(net, fib_nh); 2330597cfe4fSDavid Ahern goto out; 2331597cfe4fSDavid Ahern } 2332597cfe4fSDavid Ahern 2333ce9ac056SDavid Ahern if (nhi->fdb_nh) 233438428d68SRoopa Prabhu goto out; 233538428d68SRoopa Prabhu 2336597cfe4fSDavid Ahern /* sets nh_dev if successful */ 2337597cfe4fSDavid Ahern err = fib_check_nh(net, fib_nh, tb_id, 0, extack); 2338597cfe4fSDavid Ahern if (!err) { 2339597cfe4fSDavid Ahern nh->nh_flags = fib_nh->fib_nh_flags; 2340dcb1ecb5SDavid Ahern fib_info_update_nhc_saddr(net, &fib_nh->nh_common, 2341dcb1ecb5SDavid Ahern fib_nh->fib_nh_scope); 2342597cfe4fSDavid Ahern } else { 2343597cfe4fSDavid Ahern fib_nh_release(net, fib_nh); 2344597cfe4fSDavid Ahern } 2345597cfe4fSDavid Ahern out: 2346597cfe4fSDavid Ahern return err; 2347597cfe4fSDavid Ahern } 2348597cfe4fSDavid Ahern 234953010f99SDavid Ahern static int nh_create_ipv6(struct net *net, struct nexthop *nh, 235053010f99SDavid Ahern struct nh_info *nhi, struct nh_config *cfg, 235153010f99SDavid Ahern struct netlink_ext_ack *extack) 235253010f99SDavid Ahern { 235353010f99SDavid Ahern struct fib6_nh *fib6_nh = &nhi->fib6_nh; 235453010f99SDavid Ahern struct fib6_config fib6_cfg = { 235553010f99SDavid Ahern .fc_table = l3mdev_fib_table(cfg->dev), 235653010f99SDavid Ahern .fc_ifindex = 
cfg->nh_ifindex, 235753010f99SDavid Ahern .fc_gateway = cfg->gw.ipv6, 235853010f99SDavid Ahern .fc_flags = cfg->nh_flags, 2359b513bd03SDavid Ahern .fc_encap = cfg->nh_encap, 2360b513bd03SDavid Ahern .fc_encap_type = cfg->nh_encap_type, 236138428d68SRoopa Prabhu .fc_is_fdb = cfg->nh_fdb, 236253010f99SDavid Ahern }; 23636f43e525SColin Ian King int err; 236453010f99SDavid Ahern 236553010f99SDavid Ahern if (!ipv6_addr_any(&cfg->gw.ipv6)) 236653010f99SDavid Ahern fib6_cfg.fc_flags |= RTF_GATEWAY; 236753010f99SDavid Ahern 236853010f99SDavid Ahern /* sets nh_dev if successful */ 236953010f99SDavid Ahern err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, 237053010f99SDavid Ahern extack); 237153010f99SDavid Ahern if (err) 237253010f99SDavid Ahern ipv6_stub->fib6_nh_release(fib6_nh); 237353010f99SDavid Ahern else 237453010f99SDavid Ahern nh->nh_flags = fib6_nh->fib_nh_flags; 237553010f99SDavid Ahern 237653010f99SDavid Ahern return err; 237753010f99SDavid Ahern } 237853010f99SDavid Ahern 2379ab84be7eSDavid Ahern static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, 2380ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 2381ab84be7eSDavid Ahern { 2382ab84be7eSDavid Ahern struct nh_info *nhi; 2383ab84be7eSDavid Ahern struct nexthop *nh; 2384ab84be7eSDavid Ahern int err = 0; 2385ab84be7eSDavid Ahern 2386ab84be7eSDavid Ahern nh = nexthop_alloc(); 2387ab84be7eSDavid Ahern if (!nh) 2388ab84be7eSDavid Ahern return ERR_PTR(-ENOMEM); 2389ab84be7eSDavid Ahern 2390ab84be7eSDavid Ahern nhi = kzalloc(sizeof(*nhi), GFP_KERNEL); 2391ab84be7eSDavid Ahern if (!nhi) { 2392ab84be7eSDavid Ahern kfree(nh); 2393ab84be7eSDavid Ahern return ERR_PTR(-ENOMEM); 2394ab84be7eSDavid Ahern } 2395ab84be7eSDavid Ahern 2396ab84be7eSDavid Ahern nh->nh_flags = cfg->nh_flags; 2397ab84be7eSDavid Ahern nh->net = net; 2398ab84be7eSDavid Ahern 2399ab84be7eSDavid Ahern nhi->nh_parent = nh; 2400ab84be7eSDavid Ahern nhi->family = cfg->nh_family; 2401ab84be7eSDavid Ahern 
nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; 2402ab84be7eSDavid Ahern 240338428d68SRoopa Prabhu if (cfg->nh_fdb) 2404ce9ac056SDavid Ahern nhi->fdb_nh = 1; 240538428d68SRoopa Prabhu 2406ab84be7eSDavid Ahern if (cfg->nh_blackhole) { 2407ab84be7eSDavid Ahern nhi->reject_nh = 1; 2408ab84be7eSDavid Ahern cfg->nh_ifindex = net->loopback_dev->ifindex; 2409ab84be7eSDavid Ahern } 2410ab84be7eSDavid Ahern 2411597cfe4fSDavid Ahern switch (cfg->nh_family) { 2412597cfe4fSDavid Ahern case AF_INET: 2413597cfe4fSDavid Ahern err = nh_create_ipv4(net, nh, nhi, cfg, extack); 2414597cfe4fSDavid Ahern break; 241553010f99SDavid Ahern case AF_INET6: 241653010f99SDavid Ahern err = nh_create_ipv6(net, nh, nhi, cfg, extack); 241753010f99SDavid Ahern break; 2418597cfe4fSDavid Ahern } 2419597cfe4fSDavid Ahern 2420ab84be7eSDavid Ahern if (err) { 2421ab84be7eSDavid Ahern kfree(nhi); 2422ab84be7eSDavid Ahern kfree(nh); 2423ab84be7eSDavid Ahern return ERR_PTR(err); 2424ab84be7eSDavid Ahern } 2425ab84be7eSDavid Ahern 2426597cfe4fSDavid Ahern /* add the entry to the device based hash */ 2427ce9ac056SDavid Ahern if (!nhi->fdb_nh) 2428597cfe4fSDavid Ahern nexthop_devhash_add(net, nhi); 2429597cfe4fSDavid Ahern 2430ab84be7eSDavid Ahern rcu_assign_pointer(nh->nh_info, nhi); 2431ab84be7eSDavid Ahern 2432ab84be7eSDavid Ahern return nh; 2433ab84be7eSDavid Ahern } 2434ab84be7eSDavid Ahern 2435ab84be7eSDavid Ahern /* called with rtnl lock held */ 2436ab84be7eSDavid Ahern static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg, 2437ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 2438ab84be7eSDavid Ahern { 2439ab84be7eSDavid Ahern struct nexthop *nh; 2440ab84be7eSDavid Ahern int err; 2441ab84be7eSDavid Ahern 2442ab84be7eSDavid Ahern if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) { 2443ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Replace requires nexthop id"); 2444ab84be7eSDavid Ahern return ERR_PTR(-EINVAL); 2445ab84be7eSDavid Ahern } 2446ab84be7eSDavid Ahern 2447ab84be7eSDavid Ahern if 
(!cfg->nh_id) { 2448ab84be7eSDavid Ahern cfg->nh_id = nh_find_unused_id(net); 2449ab84be7eSDavid Ahern if (!cfg->nh_id) { 2450ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "No unused id"); 2451ab84be7eSDavid Ahern return ERR_PTR(-EINVAL); 2452ab84be7eSDavid Ahern } 2453ab84be7eSDavid Ahern } 2454ab84be7eSDavid Ahern 2455430a0491SDavid Ahern if (cfg->nh_grp) 2456430a0491SDavid Ahern nh = nexthop_create_group(net, cfg); 2457430a0491SDavid Ahern else 2458ab84be7eSDavid Ahern nh = nexthop_create(net, cfg, extack); 2459430a0491SDavid Ahern 2460ab84be7eSDavid Ahern if (IS_ERR(nh)) 2461ab84be7eSDavid Ahern return nh; 2462ab84be7eSDavid Ahern 2463ab84be7eSDavid Ahern refcount_set(&nh->refcnt, 1); 2464ab84be7eSDavid Ahern nh->id = cfg->nh_id; 2465ab84be7eSDavid Ahern nh->protocol = cfg->nh_protocol; 2466ab84be7eSDavid Ahern nh->net = net; 2467ab84be7eSDavid Ahern 2468ab84be7eSDavid Ahern err = insert_nexthop(net, nh, cfg, extack); 2469ab84be7eSDavid Ahern if (err) { 2470430a0491SDavid Ahern __remove_nexthop(net, nh, NULL); 2471ab84be7eSDavid Ahern nexthop_put(nh); 2472ab84be7eSDavid Ahern nh = ERR_PTR(err); 2473ab84be7eSDavid Ahern } 2474ab84be7eSDavid Ahern 2475ab84be7eSDavid Ahern return nh; 2476ab84be7eSDavid Ahern } 2477ab84be7eSDavid Ahern 2478ab84be7eSDavid Ahern static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, 2479ab84be7eSDavid Ahern struct nlmsghdr *nlh, struct nh_config *cfg, 2480ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 2481ab84be7eSDavid Ahern { 2482ab84be7eSDavid Ahern struct nhmsg *nhm = nlmsg_data(nlh); 2483643d0878SPetr Machata struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)]; 2484ab84be7eSDavid Ahern int err; 2485ab84be7eSDavid Ahern 2486643d0878SPetr Machata err = nlmsg_parse(nlh, sizeof(*nhm), tb, 2487643d0878SPetr Machata ARRAY_SIZE(rtm_nh_policy_new) - 1, 2488643d0878SPetr Machata rtm_nh_policy_new, extack); 2489ab84be7eSDavid Ahern if (err < 0) 2490ab84be7eSDavid Ahern return err; 2491ab84be7eSDavid Ahern 2492ab84be7eSDavid 
Ahern err = -EINVAL; 2493ab84be7eSDavid Ahern if (nhm->resvd || nhm->nh_scope) { 2494ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid values in ancillary header"); 2495ab84be7eSDavid Ahern goto out; 2496ab84be7eSDavid Ahern } 2497ab84be7eSDavid Ahern if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) { 2498ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header"); 2499ab84be7eSDavid Ahern goto out; 2500ab84be7eSDavid Ahern } 2501ab84be7eSDavid Ahern 2502ab84be7eSDavid Ahern switch (nhm->nh_family) { 2503597cfe4fSDavid Ahern case AF_INET: 250453010f99SDavid Ahern case AF_INET6: 2505597cfe4fSDavid Ahern break; 2506430a0491SDavid Ahern case AF_UNSPEC: 2507430a0491SDavid Ahern if (tb[NHA_GROUP]) 2508430a0491SDavid Ahern break; 2509a8eceea8SJoe Perches fallthrough; 2510ab84be7eSDavid Ahern default: 2511ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid address family"); 2512ab84be7eSDavid Ahern goto out; 2513ab84be7eSDavid Ahern } 2514ab84be7eSDavid Ahern 2515ab84be7eSDavid Ahern memset(cfg, 0, sizeof(*cfg)); 2516ab84be7eSDavid Ahern cfg->nlflags = nlh->nlmsg_flags; 2517ab84be7eSDavid Ahern cfg->nlinfo.portid = NETLINK_CB(skb).portid; 2518ab84be7eSDavid Ahern cfg->nlinfo.nlh = nlh; 2519ab84be7eSDavid Ahern cfg->nlinfo.nl_net = net; 2520ab84be7eSDavid Ahern 2521ab84be7eSDavid Ahern cfg->nh_family = nhm->nh_family; 2522ab84be7eSDavid Ahern cfg->nh_protocol = nhm->nh_protocol; 2523ab84be7eSDavid Ahern cfg->nh_flags = nhm->nh_flags; 2524ab84be7eSDavid Ahern 2525ab84be7eSDavid Ahern if (tb[NHA_ID]) 2526ab84be7eSDavid Ahern cfg->nh_id = nla_get_u32(tb[NHA_ID]); 2527ab84be7eSDavid Ahern 252838428d68SRoopa Prabhu if (tb[NHA_FDB]) { 252938428d68SRoopa Prabhu if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] || 253038428d68SRoopa Prabhu tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { 253138428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole"); 253238428d68SRoopa Prabhu goto out; 253338428d68SRoopa Prabhu } 
253438428d68SRoopa Prabhu if (nhm->nh_flags) { 253538428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header"); 253638428d68SRoopa Prabhu goto out; 253738428d68SRoopa Prabhu } 253838428d68SRoopa Prabhu cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); 253938428d68SRoopa Prabhu } 254038428d68SRoopa Prabhu 2541430a0491SDavid Ahern if (tb[NHA_GROUP]) { 2542430a0491SDavid Ahern if (nhm->nh_family != AF_UNSPEC) { 2543430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid family for group"); 2544430a0491SDavid Ahern goto out; 2545430a0491SDavid Ahern } 2546430a0491SDavid Ahern cfg->nh_grp = tb[NHA_GROUP]; 2547430a0491SDavid Ahern 2548430a0491SDavid Ahern cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH; 2549430a0491SDavid Ahern if (tb[NHA_GROUP_TYPE]) 2550430a0491SDavid Ahern cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]); 2551430a0491SDavid Ahern 2552430a0491SDavid Ahern if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) { 2553430a0491SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid group type"); 2554430a0491SDavid Ahern goto out; 2555430a0491SDavid Ahern } 2556643d0878SPetr Machata err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack); 2557430a0491SDavid Ahern 2558430a0491SDavid Ahern /* no other attributes should be set */ 2559430a0491SDavid Ahern goto out; 2560430a0491SDavid Ahern } 2561430a0491SDavid Ahern 2562ab84be7eSDavid Ahern if (tb[NHA_BLACKHOLE]) { 2563b513bd03SDavid Ahern if (tb[NHA_GATEWAY] || tb[NHA_OIF] || 256438428d68SRoopa Prabhu tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) { 256538428d68SRoopa Prabhu NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb"); 2566ab84be7eSDavid Ahern goto out; 2567ab84be7eSDavid Ahern } 2568ab84be7eSDavid Ahern 2569ab84be7eSDavid Ahern cfg->nh_blackhole = 1; 2570ab84be7eSDavid Ahern err = 0; 2571ab84be7eSDavid Ahern goto out; 2572ab84be7eSDavid Ahern } 2573ab84be7eSDavid Ahern 257438428d68SRoopa Prabhu if (!cfg->nh_fdb && !tb[NHA_OIF]) { 257538428d68SRoopa Prabhu 
NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops"); 2576ab84be7eSDavid Ahern goto out; 2577ab84be7eSDavid Ahern } 2578ab84be7eSDavid Ahern 257938428d68SRoopa Prabhu if (!cfg->nh_fdb && tb[NHA_OIF]) { 2580ab84be7eSDavid Ahern cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); 2581ab84be7eSDavid Ahern if (cfg->nh_ifindex) 2582ab84be7eSDavid Ahern cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); 2583ab84be7eSDavid Ahern 2584ab84be7eSDavid Ahern if (!cfg->dev) { 2585ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid device index"); 2586ab84be7eSDavid Ahern goto out; 2587ab84be7eSDavid Ahern } else if (!(cfg->dev->flags & IFF_UP)) { 2588ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 2589ab84be7eSDavid Ahern err = -ENETDOWN; 2590ab84be7eSDavid Ahern goto out; 2591ab84be7eSDavid Ahern } else if (!netif_carrier_ok(cfg->dev)) { 2592ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); 2593ab84be7eSDavid Ahern err = -ENETDOWN; 2594ab84be7eSDavid Ahern goto out; 2595ab84be7eSDavid Ahern } 259638428d68SRoopa Prabhu } 2597ab84be7eSDavid Ahern 2598597cfe4fSDavid Ahern err = -EINVAL; 2599597cfe4fSDavid Ahern if (tb[NHA_GATEWAY]) { 2600597cfe4fSDavid Ahern struct nlattr *gwa = tb[NHA_GATEWAY]; 2601597cfe4fSDavid Ahern 2602597cfe4fSDavid Ahern switch (cfg->nh_family) { 2603597cfe4fSDavid Ahern case AF_INET: 2604597cfe4fSDavid Ahern if (nla_len(gwa) != sizeof(u32)) { 2605597cfe4fSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid gateway"); 2606597cfe4fSDavid Ahern goto out; 2607597cfe4fSDavid Ahern } 2608597cfe4fSDavid Ahern cfg->gw.ipv4 = nla_get_be32(gwa); 2609597cfe4fSDavid Ahern break; 261053010f99SDavid Ahern case AF_INET6: 261153010f99SDavid Ahern if (nla_len(gwa) != sizeof(struct in6_addr)) { 261253010f99SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid gateway"); 261353010f99SDavid Ahern goto out; 261453010f99SDavid Ahern } 261553010f99SDavid Ahern cfg->gw.ipv6 = nla_get_in6_addr(gwa); 
261653010f99SDavid Ahern break; 2617597cfe4fSDavid Ahern default: 2618597cfe4fSDavid Ahern NL_SET_ERR_MSG(extack, 2619597cfe4fSDavid Ahern "Unknown address family for gateway"); 2620597cfe4fSDavid Ahern goto out; 2621597cfe4fSDavid Ahern } 2622597cfe4fSDavid Ahern } else { 2623597cfe4fSDavid Ahern /* device only nexthop (no gateway) */ 2624597cfe4fSDavid Ahern if (cfg->nh_flags & RTNH_F_ONLINK) { 2625597cfe4fSDavid Ahern NL_SET_ERR_MSG(extack, 2626597cfe4fSDavid Ahern "ONLINK flag can not be set for nexthop without a gateway"); 2627597cfe4fSDavid Ahern goto out; 2628597cfe4fSDavid Ahern } 2629597cfe4fSDavid Ahern } 2630597cfe4fSDavid Ahern 2631b513bd03SDavid Ahern if (tb[NHA_ENCAP]) { 2632b513bd03SDavid Ahern cfg->nh_encap = tb[NHA_ENCAP]; 2633b513bd03SDavid Ahern 2634b513bd03SDavid Ahern if (!tb[NHA_ENCAP_TYPE]) { 2635b513bd03SDavid Ahern NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing"); 2636b513bd03SDavid Ahern goto out; 2637b513bd03SDavid Ahern } 2638b513bd03SDavid Ahern 2639b513bd03SDavid Ahern cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]); 2640b513bd03SDavid Ahern err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack); 2641b513bd03SDavid Ahern if (err < 0) 2642b513bd03SDavid Ahern goto out; 2643b513bd03SDavid Ahern 2644b513bd03SDavid Ahern } else if (tb[NHA_ENCAP_TYPE]) { 2645b513bd03SDavid Ahern NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing"); 2646b513bd03SDavid Ahern goto out; 2647b513bd03SDavid Ahern } 2648b513bd03SDavid Ahern 2649b513bd03SDavid Ahern 2650ab84be7eSDavid Ahern err = 0; 2651ab84be7eSDavid Ahern out: 2652ab84be7eSDavid Ahern return err; 2653ab84be7eSDavid Ahern } 2654ab84be7eSDavid Ahern 2655ab84be7eSDavid Ahern /* rtnl */ 2656ab84be7eSDavid Ahern static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, 2657ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 2658ab84be7eSDavid Ahern { 2659ab84be7eSDavid Ahern struct net *net = sock_net(skb->sk); 2660ab84be7eSDavid Ahern struct nh_config 
cfg; 2661ab84be7eSDavid Ahern struct nexthop *nh; 2662ab84be7eSDavid Ahern int err; 2663ab84be7eSDavid Ahern 2664ab84be7eSDavid Ahern err = rtm_to_nh_config(net, skb, nlh, &cfg, extack); 2665ab84be7eSDavid Ahern if (!err) { 2666ab84be7eSDavid Ahern nh = nexthop_add(net, &cfg, extack); 2667ab84be7eSDavid Ahern if (IS_ERR(nh)) 2668ab84be7eSDavid Ahern err = PTR_ERR(nh); 2669ab84be7eSDavid Ahern } 2670ab84be7eSDavid Ahern 2671ab84be7eSDavid Ahern return err; 2672ab84be7eSDavid Ahern } 2673ab84be7eSDavid Ahern 26740bccf8edSPetr Machata static int __nh_valid_get_del_req(const struct nlmsghdr *nlh, 26750bccf8edSPetr Machata struct nlattr **tb, u32 *id, 2676ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 2677ab84be7eSDavid Ahern { 2678ab84be7eSDavid Ahern struct nhmsg *nhm = nlmsg_data(nlh); 26790bccf8edSPetr Machata 26800bccf8edSPetr Machata if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { 26810bccf8edSPetr Machata NL_SET_ERR_MSG(extack, "Invalid values in header"); 26820bccf8edSPetr Machata return -EINVAL; 26830bccf8edSPetr Machata } 26840bccf8edSPetr Machata 26850bccf8edSPetr Machata if (!tb[NHA_ID]) { 26860bccf8edSPetr Machata NL_SET_ERR_MSG(extack, "Nexthop id is missing"); 26870bccf8edSPetr Machata return -EINVAL; 26880bccf8edSPetr Machata } 26890bccf8edSPetr Machata 26900bccf8edSPetr Machata *id = nla_get_u32(tb[NHA_ID]); 26910bccf8edSPetr Machata if (!(*id)) { 26920bccf8edSPetr Machata NL_SET_ERR_MSG(extack, "Invalid nexthop id"); 26930bccf8edSPetr Machata return -EINVAL; 26940bccf8edSPetr Machata } 26950bccf8edSPetr Machata 26960bccf8edSPetr Machata return 0; 26970bccf8edSPetr Machata } 26980bccf8edSPetr Machata 26990bccf8edSPetr Machata static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id, 27000bccf8edSPetr Machata struct netlink_ext_ack *extack) 27010bccf8edSPetr Machata { 270260f5ad5eSPetr Machata struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; 270360f5ad5eSPetr Machata int err; 2704ab84be7eSDavid Ahern 
27050bccf8edSPetr Machata err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, 270660f5ad5eSPetr Machata ARRAY_SIZE(rtm_nh_policy_get) - 1, 270760f5ad5eSPetr Machata rtm_nh_policy_get, extack); 2708ab84be7eSDavid Ahern if (err < 0) 2709ab84be7eSDavid Ahern return err; 2710ab84be7eSDavid Ahern 27110bccf8edSPetr Machata return __nh_valid_get_del_req(nlh, tb, id, extack); 2712ab84be7eSDavid Ahern } 2713ab84be7eSDavid Ahern 2714ab84be7eSDavid Ahern /* rtnl */ 2715ab84be7eSDavid Ahern static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, 2716ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 2717ab84be7eSDavid Ahern { 2718ab84be7eSDavid Ahern struct net *net = sock_net(skb->sk); 2719ab84be7eSDavid Ahern struct nl_info nlinfo = { 2720ab84be7eSDavid Ahern .nlh = nlh, 2721ab84be7eSDavid Ahern .nl_net = net, 2722ab84be7eSDavid Ahern .portid = NETLINK_CB(skb).portid, 2723ab84be7eSDavid Ahern }; 2724ab84be7eSDavid Ahern struct nexthop *nh; 2725ab84be7eSDavid Ahern int err; 2726ab84be7eSDavid Ahern u32 id; 2727ab84be7eSDavid Ahern 2728ab84be7eSDavid Ahern err = nh_valid_get_del_req(nlh, &id, extack); 2729ab84be7eSDavid Ahern if (err) 2730ab84be7eSDavid Ahern return err; 2731ab84be7eSDavid Ahern 2732ab84be7eSDavid Ahern nh = nexthop_find_by_id(net, id); 2733ab84be7eSDavid Ahern if (!nh) 2734ab84be7eSDavid Ahern return -ENOENT; 2735ab84be7eSDavid Ahern 2736430a0491SDavid Ahern remove_nexthop(net, nh, &nlinfo); 2737ab84be7eSDavid Ahern 2738ab84be7eSDavid Ahern return 0; 2739ab84be7eSDavid Ahern } 2740ab84be7eSDavid Ahern 2741ab84be7eSDavid Ahern /* rtnl */ 2742ab84be7eSDavid Ahern static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2743ab84be7eSDavid Ahern struct netlink_ext_ack *extack) 2744ab84be7eSDavid Ahern { 2745ab84be7eSDavid Ahern struct net *net = sock_net(in_skb->sk); 2746ab84be7eSDavid Ahern struct sk_buff *skb = NULL; 2747ab84be7eSDavid Ahern struct nexthop *nh; 2748ab84be7eSDavid Ahern int err; 2749ab84be7eSDavid Ahern u32 id; 
2750ab84be7eSDavid Ahern 2751ab84be7eSDavid Ahern err = nh_valid_get_del_req(nlh, &id, extack); 2752ab84be7eSDavid Ahern if (err) 2753ab84be7eSDavid Ahern return err; 2754ab84be7eSDavid Ahern 2755ab84be7eSDavid Ahern err = -ENOBUFS; 2756ab84be7eSDavid Ahern skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2757ab84be7eSDavid Ahern if (!skb) 2758ab84be7eSDavid Ahern goto out; 2759ab84be7eSDavid Ahern 2760ab84be7eSDavid Ahern err = -ENOENT; 2761ab84be7eSDavid Ahern nh = nexthop_find_by_id(net, id); 2762ab84be7eSDavid Ahern if (!nh) 2763ab84be7eSDavid Ahern goto errout_free; 2764ab84be7eSDavid Ahern 2765ab84be7eSDavid Ahern err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid, 2766ab84be7eSDavid Ahern nlh->nlmsg_seq, 0); 2767ab84be7eSDavid Ahern if (err < 0) { 2768ab84be7eSDavid Ahern WARN_ON(err == -EMSGSIZE); 2769ab84be7eSDavid Ahern goto errout_free; 2770ab84be7eSDavid Ahern } 2771ab84be7eSDavid Ahern 2772ab84be7eSDavid Ahern err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2773ab84be7eSDavid Ahern out: 2774ab84be7eSDavid Ahern return err; 2775ab84be7eSDavid Ahern errout_free: 2776ab84be7eSDavid Ahern kfree_skb(skb); 2777ab84be7eSDavid Ahern goto out; 2778ab84be7eSDavid Ahern } 2779ab84be7eSDavid Ahern 278056450ec6SPetr Machata struct nh_dump_filter { 278156450ec6SPetr Machata int dev_idx; 278256450ec6SPetr Machata int master_idx; 278356450ec6SPetr Machata bool group_filter; 278456450ec6SPetr Machata bool fdb_filter; 278556450ec6SPetr Machata }; 278656450ec6SPetr Machata 278756450ec6SPetr Machata static bool nh_dump_filtered(struct nexthop *nh, 278856450ec6SPetr Machata struct nh_dump_filter *filter, u8 family) 2789ab84be7eSDavid Ahern { 2790ab84be7eSDavid Ahern const struct net_device *dev; 2791ab84be7eSDavid Ahern const struct nh_info *nhi; 2792ab84be7eSDavid Ahern 279356450ec6SPetr Machata if (filter->group_filter && !nh->is_group) 2794430a0491SDavid Ahern return true; 2795430a0491SDavid Ahern 279656450ec6SPetr Machata if (!filter->dev_idx && 
!filter->master_idx && !family) 2797ab84be7eSDavid Ahern return false; 2798ab84be7eSDavid Ahern 2799430a0491SDavid Ahern if (nh->is_group) 2800430a0491SDavid Ahern return true; 2801430a0491SDavid Ahern 2802ab84be7eSDavid Ahern nhi = rtnl_dereference(nh->nh_info); 2803ab84be7eSDavid Ahern if (family && nhi->family != family) 2804ab84be7eSDavid Ahern return true; 2805ab84be7eSDavid Ahern 2806ab84be7eSDavid Ahern dev = nhi->fib_nhc.nhc_dev; 280756450ec6SPetr Machata if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx)) 2808ab84be7eSDavid Ahern return true; 2809ab84be7eSDavid Ahern 281056450ec6SPetr Machata if (filter->master_idx) { 2811ab84be7eSDavid Ahern struct net_device *master; 2812ab84be7eSDavid Ahern 2813ab84be7eSDavid Ahern if (!dev) 2814ab84be7eSDavid Ahern return true; 2815ab84be7eSDavid Ahern 2816ab84be7eSDavid Ahern master = netdev_master_upper_dev_get((struct net_device *)dev); 281756450ec6SPetr Machata if (!master || master->ifindex != filter->master_idx) 2818ab84be7eSDavid Ahern return true; 2819ab84be7eSDavid Ahern } 2820ab84be7eSDavid Ahern 2821ab84be7eSDavid Ahern return false; 2822ab84be7eSDavid Ahern } 2823ab84be7eSDavid Ahern 2824b9ebea12SPetr Machata static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb, 282556450ec6SPetr Machata struct nh_dump_filter *filter, 2826b9ebea12SPetr Machata struct netlink_ext_ack *extack) 2827ab84be7eSDavid Ahern { 2828ab84be7eSDavid Ahern struct nhmsg *nhm; 2829ab84be7eSDavid Ahern u32 idx; 2830ab84be7eSDavid Ahern 283144551bffSPetr Machata if (tb[NHA_OIF]) { 283244551bffSPetr Machata idx = nla_get_u32(tb[NHA_OIF]); 2833ab84be7eSDavid Ahern if (idx > INT_MAX) { 2834ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid device index"); 2835ab84be7eSDavid Ahern return -EINVAL; 2836ab84be7eSDavid Ahern } 283756450ec6SPetr Machata filter->dev_idx = idx; 283844551bffSPetr Machata } 283944551bffSPetr Machata if (tb[NHA_MASTER]) { 284044551bffSPetr Machata idx = nla_get_u32(tb[NHA_MASTER]); 
2841ab84be7eSDavid Ahern if (idx > INT_MAX) { 2842ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid master device index"); 2843ab84be7eSDavid Ahern return -EINVAL; 2844ab84be7eSDavid Ahern } 284556450ec6SPetr Machata filter->master_idx = idx; 2846ab84be7eSDavid Ahern } 284756450ec6SPetr Machata filter->group_filter = nla_get_flag(tb[NHA_GROUPS]); 284856450ec6SPetr Machata filter->fdb_filter = nla_get_flag(tb[NHA_FDB]); 2849ab84be7eSDavid Ahern 2850ab84be7eSDavid Ahern nhm = nlmsg_data(nlh); 2851ab84be7eSDavid Ahern if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { 2852ab84be7eSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request"); 2853ab84be7eSDavid Ahern return -EINVAL; 2854ab84be7eSDavid Ahern } 2855ab84be7eSDavid Ahern 2856ab84be7eSDavid Ahern return 0; 2857ab84be7eSDavid Ahern } 2858ab84be7eSDavid Ahern 2859b9ebea12SPetr Machata static int nh_valid_dump_req(const struct nlmsghdr *nlh, 2860b9ebea12SPetr Machata struct nh_dump_filter *filter, 2861b9ebea12SPetr Machata struct netlink_callback *cb) 2862b9ebea12SPetr Machata { 2863b9ebea12SPetr Machata struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; 2864b9ebea12SPetr Machata int err; 2865b9ebea12SPetr Machata 2866b9ebea12SPetr Machata err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, 2867b9ebea12SPetr Machata ARRAY_SIZE(rtm_nh_policy_dump) - 1, 2868b9ebea12SPetr Machata rtm_nh_policy_dump, cb->extack); 2869b9ebea12SPetr Machata if (err < 0) 2870b9ebea12SPetr Machata return err; 2871b9ebea12SPetr Machata 2872b9ebea12SPetr Machata return __nh_valid_dump_req(nlh, tb, filter, cb->extack); 2873b9ebea12SPetr Machata } 2874b9ebea12SPetr Machata 2875a6fbbaa6SPetr Machata struct rtm_dump_nh_ctx { 2876a6fbbaa6SPetr Machata u32 idx; 2877a6fbbaa6SPetr Machata }; 2878a6fbbaa6SPetr Machata 2879a6fbbaa6SPetr Machata static struct rtm_dump_nh_ctx * 2880a6fbbaa6SPetr Machata rtm_dump_nh_ctx(struct netlink_callback *cb) 2881a6fbbaa6SPetr Machata { 2882a6fbbaa6SPetr Machata 
struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx; 2883a6fbbaa6SPetr Machata 2884a6fbbaa6SPetr Machata BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); 2885a6fbbaa6SPetr Machata return ctx; 2886a6fbbaa6SPetr Machata } 2887a6fbbaa6SPetr Machata 2888cbee1807SPetr Machata static int rtm_dump_walk_nexthops(struct sk_buff *skb, 2889cbee1807SPetr Machata struct netlink_callback *cb, 2890cbee1807SPetr Machata struct rb_root *root, 2891cbee1807SPetr Machata struct rtm_dump_nh_ctx *ctx, 2892e948217dSPetr Machata int (*nh_cb)(struct sk_buff *skb, 2893e948217dSPetr Machata struct netlink_callback *cb, 2894e948217dSPetr Machata struct nexthop *nh, void *data), 2895e948217dSPetr Machata void *data) 2896ab84be7eSDavid Ahern { 2897ab84be7eSDavid Ahern struct rb_node *node; 2898ab84be7eSDavid Ahern int idx = 0, s_idx; 2899ab84be7eSDavid Ahern int err; 2900ab84be7eSDavid Ahern 2901a6fbbaa6SPetr Machata s_idx = ctx->idx; 2902ab84be7eSDavid Ahern for (node = rb_first(root); node; node = rb_next(node)) { 2903ab84be7eSDavid Ahern struct nexthop *nh; 2904ab84be7eSDavid Ahern 2905ab84be7eSDavid Ahern if (idx < s_idx) 2906ab84be7eSDavid Ahern goto cont; 2907ab84be7eSDavid Ahern 2908ab84be7eSDavid Ahern nh = rb_entry(node, struct nexthop, rb_node); 2909cbee1807SPetr Machata ctx->idx = idx; 2910e948217dSPetr Machata err = nh_cb(skb, cb, nh, data); 2911e948217dSPetr Machata if (err) 2912cbee1807SPetr Machata return err; 2913cbee1807SPetr Machata cont: 2914cbee1807SPetr Machata idx++; 2915cbee1807SPetr Machata } 2916cbee1807SPetr Machata 2917cbee1807SPetr Machata ctx->idx = idx; 2918cbee1807SPetr Machata return 0; 2919cbee1807SPetr Machata } 2920cbee1807SPetr Machata 2921e948217dSPetr Machata static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb, 2922e948217dSPetr Machata struct nexthop *nh, void *data) 2923e948217dSPetr Machata { 2924e948217dSPetr Machata struct nhmsg *nhm = nlmsg_data(cb->nlh); 2925e948217dSPetr Machata struct nh_dump_filter *filter = data; 
2926e948217dSPetr Machata 2927e948217dSPetr Machata if (nh_dump_filtered(nh, filter, nhm->nh_family)) 2928e948217dSPetr Machata return 0; 2929e948217dSPetr Machata 2930e948217dSPetr Machata return nh_fill_node(skb, nh, RTM_NEWNEXTHOP, 2931e948217dSPetr Machata NETLINK_CB(cb->skb).portid, 2932e948217dSPetr Machata cb->nlh->nlmsg_seq, NLM_F_MULTI); 2933e948217dSPetr Machata } 2934e948217dSPetr Machata 2935cbee1807SPetr Machata /* rtnl */ 2936cbee1807SPetr Machata static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) 2937cbee1807SPetr Machata { 2938cbee1807SPetr Machata struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); 2939cbee1807SPetr Machata struct net *net = sock_net(skb->sk); 2940cbee1807SPetr Machata struct rb_root *root = &net->nexthop.rb_root; 2941cbee1807SPetr Machata struct nh_dump_filter filter = {}; 2942cbee1807SPetr Machata int err; 2943cbee1807SPetr Machata 2944cbee1807SPetr Machata err = nh_valid_dump_req(cb->nlh, &filter, cb); 2945cbee1807SPetr Machata if (err < 0) 2946cbee1807SPetr Machata return err; 2947cbee1807SPetr Machata 2948e948217dSPetr Machata err = rtm_dump_walk_nexthops(skb, cb, root, ctx, 2949e948217dSPetr Machata &rtm_dump_nexthop_cb, &filter); 2950ab84be7eSDavid Ahern if (err < 0) { 2951ab84be7eSDavid Ahern if (likely(skb->len)) 2952ab84be7eSDavid Ahern goto out; 2953ab84be7eSDavid Ahern goto out_err; 2954ab84be7eSDavid Ahern } 2955ab84be7eSDavid Ahern 2956ab84be7eSDavid Ahern out: 2957ab84be7eSDavid Ahern err = skb->len; 2958ab84be7eSDavid Ahern out_err: 2959ab84be7eSDavid Ahern cb->seq = net->nexthop.seq; 2960ab84be7eSDavid Ahern nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 2961ab84be7eSDavid Ahern return err; 2962ab84be7eSDavid Ahern } 2963ab84be7eSDavid Ahern 2964597cfe4fSDavid Ahern static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu) 2965597cfe4fSDavid Ahern { 2966597cfe4fSDavid Ahern unsigned int hash = nh_dev_hashfn(dev->ifindex); 2967597cfe4fSDavid Ahern struct net *net = dev_net(dev); 
2968597cfe4fSDavid Ahern struct hlist_head *head = &net->nexthop.devhash[hash]; 2969597cfe4fSDavid Ahern struct hlist_node *n; 2970597cfe4fSDavid Ahern struct nh_info *nhi; 2971597cfe4fSDavid Ahern 2972597cfe4fSDavid Ahern hlist_for_each_entry_safe(nhi, n, head, dev_hash) { 2973597cfe4fSDavid Ahern if (nhi->fib_nhc.nhc_dev == dev) { 2974597cfe4fSDavid Ahern if (nhi->family == AF_INET) 2975597cfe4fSDavid Ahern fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu, 2976597cfe4fSDavid Ahern orig_mtu); 2977597cfe4fSDavid Ahern } 2978597cfe4fSDavid Ahern } 2979597cfe4fSDavid Ahern } 2980597cfe4fSDavid Ahern 2981597cfe4fSDavid Ahern /* rtnl */ 2982597cfe4fSDavid Ahern static int nh_netdev_event(struct notifier_block *this, 2983597cfe4fSDavid Ahern unsigned long event, void *ptr) 2984597cfe4fSDavid Ahern { 2985597cfe4fSDavid Ahern struct net_device *dev = netdev_notifier_info_to_dev(ptr); 2986597cfe4fSDavid Ahern struct netdev_notifier_info_ext *info_ext; 2987597cfe4fSDavid Ahern 2988597cfe4fSDavid Ahern switch (event) { 2989597cfe4fSDavid Ahern case NETDEV_DOWN: 2990597cfe4fSDavid Ahern case NETDEV_UNREGISTER: 299176c03bf8SIdo Schimmel nexthop_flush_dev(dev, event); 2992597cfe4fSDavid Ahern break; 2993597cfe4fSDavid Ahern case NETDEV_CHANGE: 2994597cfe4fSDavid Ahern if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP))) 299576c03bf8SIdo Schimmel nexthop_flush_dev(dev, event); 2996597cfe4fSDavid Ahern break; 2997597cfe4fSDavid Ahern case NETDEV_CHANGEMTU: 2998597cfe4fSDavid Ahern info_ext = ptr; 2999597cfe4fSDavid Ahern nexthop_sync_mtu(dev, info_ext->ext.mtu); 3000597cfe4fSDavid Ahern rt_cache_flush(dev_net(dev)); 3001597cfe4fSDavid Ahern break; 3002597cfe4fSDavid Ahern } 3003597cfe4fSDavid Ahern return NOTIFY_DONE; 3004597cfe4fSDavid Ahern } 3005597cfe4fSDavid Ahern 3006597cfe4fSDavid Ahern static struct notifier_block nh_netdev_notifier = { 3007597cfe4fSDavid Ahern .notifier_call = nh_netdev_event, 3008597cfe4fSDavid Ahern }; 3009597cfe4fSDavid Ahern 3010975ff7f3SIdo Schimmel 
static int nexthops_dump(struct net *net, struct notifier_block *nb, 3011975ff7f3SIdo Schimmel struct netlink_ext_ack *extack) 3012975ff7f3SIdo Schimmel { 3013975ff7f3SIdo Schimmel struct rb_root *root = &net->nexthop.rb_root; 3014975ff7f3SIdo Schimmel struct rb_node *node; 3015975ff7f3SIdo Schimmel int err = 0; 3016975ff7f3SIdo Schimmel 3017975ff7f3SIdo Schimmel for (node = rb_first(root); node; node = rb_next(node)) { 3018975ff7f3SIdo Schimmel struct nexthop *nh; 3019975ff7f3SIdo Schimmel 3020975ff7f3SIdo Schimmel nh = rb_entry(node, struct nexthop, rb_node); 3021975ff7f3SIdo Schimmel err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh, 3022975ff7f3SIdo Schimmel extack); 3023975ff7f3SIdo Schimmel if (err) 3024975ff7f3SIdo Schimmel break; 3025975ff7f3SIdo Schimmel } 3026975ff7f3SIdo Schimmel 3027975ff7f3SIdo Schimmel return err; 3028975ff7f3SIdo Schimmel } 3029975ff7f3SIdo Schimmel 3030ce7e9c8aSIdo Schimmel int register_nexthop_notifier(struct net *net, struct notifier_block *nb, 3031ce7e9c8aSIdo Schimmel struct netlink_ext_ack *extack) 30328590ceedSRoopa Prabhu { 3033975ff7f3SIdo Schimmel int err; 3034975ff7f3SIdo Schimmel 3035975ff7f3SIdo Schimmel rtnl_lock(); 3036975ff7f3SIdo Schimmel err = nexthops_dump(net, nb, extack); 3037975ff7f3SIdo Schimmel if (err) 3038975ff7f3SIdo Schimmel goto unlock; 3039975ff7f3SIdo Schimmel err = blocking_notifier_chain_register(&net->nexthop.notifier_chain, 304080690ec6SIdo Schimmel nb); 3041975ff7f3SIdo Schimmel unlock: 3042975ff7f3SIdo Schimmel rtnl_unlock(); 3043975ff7f3SIdo Schimmel return err; 30448590ceedSRoopa Prabhu } 30458590ceedSRoopa Prabhu EXPORT_SYMBOL(register_nexthop_notifier); 30468590ceedSRoopa Prabhu 30478590ceedSRoopa Prabhu int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) 30488590ceedSRoopa Prabhu { 304980690ec6SIdo Schimmel return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, 30508590ceedSRoopa Prabhu nb); 30518590ceedSRoopa Prabhu } 30528590ceedSRoopa 
Prabhu EXPORT_SYMBOL(unregister_nexthop_notifier); 30538590ceedSRoopa Prabhu 3054e95f2592SIdo Schimmel void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap) 3055e95f2592SIdo Schimmel { 3056e95f2592SIdo Schimmel struct nexthop *nexthop; 3057e95f2592SIdo Schimmel 3058e95f2592SIdo Schimmel rcu_read_lock(); 3059e95f2592SIdo Schimmel 3060e95f2592SIdo Schimmel nexthop = nexthop_find_by_id(net, id); 3061e95f2592SIdo Schimmel if (!nexthop) 3062e95f2592SIdo Schimmel goto out; 3063e95f2592SIdo Schimmel 3064e95f2592SIdo Schimmel nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); 3065e95f2592SIdo Schimmel if (offload) 3066e95f2592SIdo Schimmel nexthop->nh_flags |= RTNH_F_OFFLOAD; 3067e95f2592SIdo Schimmel if (trap) 3068e95f2592SIdo Schimmel nexthop->nh_flags |= RTNH_F_TRAP; 3069e95f2592SIdo Schimmel 3070e95f2592SIdo Schimmel out: 3071e95f2592SIdo Schimmel rcu_read_unlock(); 3072e95f2592SIdo Schimmel } 3073e95f2592SIdo Schimmel EXPORT_SYMBOL(nexthop_set_hw_flags); 3074e95f2592SIdo Schimmel 307556ad5ba3SIdo Schimmel void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, 307656ad5ba3SIdo Schimmel bool offload, bool trap) 307756ad5ba3SIdo Schimmel { 307856ad5ba3SIdo Schimmel struct nh_res_table *res_table; 307956ad5ba3SIdo Schimmel struct nh_res_bucket *bucket; 308056ad5ba3SIdo Schimmel struct nexthop *nexthop; 308156ad5ba3SIdo Schimmel struct nh_group *nhg; 308256ad5ba3SIdo Schimmel 308356ad5ba3SIdo Schimmel rcu_read_lock(); 308456ad5ba3SIdo Schimmel 308556ad5ba3SIdo Schimmel nexthop = nexthop_find_by_id(net, id); 308656ad5ba3SIdo Schimmel if (!nexthop || !nexthop->is_group) 308756ad5ba3SIdo Schimmel goto out; 308856ad5ba3SIdo Schimmel 308956ad5ba3SIdo Schimmel nhg = rcu_dereference(nexthop->nh_grp); 309056ad5ba3SIdo Schimmel if (!nhg->resilient) 309156ad5ba3SIdo Schimmel goto out; 309256ad5ba3SIdo Schimmel 309356ad5ba3SIdo Schimmel if (bucket_index >= nhg->res_table->num_nh_buckets) 309456ad5ba3SIdo Schimmel goto out; 
309556ad5ba3SIdo Schimmel 309656ad5ba3SIdo Schimmel res_table = rcu_dereference(nhg->res_table); 309756ad5ba3SIdo Schimmel bucket = &res_table->nh_buckets[bucket_index]; 309856ad5ba3SIdo Schimmel bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); 309956ad5ba3SIdo Schimmel if (offload) 310056ad5ba3SIdo Schimmel bucket->nh_flags |= RTNH_F_OFFLOAD; 310156ad5ba3SIdo Schimmel if (trap) 310256ad5ba3SIdo Schimmel bucket->nh_flags |= RTNH_F_TRAP; 310356ad5ba3SIdo Schimmel 310456ad5ba3SIdo Schimmel out: 310556ad5ba3SIdo Schimmel rcu_read_unlock(); 310656ad5ba3SIdo Schimmel } 310756ad5ba3SIdo Schimmel EXPORT_SYMBOL(nexthop_bucket_set_hw_flags); 310856ad5ba3SIdo Schimmel 3109*cfc15c1dSIdo Schimmel void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, 3110*cfc15c1dSIdo Schimmel unsigned long *activity) 3111*cfc15c1dSIdo Schimmel { 3112*cfc15c1dSIdo Schimmel struct nh_res_table *res_table; 3113*cfc15c1dSIdo Schimmel struct nexthop *nexthop; 3114*cfc15c1dSIdo Schimmel struct nh_group *nhg; 3115*cfc15c1dSIdo Schimmel u16 i; 3116*cfc15c1dSIdo Schimmel 3117*cfc15c1dSIdo Schimmel rcu_read_lock(); 3118*cfc15c1dSIdo Schimmel 3119*cfc15c1dSIdo Schimmel nexthop = nexthop_find_by_id(net, id); 3120*cfc15c1dSIdo Schimmel if (!nexthop || !nexthop->is_group) 3121*cfc15c1dSIdo Schimmel goto out; 3122*cfc15c1dSIdo Schimmel 3123*cfc15c1dSIdo Schimmel nhg = rcu_dereference(nexthop->nh_grp); 3124*cfc15c1dSIdo Schimmel if (!nhg->resilient) 3125*cfc15c1dSIdo Schimmel goto out; 3126*cfc15c1dSIdo Schimmel 3127*cfc15c1dSIdo Schimmel /* Instead of silently ignoring some buckets, demand that the sizes 3128*cfc15c1dSIdo Schimmel * be the same. 
3129*cfc15c1dSIdo Schimmel */ 3130*cfc15c1dSIdo Schimmel res_table = rcu_dereference(nhg->res_table); 3131*cfc15c1dSIdo Schimmel if (num_buckets != res_table->num_nh_buckets) 3132*cfc15c1dSIdo Schimmel goto out; 3133*cfc15c1dSIdo Schimmel 3134*cfc15c1dSIdo Schimmel for (i = 0; i < num_buckets; i++) { 3135*cfc15c1dSIdo Schimmel if (test_bit(i, activity)) 3136*cfc15c1dSIdo Schimmel nh_res_bucket_set_busy(&res_table->nh_buckets[i]); 3137*cfc15c1dSIdo Schimmel } 3138*cfc15c1dSIdo Schimmel 3139*cfc15c1dSIdo Schimmel out: 3140*cfc15c1dSIdo Schimmel rcu_read_unlock(); 3141*cfc15c1dSIdo Schimmel } 3142*cfc15c1dSIdo Schimmel EXPORT_SYMBOL(nexthop_res_grp_activity_update); 3143*cfc15c1dSIdo Schimmel 3144ab84be7eSDavid Ahern static void __net_exit nexthop_net_exit(struct net *net) 3145ab84be7eSDavid Ahern { 3146ab84be7eSDavid Ahern rtnl_lock(); 3147ab84be7eSDavid Ahern flush_all_nexthops(net); 3148ab84be7eSDavid Ahern rtnl_unlock(); 3149597cfe4fSDavid Ahern kfree(net->nexthop.devhash); 3150ab84be7eSDavid Ahern } 3151ab84be7eSDavid Ahern 3152ab84be7eSDavid Ahern static int __net_init nexthop_net_init(struct net *net) 3153ab84be7eSDavid Ahern { 3154597cfe4fSDavid Ahern size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE; 3155597cfe4fSDavid Ahern 3156ab84be7eSDavid Ahern net->nexthop.rb_root = RB_ROOT; 3157597cfe4fSDavid Ahern net->nexthop.devhash = kzalloc(sz, GFP_KERNEL); 3158597cfe4fSDavid Ahern if (!net->nexthop.devhash) 3159597cfe4fSDavid Ahern return -ENOMEM; 316080690ec6SIdo Schimmel BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain); 3161ab84be7eSDavid Ahern 3162ab84be7eSDavid Ahern return 0; 3163ab84be7eSDavid Ahern } 3164ab84be7eSDavid Ahern 3165ab84be7eSDavid Ahern static struct pernet_operations nexthop_net_ops = { 3166ab84be7eSDavid Ahern .init = nexthop_net_init, 3167ab84be7eSDavid Ahern .exit = nexthop_net_exit, 3168ab84be7eSDavid Ahern }; 3169ab84be7eSDavid Ahern 3170ab84be7eSDavid Ahern static int __init nexthop_init(void) 3171ab84be7eSDavid Ahern { 
3172ab84be7eSDavid Ahern register_pernet_subsys(&nexthop_net_ops); 3173ab84be7eSDavid Ahern 3174597cfe4fSDavid Ahern register_netdevice_notifier(&nh_netdev_notifier); 3175597cfe4fSDavid Ahern 3176ab84be7eSDavid Ahern rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); 3177ab84be7eSDavid Ahern rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0); 3178ab84be7eSDavid Ahern rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop, 3179ab84be7eSDavid Ahern rtm_dump_nexthop, 0); 3180ab84be7eSDavid Ahern 3181ab84be7eSDavid Ahern rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); 3182ab84be7eSDavid Ahern rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); 3183ab84be7eSDavid Ahern 3184ab84be7eSDavid Ahern rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); 3185ab84be7eSDavid Ahern rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); 3186ab84be7eSDavid Ahern 3187ab84be7eSDavid Ahern return 0; 3188ab84be7eSDavid Ahern } 3189ab84be7eSDavid Ahern subsys_initcall(nexthop_init); 3190