xref: /linux/net/ipv4/nexthop.c (revision 7c37c7e00411b3d1e0c5292368317aca69d1f324)
1ab84be7eSDavid Ahern // SPDX-License-Identifier: GPL-2.0
2ab84be7eSDavid Ahern /* Generic nexthop implementation
3ab84be7eSDavid Ahern  *
4ab84be7eSDavid Ahern  * Copyright (c) 2017-19 Cumulus Networks
5ab84be7eSDavid Ahern  * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
6ab84be7eSDavid Ahern  */
7ab84be7eSDavid Ahern 
8ab84be7eSDavid Ahern #include <linux/nexthop.h>
9ab84be7eSDavid Ahern #include <linux/rtnetlink.h>
10ab84be7eSDavid Ahern #include <linux/slab.h>
11430a0491SDavid Ahern #include <net/arp.h>
1253010f99SDavid Ahern #include <net/ipv6_stubs.h>
13b513bd03SDavid Ahern #include <net/lwtunnel.h>
14430a0491SDavid Ahern #include <net/ndisc.h>
15ab84be7eSDavid Ahern #include <net/nexthop.h>
16597cfe4fSDavid Ahern #include <net/route.h>
17ab84be7eSDavid Ahern #include <net/sock.h>
18ab84be7eSDavid Ahern 
/* Forward declaration: remove_nexthop() is needed before its definition,
 * which lies outside this part of the file.
 */
static void remove_nexthop(struct net *net, struct nexthop *nh,
			   struct nl_info *nlinfo);
21430a0491SDavid Ahern 
/* Width, in bits, of an index into the per-netns device hash table. */
#define NH_DEV_HASHBITS  8
/* Number of buckets in that table; a power of two, so that
 * NH_DEV_HASHSIZE - 1 can serve as an index mask.
 */
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
24597cfe4fSDavid Ahern 
25643d0878SPetr Machata static const struct nla_policy rtm_nh_policy_new[] = {
26ab84be7eSDavid Ahern 	[NHA_ID]		= { .type = NLA_U32 },
27ab84be7eSDavid Ahern 	[NHA_GROUP]		= { .type = NLA_BINARY },
28ab84be7eSDavid Ahern 	[NHA_GROUP_TYPE]	= { .type = NLA_U16 },
29ab84be7eSDavid Ahern 	[NHA_BLACKHOLE]		= { .type = NLA_FLAG },
30ab84be7eSDavid Ahern 	[NHA_OIF]		= { .type = NLA_U32 },
31ab84be7eSDavid Ahern 	[NHA_GATEWAY]		= { .type = NLA_BINARY },
32ab84be7eSDavid Ahern 	[NHA_ENCAP_TYPE]	= { .type = NLA_U16 },
33ab84be7eSDavid Ahern 	[NHA_ENCAP]		= { .type = NLA_NESTED },
3438428d68SRoopa Prabhu 	[NHA_FDB]		= { .type = NLA_FLAG },
35ab84be7eSDavid Ahern };
36ab84be7eSDavid Ahern 
3760f5ad5eSPetr Machata static const struct nla_policy rtm_nh_policy_get[] = {
3860f5ad5eSPetr Machata 	[NHA_ID]		= { .type = NLA_U32 },
3960f5ad5eSPetr Machata };
4060f5ad5eSPetr Machata 
4144551bffSPetr Machata static const struct nla_policy rtm_nh_policy_dump[] = {
4244551bffSPetr Machata 	[NHA_OIF]		= { .type = NLA_U32 },
4344551bffSPetr Machata 	[NHA_GROUPS]		= { .type = NLA_FLAG },
4444551bffSPetr Machata 	[NHA_MASTER]		= { .type = NLA_U32 },
4544551bffSPetr Machata 	[NHA_FDB]		= { .type = NLA_FLAG },
4644551bffSPetr Machata };
4744551bffSPetr Machata 
485ca474f2SIdo Schimmel static bool nexthop_notifiers_is_empty(struct net *net)
495ca474f2SIdo Schimmel {
505ca474f2SIdo Schimmel 	return !net->nexthop.notifier_chain.head;
515ca474f2SIdo Schimmel }
525ca474f2SIdo Schimmel 
535ca474f2SIdo Schimmel static void
545ca474f2SIdo Schimmel __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
5596a85625SPetr Machata 			       const struct nh_info *nhi)
565ca474f2SIdo Schimmel {
575ca474f2SIdo Schimmel 	nh_info->dev = nhi->fib_nhc.nhc_dev;
585ca474f2SIdo Schimmel 	nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
595ca474f2SIdo Schimmel 	if (nh_info->gw_family == AF_INET)
605ca474f2SIdo Schimmel 		nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4;
615ca474f2SIdo Schimmel 	else if (nh_info->gw_family == AF_INET6)
625ca474f2SIdo Schimmel 		nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
635ca474f2SIdo Schimmel 
645ca474f2SIdo Schimmel 	nh_info->is_reject = nhi->reject_nh;
655ca474f2SIdo Schimmel 	nh_info->is_fdb = nhi->fdb_nh;
665ca474f2SIdo Schimmel 	nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
675ca474f2SIdo Schimmel }
685ca474f2SIdo Schimmel 
695ca474f2SIdo Schimmel static int nh_notifier_single_info_init(struct nh_notifier_info *info,
705ca474f2SIdo Schimmel 					const struct nexthop *nh)
715ca474f2SIdo Schimmel {
7296a85625SPetr Machata 	struct nh_info *nhi = rtnl_dereference(nh->nh_info);
7396a85625SPetr Machata 
7409ad6becSIdo Schimmel 	info->type = NH_NOTIFIER_INFO_TYPE_SINGLE;
755ca474f2SIdo Schimmel 	info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
765ca474f2SIdo Schimmel 	if (!info->nh)
775ca474f2SIdo Schimmel 		return -ENOMEM;
785ca474f2SIdo Schimmel 
7996a85625SPetr Machata 	__nh_notifier_single_info_init(info->nh, nhi);
805ca474f2SIdo Schimmel 
815ca474f2SIdo Schimmel 	return 0;
825ca474f2SIdo Schimmel }
835ca474f2SIdo Schimmel 
845ca474f2SIdo Schimmel static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
855ca474f2SIdo Schimmel {
865ca474f2SIdo Schimmel 	kfree(info->nh);
875ca474f2SIdo Schimmel }
885ca474f2SIdo Schimmel 
89da230501SPetr Machata static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
90da230501SPetr Machata 				    struct nh_group *nhg)
915ca474f2SIdo Schimmel {
925ca474f2SIdo Schimmel 	u16 num_nh = nhg->num_nh;
935ca474f2SIdo Schimmel 	int i;
945ca474f2SIdo Schimmel 
9509ad6becSIdo Schimmel 	info->type = NH_NOTIFIER_INFO_TYPE_GRP;
965ca474f2SIdo Schimmel 	info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh),
975ca474f2SIdo Schimmel 			       GFP_KERNEL);
985ca474f2SIdo Schimmel 	if (!info->nh_grp)
995ca474f2SIdo Schimmel 		return -ENOMEM;
1005ca474f2SIdo Schimmel 
1015ca474f2SIdo Schimmel 	info->nh_grp->num_nh = num_nh;
1025ca474f2SIdo Schimmel 	info->nh_grp->is_fdb = nhg->fdb_nh;
1035ca474f2SIdo Schimmel 
1045ca474f2SIdo Schimmel 	for (i = 0; i < num_nh; i++) {
1055ca474f2SIdo Schimmel 		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
10696a85625SPetr Machata 		struct nh_info *nhi;
1075ca474f2SIdo Schimmel 
10896a85625SPetr Machata 		nhi = rtnl_dereference(nhge->nh->nh_info);
1095ca474f2SIdo Schimmel 		info->nh_grp->nh_entries[i].id = nhge->nh->id;
1105ca474f2SIdo Schimmel 		info->nh_grp->nh_entries[i].weight = nhge->weight;
1115ca474f2SIdo Schimmel 		__nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
11296a85625SPetr Machata 					       nhi);
1135ca474f2SIdo Schimmel 	}
1145ca474f2SIdo Schimmel 
1155ca474f2SIdo Schimmel 	return 0;
1165ca474f2SIdo Schimmel }
1175ca474f2SIdo Schimmel 
118*7c37c7e0SPetr Machata static int nh_notifier_res_table_info_init(struct nh_notifier_info *info,
119*7c37c7e0SPetr Machata 					   struct nh_group *nhg)
120*7c37c7e0SPetr Machata {
121*7c37c7e0SPetr Machata 	struct nh_res_table *res_table = rtnl_dereference(nhg->res_table);
122*7c37c7e0SPetr Machata 	u16 num_nh_buckets = res_table->num_nh_buckets;
123*7c37c7e0SPetr Machata 	unsigned long size;
124*7c37c7e0SPetr Machata 	u16 i;
125*7c37c7e0SPetr Machata 
126*7c37c7e0SPetr Machata 	info->type = NH_NOTIFIER_INFO_TYPE_RES_TABLE;
127*7c37c7e0SPetr Machata 	size = struct_size(info->nh_res_table, nhs, num_nh_buckets);
128*7c37c7e0SPetr Machata 	info->nh_res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO |
129*7c37c7e0SPetr Machata 				       __GFP_NOWARN);
130*7c37c7e0SPetr Machata 	if (!info->nh_res_table)
131*7c37c7e0SPetr Machata 		return -ENOMEM;
132*7c37c7e0SPetr Machata 
133*7c37c7e0SPetr Machata 	info->nh_res_table->num_nh_buckets = num_nh_buckets;
134*7c37c7e0SPetr Machata 
135*7c37c7e0SPetr Machata 	for (i = 0; i < num_nh_buckets; i++) {
136*7c37c7e0SPetr Machata 		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
137*7c37c7e0SPetr Machata 		struct nh_grp_entry *nhge;
138*7c37c7e0SPetr Machata 		struct nh_info *nhi;
139*7c37c7e0SPetr Machata 
140*7c37c7e0SPetr Machata 		nhge = rtnl_dereference(bucket->nh_entry);
141*7c37c7e0SPetr Machata 		nhi = rtnl_dereference(nhge->nh->nh_info);
142*7c37c7e0SPetr Machata 		__nh_notifier_single_info_init(&info->nh_res_table->nhs[i],
143*7c37c7e0SPetr Machata 					       nhi);
144*7c37c7e0SPetr Machata 	}
145*7c37c7e0SPetr Machata 
146*7c37c7e0SPetr Machata 	return 0;
147*7c37c7e0SPetr Machata }
148*7c37c7e0SPetr Machata 
149da230501SPetr Machata static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
150da230501SPetr Machata 				     const struct nexthop *nh)
1515ca474f2SIdo Schimmel {
152da230501SPetr Machata 	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
153da230501SPetr Machata 
154da230501SPetr Machata 	if (nhg->mpath)
155da230501SPetr Machata 		return nh_notifier_mp_info_init(info, nhg);
156*7c37c7e0SPetr Machata 	else if (nhg->resilient)
157*7c37c7e0SPetr Machata 		return nh_notifier_res_table_info_init(info, nhg);
158da230501SPetr Machata 	return -EINVAL;
159da230501SPetr Machata }
160da230501SPetr Machata 
161da230501SPetr Machata static void nh_notifier_grp_info_fini(struct nh_notifier_info *info,
162da230501SPetr Machata 				      const struct nexthop *nh)
163da230501SPetr Machata {
164da230501SPetr Machata 	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
165da230501SPetr Machata 
166da230501SPetr Machata 	if (nhg->mpath)
1675ca474f2SIdo Schimmel 		kfree(info->nh_grp);
168*7c37c7e0SPetr Machata 	else if (nhg->resilient)
169*7c37c7e0SPetr Machata 		vfree(info->nh_res_table);
1705ca474f2SIdo Schimmel }
1715ca474f2SIdo Schimmel 
1725ca474f2SIdo Schimmel static int nh_notifier_info_init(struct nh_notifier_info *info,
1735ca474f2SIdo Schimmel 				 const struct nexthop *nh)
1745ca474f2SIdo Schimmel {
1755ca474f2SIdo Schimmel 	info->id = nh->id;
1765ca474f2SIdo Schimmel 
17709ad6becSIdo Schimmel 	if (nh->is_group)
1785ca474f2SIdo Schimmel 		return nh_notifier_grp_info_init(info, nh);
1795ca474f2SIdo Schimmel 	else
1805ca474f2SIdo Schimmel 		return nh_notifier_single_info_init(info, nh);
1815ca474f2SIdo Schimmel }
1825ca474f2SIdo Schimmel 
18309ad6becSIdo Schimmel static void nh_notifier_info_fini(struct nh_notifier_info *info,
18409ad6becSIdo Schimmel 				  const struct nexthop *nh)
1855ca474f2SIdo Schimmel {
18609ad6becSIdo Schimmel 	if (nh->is_group)
187da230501SPetr Machata 		nh_notifier_grp_info_fini(info, nh);
1885ca474f2SIdo Schimmel 	else
1895ca474f2SIdo Schimmel 		nh_notifier_single_info_fini(info);
1905ca474f2SIdo Schimmel }
1915ca474f2SIdo Schimmel 
1928590ceedSRoopa Prabhu static int call_nexthop_notifiers(struct net *net,
193d8e79f1dSNathan Chancellor 				  enum nexthop_event_type event_type,
1943578d53dSIdo Schimmel 				  struct nexthop *nh,
1953578d53dSIdo Schimmel 				  struct netlink_ext_ack *extack)
1968590ceedSRoopa Prabhu {
1975ca474f2SIdo Schimmel 	struct nh_notifier_info info = {
1985ca474f2SIdo Schimmel 		.net = net,
1995ca474f2SIdo Schimmel 		.extack = extack,
2005ca474f2SIdo Schimmel 	};
2018590ceedSRoopa Prabhu 	int err;
2028590ceedSRoopa Prabhu 
2035ca474f2SIdo Schimmel 	ASSERT_RTNL();
2045ca474f2SIdo Schimmel 
2055ca474f2SIdo Schimmel 	if (nexthop_notifiers_is_empty(net))
2065ca474f2SIdo Schimmel 		return 0;
2075ca474f2SIdo Schimmel 
2085ca474f2SIdo Schimmel 	err = nh_notifier_info_init(&info, nh);
2095ca474f2SIdo Schimmel 	if (err) {
2105ca474f2SIdo Schimmel 		NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
2115ca474f2SIdo Schimmel 		return err;
2125ca474f2SIdo Schimmel 	}
2135ca474f2SIdo Schimmel 
21480690ec6SIdo Schimmel 	err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
2151ec69d18SIdo Schimmel 					   event_type, &info);
21609ad6becSIdo Schimmel 	nh_notifier_info_fini(&info, nh);
2175ca474f2SIdo Schimmel 
2188590ceedSRoopa Prabhu 	return notifier_to_errno(err);
2198590ceedSRoopa Prabhu }
2208590ceedSRoopa Prabhu 
221*7c37c7e0SPetr Machata static int
222*7c37c7e0SPetr Machata nh_notifier_res_bucket_idle_timer_get(const struct nh_notifier_info *info,
223*7c37c7e0SPetr Machata 				      bool force, unsigned int *p_idle_timer_ms)
224*7c37c7e0SPetr Machata {
225*7c37c7e0SPetr Machata 	struct nh_res_table *res_table;
226*7c37c7e0SPetr Machata 	struct nh_group *nhg;
227*7c37c7e0SPetr Machata 	struct nexthop *nh;
228*7c37c7e0SPetr Machata 	int err = 0;
229*7c37c7e0SPetr Machata 
230*7c37c7e0SPetr Machata 	/* When 'force' is false, nexthop bucket replacement is performed
231*7c37c7e0SPetr Machata 	 * because the bucket was deemed to be idle. In this case, capable
232*7c37c7e0SPetr Machata 	 * listeners can choose to perform an atomic replacement: The bucket is
233*7c37c7e0SPetr Machata 	 * only replaced if it is inactive. However, if the idle timer interval
234*7c37c7e0SPetr Machata 	 * is smaller than the interval in which a listener is querying
235*7c37c7e0SPetr Machata 	 * buckets' activity from the device, then atomic replacement should
236*7c37c7e0SPetr Machata 	 * not be tried. Pass the idle timer value to listeners, so that they
237*7c37c7e0SPetr Machata 	 * could determine which type of replacement to perform.
238*7c37c7e0SPetr Machata 	 */
239*7c37c7e0SPetr Machata 	if (force) {
240*7c37c7e0SPetr Machata 		*p_idle_timer_ms = 0;
241*7c37c7e0SPetr Machata 		return 0;
242*7c37c7e0SPetr Machata 	}
243*7c37c7e0SPetr Machata 
244*7c37c7e0SPetr Machata 	rcu_read_lock();
245*7c37c7e0SPetr Machata 
246*7c37c7e0SPetr Machata 	nh = nexthop_find_by_id(info->net, info->id);
247*7c37c7e0SPetr Machata 	if (!nh) {
248*7c37c7e0SPetr Machata 		err = -EINVAL;
249*7c37c7e0SPetr Machata 		goto out;
250*7c37c7e0SPetr Machata 	}
251*7c37c7e0SPetr Machata 
252*7c37c7e0SPetr Machata 	nhg = rcu_dereference(nh->nh_grp);
253*7c37c7e0SPetr Machata 	res_table = rcu_dereference(nhg->res_table);
254*7c37c7e0SPetr Machata 	*p_idle_timer_ms = jiffies_to_msecs(res_table->idle_timer);
255*7c37c7e0SPetr Machata 
256*7c37c7e0SPetr Machata out:
257*7c37c7e0SPetr Machata 	rcu_read_unlock();
258*7c37c7e0SPetr Machata 
259*7c37c7e0SPetr Machata 	return err;
260*7c37c7e0SPetr Machata }
261*7c37c7e0SPetr Machata 
262*7c37c7e0SPetr Machata static int nh_notifier_res_bucket_info_init(struct nh_notifier_info *info,
263*7c37c7e0SPetr Machata 					    u16 bucket_index, bool force,
264*7c37c7e0SPetr Machata 					    struct nh_info *oldi,
265*7c37c7e0SPetr Machata 					    struct nh_info *newi)
266*7c37c7e0SPetr Machata {
267*7c37c7e0SPetr Machata 	unsigned int idle_timer_ms;
268*7c37c7e0SPetr Machata 	int err;
269*7c37c7e0SPetr Machata 
270*7c37c7e0SPetr Machata 	err = nh_notifier_res_bucket_idle_timer_get(info, force,
271*7c37c7e0SPetr Machata 						    &idle_timer_ms);
272*7c37c7e0SPetr Machata 	if (err)
273*7c37c7e0SPetr Machata 		return err;
274*7c37c7e0SPetr Machata 
275*7c37c7e0SPetr Machata 	info->type = NH_NOTIFIER_INFO_TYPE_RES_BUCKET;
276*7c37c7e0SPetr Machata 	info->nh_res_bucket = kzalloc(sizeof(*info->nh_res_bucket),
277*7c37c7e0SPetr Machata 				      GFP_KERNEL);
278*7c37c7e0SPetr Machata 	if (!info->nh_res_bucket)
279*7c37c7e0SPetr Machata 		return -ENOMEM;
280*7c37c7e0SPetr Machata 
281*7c37c7e0SPetr Machata 	info->nh_res_bucket->bucket_index = bucket_index;
282*7c37c7e0SPetr Machata 	info->nh_res_bucket->idle_timer_ms = idle_timer_ms;
283*7c37c7e0SPetr Machata 	info->nh_res_bucket->force = force;
284*7c37c7e0SPetr Machata 	__nh_notifier_single_info_init(&info->nh_res_bucket->old_nh, oldi);
285*7c37c7e0SPetr Machata 	__nh_notifier_single_info_init(&info->nh_res_bucket->new_nh, newi);
286*7c37c7e0SPetr Machata 	return 0;
287*7c37c7e0SPetr Machata }
288*7c37c7e0SPetr Machata 
289*7c37c7e0SPetr Machata static void nh_notifier_res_bucket_info_fini(struct nh_notifier_info *info)
290*7c37c7e0SPetr Machata {
291*7c37c7e0SPetr Machata 	kfree(info->nh_res_bucket);
292*7c37c7e0SPetr Machata }
293*7c37c7e0SPetr Machata 
294*7c37c7e0SPetr Machata static int __call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id,
295*7c37c7e0SPetr Machata 					       u16 bucket_index, bool force,
296*7c37c7e0SPetr Machata 					       struct nh_info *oldi,
297*7c37c7e0SPetr Machata 					       struct nh_info *newi,
298*7c37c7e0SPetr Machata 					       struct netlink_ext_ack *extack)
299*7c37c7e0SPetr Machata {
300*7c37c7e0SPetr Machata 	struct nh_notifier_info info = {
301*7c37c7e0SPetr Machata 		.net = net,
302*7c37c7e0SPetr Machata 		.extack = extack,
303*7c37c7e0SPetr Machata 		.id = nhg_id,
304*7c37c7e0SPetr Machata 	};
305*7c37c7e0SPetr Machata 	int err;
306*7c37c7e0SPetr Machata 
307*7c37c7e0SPetr Machata 	if (nexthop_notifiers_is_empty(net))
308*7c37c7e0SPetr Machata 		return 0;
309*7c37c7e0SPetr Machata 
310*7c37c7e0SPetr Machata 	err = nh_notifier_res_bucket_info_init(&info, bucket_index, force,
311*7c37c7e0SPetr Machata 					       oldi, newi);
312*7c37c7e0SPetr Machata 	if (err)
313*7c37c7e0SPetr Machata 		return err;
314*7c37c7e0SPetr Machata 
315*7c37c7e0SPetr Machata 	err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
316*7c37c7e0SPetr Machata 					   NEXTHOP_EVENT_BUCKET_REPLACE, &info);
317*7c37c7e0SPetr Machata 	nh_notifier_res_bucket_info_fini(&info);
318*7c37c7e0SPetr Machata 
319*7c37c7e0SPetr Machata 	return notifier_to_errno(err);
320*7c37c7e0SPetr Machata }
321*7c37c7e0SPetr Machata 
/* RES_TABLE, and the NHs etc. referenced from it, have three users:
 *
 * 1) a collection of callbacks for NH maintenance, which runs under RTNL,
 * 2) the delayed work (DW) that gradually balances the resilient table,
 * 3) nexthop_select_path(), which runs under RCU.
 *
 * The RTNL block and the delayed work are both writers and have to exclude
 * each other. As each table has exactly these two well-known writers, the
 * RTNL code obtains exclusive access as follows:
 *
 * - the DW itself runs without locking;
 * - the RTNL code synchronously cancels the DW;
 * - it then does the writing;
 * - unless the write was a delete, it calls upkeep, which schedules the DW
 *   again if necessary.
 *
 * Functions that are only ever called from RTNL context use
 * rtnl_dereference(). Functions that may also be called from the DW do a raw
 * dereference and rely on the mutual exclusion scheme above.
 */
#define nh_res_dereference(p) (rcu_dereference_raw(p))
345283a72a5SPetr Machata 
346*7c37c7e0SPetr Machata static int call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id,
347*7c37c7e0SPetr Machata 					     u16 bucket_index, bool force,
348*7c37c7e0SPetr Machata 					     struct nexthop *old_nh,
349*7c37c7e0SPetr Machata 					     struct nexthop *new_nh,
350*7c37c7e0SPetr Machata 					     struct netlink_ext_ack *extack)
351*7c37c7e0SPetr Machata {
352*7c37c7e0SPetr Machata 	struct nh_info *oldi = nh_res_dereference(old_nh->nh_info);
353*7c37c7e0SPetr Machata 	struct nh_info *newi = nh_res_dereference(new_nh->nh_info);
354*7c37c7e0SPetr Machata 
355*7c37c7e0SPetr Machata 	return __call_nexthop_res_bucket_notifiers(net, nhg_id, bucket_index,
356*7c37c7e0SPetr Machata 						   force, oldi, newi, extack);
357*7c37c7e0SPetr Machata }
358*7c37c7e0SPetr Machata 
359*7c37c7e0SPetr Machata static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
360*7c37c7e0SPetr Machata 					    struct netlink_ext_ack *extack)
361*7c37c7e0SPetr Machata {
362*7c37c7e0SPetr Machata 	struct nh_notifier_info info = {
363*7c37c7e0SPetr Machata 		.net = net,
364*7c37c7e0SPetr Machata 		.extack = extack,
365*7c37c7e0SPetr Machata 	};
366*7c37c7e0SPetr Machata 	struct nh_group *nhg;
367*7c37c7e0SPetr Machata 	int err;
368*7c37c7e0SPetr Machata 
369*7c37c7e0SPetr Machata 	ASSERT_RTNL();
370*7c37c7e0SPetr Machata 
371*7c37c7e0SPetr Machata 	if (nexthop_notifiers_is_empty(net))
372*7c37c7e0SPetr Machata 		return 0;
373*7c37c7e0SPetr Machata 
374*7c37c7e0SPetr Machata 	/* At this point, the nexthop buckets are still not populated. Only
375*7c37c7e0SPetr Machata 	 * emit a notification with the logical nexthops, so that a listener
376*7c37c7e0SPetr Machata 	 * could potentially veto it in case of unsupported configuration.
377*7c37c7e0SPetr Machata 	 */
378*7c37c7e0SPetr Machata 	nhg = rtnl_dereference(nh->nh_grp);
379*7c37c7e0SPetr Machata 	err = nh_notifier_mp_info_init(&info, nhg);
380*7c37c7e0SPetr Machata 	if (err) {
381*7c37c7e0SPetr Machata 		NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
382*7c37c7e0SPetr Machata 		return err;
383*7c37c7e0SPetr Machata 	}
384*7c37c7e0SPetr Machata 
385*7c37c7e0SPetr Machata 	err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
386*7c37c7e0SPetr Machata 					   NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
387*7c37c7e0SPetr Machata 					   &info);
388*7c37c7e0SPetr Machata 	kfree(info.nh_grp);
389*7c37c7e0SPetr Machata 
390*7c37c7e0SPetr Machata 	return notifier_to_errno(err);
391*7c37c7e0SPetr Machata }
392*7c37c7e0SPetr Machata 
393975ff7f3SIdo Schimmel static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
394975ff7f3SIdo Schimmel 				 enum nexthop_event_type event_type,
395975ff7f3SIdo Schimmel 				 struct nexthop *nh,
396975ff7f3SIdo Schimmel 				 struct netlink_ext_ack *extack)
397975ff7f3SIdo Schimmel {
398975ff7f3SIdo Schimmel 	struct nh_notifier_info info = {
399975ff7f3SIdo Schimmel 		.net = net,
400975ff7f3SIdo Schimmel 		.extack = extack,
401975ff7f3SIdo Schimmel 	};
402975ff7f3SIdo Schimmel 	int err;
403975ff7f3SIdo Schimmel 
404975ff7f3SIdo Schimmel 	err = nh_notifier_info_init(&info, nh);
405975ff7f3SIdo Schimmel 	if (err)
406975ff7f3SIdo Schimmel 		return err;
407975ff7f3SIdo Schimmel 
408975ff7f3SIdo Schimmel 	err = nb->notifier_call(nb, event_type, &info);
40909ad6becSIdo Schimmel 	nh_notifier_info_fini(&info, nh);
410975ff7f3SIdo Schimmel 
411975ff7f3SIdo Schimmel 	return notifier_to_errno(err);
412975ff7f3SIdo Schimmel }
413975ff7f3SIdo Schimmel 
414597cfe4fSDavid Ahern static unsigned int nh_dev_hashfn(unsigned int val)
415597cfe4fSDavid Ahern {
416597cfe4fSDavid Ahern 	unsigned int mask = NH_DEV_HASHSIZE - 1;
417597cfe4fSDavid Ahern 
418597cfe4fSDavid Ahern 	return (val ^
419597cfe4fSDavid Ahern 		(val >> NH_DEV_HASHBITS) ^
420597cfe4fSDavid Ahern 		(val >> (NH_DEV_HASHBITS * 2))) & mask;
421597cfe4fSDavid Ahern }
422597cfe4fSDavid Ahern 
423597cfe4fSDavid Ahern static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
424597cfe4fSDavid Ahern {
425597cfe4fSDavid Ahern 	struct net_device *dev = nhi->fib_nhc.nhc_dev;
426597cfe4fSDavid Ahern 	struct hlist_head *head;
427597cfe4fSDavid Ahern 	unsigned int hash;
428597cfe4fSDavid Ahern 
429597cfe4fSDavid Ahern 	WARN_ON(!dev);
430597cfe4fSDavid Ahern 
431597cfe4fSDavid Ahern 	hash = nh_dev_hashfn(dev->ifindex);
432597cfe4fSDavid Ahern 	head = &net->nexthop.devhash[hash];
433597cfe4fSDavid Ahern 	hlist_add_head(&nhi->dev_hash, head);
434597cfe4fSDavid Ahern }
435597cfe4fSDavid Ahern 
4365d1f0f09SDavid Ahern static void nexthop_free_group(struct nexthop *nh)
437ab84be7eSDavid Ahern {
438430a0491SDavid Ahern 	struct nh_group *nhg;
439430a0491SDavid Ahern 	int i;
440430a0491SDavid Ahern 
441430a0491SDavid Ahern 	nhg = rcu_dereference_raw(nh->nh_grp);
44290f33bffSNikolay Aleksandrov 	for (i = 0; i < nhg->num_nh; ++i) {
44390f33bffSNikolay Aleksandrov 		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
444430a0491SDavid Ahern 
44590f33bffSNikolay Aleksandrov 		WARN_ON(!list_empty(&nhge->nh_list));
44690f33bffSNikolay Aleksandrov 		nexthop_put(nhge->nh);
44790f33bffSNikolay Aleksandrov 	}
44890f33bffSNikolay Aleksandrov 
44990f33bffSNikolay Aleksandrov 	WARN_ON(nhg->spare == nhg);
45090f33bffSNikolay Aleksandrov 
451283a72a5SPetr Machata 	if (nhg->resilient)
452283a72a5SPetr Machata 		vfree(rcu_dereference_raw(nhg->res_table));
453283a72a5SPetr Machata 
45490f33bffSNikolay Aleksandrov 	kfree(nhg->spare);
455430a0491SDavid Ahern 	kfree(nhg);
456430a0491SDavid Ahern }
457430a0491SDavid Ahern 
458430a0491SDavid Ahern static void nexthop_free_single(struct nexthop *nh)
459430a0491SDavid Ahern {
460ab84be7eSDavid Ahern 	struct nh_info *nhi;
461ab84be7eSDavid Ahern 
462ab84be7eSDavid Ahern 	nhi = rcu_dereference_raw(nh->nh_info);
463597cfe4fSDavid Ahern 	switch (nhi->family) {
464597cfe4fSDavid Ahern 	case AF_INET:
465597cfe4fSDavid Ahern 		fib_nh_release(nh->net, &nhi->fib_nh);
466597cfe4fSDavid Ahern 		break;
46753010f99SDavid Ahern 	case AF_INET6:
46853010f99SDavid Ahern 		ipv6_stub->fib6_nh_release(&nhi->fib6_nh);
46953010f99SDavid Ahern 		break;
470597cfe4fSDavid Ahern 	}
471ab84be7eSDavid Ahern 	kfree(nhi);
472430a0491SDavid Ahern }
473430a0491SDavid Ahern 
474430a0491SDavid Ahern void nexthop_free_rcu(struct rcu_head *head)
475430a0491SDavid Ahern {
476430a0491SDavid Ahern 	struct nexthop *nh = container_of(head, struct nexthop, rcu);
477430a0491SDavid Ahern 
478430a0491SDavid Ahern 	if (nh->is_group)
4795d1f0f09SDavid Ahern 		nexthop_free_group(nh);
480430a0491SDavid Ahern 	else
481430a0491SDavid Ahern 		nexthop_free_single(nh);
482ab84be7eSDavid Ahern 
483ab84be7eSDavid Ahern 	kfree(nh);
484ab84be7eSDavid Ahern }
485ab84be7eSDavid Ahern EXPORT_SYMBOL_GPL(nexthop_free_rcu);
486ab84be7eSDavid Ahern 
487ab84be7eSDavid Ahern static struct nexthop *nexthop_alloc(void)
488ab84be7eSDavid Ahern {
489ab84be7eSDavid Ahern 	struct nexthop *nh;
490ab84be7eSDavid Ahern 
491ab84be7eSDavid Ahern 	nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
492430a0491SDavid Ahern 	if (nh) {
4934c7e8084SDavid Ahern 		INIT_LIST_HEAD(&nh->fi_list);
494f88d8ea6SDavid Ahern 		INIT_LIST_HEAD(&nh->f6i_list);
495430a0491SDavid Ahern 		INIT_LIST_HEAD(&nh->grp_list);
49638428d68SRoopa Prabhu 		INIT_LIST_HEAD(&nh->fdb_list);
497430a0491SDavid Ahern 	}
498ab84be7eSDavid Ahern 	return nh;
499ab84be7eSDavid Ahern }
500ab84be7eSDavid Ahern 
501430a0491SDavid Ahern static struct nh_group *nexthop_grp_alloc(u16 num_nh)
502430a0491SDavid Ahern {
503430a0491SDavid Ahern 	struct nh_group *nhg;
504430a0491SDavid Ahern 
505d7d49dc7SIdo Schimmel 	nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL);
506430a0491SDavid Ahern 	if (nhg)
507430a0491SDavid Ahern 		nhg->num_nh = num_nh;
508430a0491SDavid Ahern 
509430a0491SDavid Ahern 	return nhg;
510430a0491SDavid Ahern }
511430a0491SDavid Ahern 
512283a72a5SPetr Machata static void nh_res_table_upkeep_dw(struct work_struct *work);
513283a72a5SPetr Machata 
514283a72a5SPetr Machata static struct nh_res_table *
515283a72a5SPetr Machata nexthop_res_table_alloc(struct net *net, u32 nhg_id, struct nh_config *cfg)
516283a72a5SPetr Machata {
517283a72a5SPetr Machata 	const u16 num_nh_buckets = cfg->nh_grp_res_num_buckets;
518283a72a5SPetr Machata 	struct nh_res_table *res_table;
519283a72a5SPetr Machata 	unsigned long size;
520283a72a5SPetr Machata 
521283a72a5SPetr Machata 	size = struct_size(res_table, nh_buckets, num_nh_buckets);
522283a72a5SPetr Machata 	res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);
523283a72a5SPetr Machata 	if (!res_table)
524283a72a5SPetr Machata 		return NULL;
525283a72a5SPetr Machata 
526283a72a5SPetr Machata 	res_table->net = net;
527283a72a5SPetr Machata 	res_table->nhg_id = nhg_id;
528283a72a5SPetr Machata 	INIT_DELAYED_WORK(&res_table->upkeep_dw, &nh_res_table_upkeep_dw);
529283a72a5SPetr Machata 	INIT_LIST_HEAD(&res_table->uw_nh_entries);
530283a72a5SPetr Machata 	res_table->idle_timer = cfg->nh_grp_res_idle_timer;
531283a72a5SPetr Machata 	res_table->unbalanced_timer = cfg->nh_grp_res_unbalanced_timer;
532283a72a5SPetr Machata 	res_table->num_nh_buckets = num_nh_buckets;
533283a72a5SPetr Machata 	return res_table;
534283a72a5SPetr Machata }
535283a72a5SPetr Machata 
536ab84be7eSDavid Ahern static void nh_base_seq_inc(struct net *net)
537ab84be7eSDavid Ahern {
538ab84be7eSDavid Ahern 	while (++net->nexthop.seq == 0)
539ab84be7eSDavid Ahern 		;
540ab84be7eSDavid Ahern }
541ab84be7eSDavid Ahern 
542ab84be7eSDavid Ahern /* no reference taken; rcu lock or rtnl must be held */
543ab84be7eSDavid Ahern struct nexthop *nexthop_find_by_id(struct net *net, u32 id)
544ab84be7eSDavid Ahern {
545ab84be7eSDavid Ahern 	struct rb_node **pp, *parent = NULL, *next;
546ab84be7eSDavid Ahern 
547ab84be7eSDavid Ahern 	pp = &net->nexthop.rb_root.rb_node;
548ab84be7eSDavid Ahern 	while (1) {
549ab84be7eSDavid Ahern 		struct nexthop *nh;
550ab84be7eSDavid Ahern 
551ab84be7eSDavid Ahern 		next = rcu_dereference_raw(*pp);
552ab84be7eSDavid Ahern 		if (!next)
553ab84be7eSDavid Ahern 			break;
554ab84be7eSDavid Ahern 		parent = next;
555ab84be7eSDavid Ahern 
556ab84be7eSDavid Ahern 		nh = rb_entry(parent, struct nexthop, rb_node);
557ab84be7eSDavid Ahern 		if (id < nh->id)
558ab84be7eSDavid Ahern 			pp = &next->rb_left;
559ab84be7eSDavid Ahern 		else if (id > nh->id)
560ab84be7eSDavid Ahern 			pp = &next->rb_right;
561ab84be7eSDavid Ahern 		else
562ab84be7eSDavid Ahern 			return nh;
563ab84be7eSDavid Ahern 	}
564ab84be7eSDavid Ahern 	return NULL;
565ab84be7eSDavid Ahern }
566ab84be7eSDavid Ahern EXPORT_SYMBOL_GPL(nexthop_find_by_id);
567ab84be7eSDavid Ahern 
568ab84be7eSDavid Ahern /* used for auto id allocation; called with rtnl held */
569ab84be7eSDavid Ahern static u32 nh_find_unused_id(struct net *net)
570ab84be7eSDavid Ahern {
571ab84be7eSDavid Ahern 	u32 id_start = net->nexthop.last_id_allocated;
572ab84be7eSDavid Ahern 
573ab84be7eSDavid Ahern 	while (1) {
574ab84be7eSDavid Ahern 		net->nexthop.last_id_allocated++;
575ab84be7eSDavid Ahern 		if (net->nexthop.last_id_allocated == id_start)
576ab84be7eSDavid Ahern 			break;
577ab84be7eSDavid Ahern 
578ab84be7eSDavid Ahern 		if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated))
579ab84be7eSDavid Ahern 			return net->nexthop.last_id_allocated;
580ab84be7eSDavid Ahern 	}
581ab84be7eSDavid Ahern 	return 0;
582ab84be7eSDavid Ahern }
583ab84be7eSDavid Ahern 
/* Move *@deadline earlier to @next_time when @next_time comes before it
 * according to time_before(); otherwise leave it alone.
 */
static void nh_res_time_set_deadline(unsigned long next_time,
				     unsigned long *deadline)
{
	if (!time_before(next_time, *deadline))
		return;

	*deadline = next_time;
}
590283a72a5SPetr Machata 
591430a0491SDavid Ahern static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
592430a0491SDavid Ahern {
593430a0491SDavid Ahern 	struct nexthop_grp *p;
594430a0491SDavid Ahern 	size_t len = nhg->num_nh * sizeof(*p);
595430a0491SDavid Ahern 	struct nlattr *nla;
596430a0491SDavid Ahern 	u16 group_type = 0;
597430a0491SDavid Ahern 	int i;
598430a0491SDavid Ahern 
599430a0491SDavid Ahern 	if (nhg->mpath)
600430a0491SDavid Ahern 		group_type = NEXTHOP_GRP_TYPE_MPATH;
601430a0491SDavid Ahern 
602430a0491SDavid Ahern 	if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
603430a0491SDavid Ahern 		goto nla_put_failure;
604430a0491SDavid Ahern 
605430a0491SDavid Ahern 	nla = nla_reserve(skb, NHA_GROUP, len);
606430a0491SDavid Ahern 	if (!nla)
607430a0491SDavid Ahern 		goto nla_put_failure;
608430a0491SDavid Ahern 
609430a0491SDavid Ahern 	p = nla_data(nla);
610430a0491SDavid Ahern 	for (i = 0; i < nhg->num_nh; ++i) {
611430a0491SDavid Ahern 		p->id = nhg->nh_entries[i].nh->id;
612430a0491SDavid Ahern 		p->weight = nhg->nh_entries[i].weight - 1;
613430a0491SDavid Ahern 		p += 1;
614430a0491SDavid Ahern 	}
615430a0491SDavid Ahern 
616430a0491SDavid Ahern 	return 0;
617430a0491SDavid Ahern 
618430a0491SDavid Ahern nla_put_failure:
619430a0491SDavid Ahern 	return -EMSGSIZE;
620430a0491SDavid Ahern }
621430a0491SDavid Ahern 
622ab84be7eSDavid Ahern static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
623ab84be7eSDavid Ahern 			int event, u32 portid, u32 seq, unsigned int nlflags)
624ab84be7eSDavid Ahern {
62553010f99SDavid Ahern 	struct fib6_nh *fib6_nh;
626597cfe4fSDavid Ahern 	struct fib_nh *fib_nh;
627ab84be7eSDavid Ahern 	struct nlmsghdr *nlh;
628ab84be7eSDavid Ahern 	struct nh_info *nhi;
629ab84be7eSDavid Ahern 	struct nhmsg *nhm;
630ab84be7eSDavid Ahern 
631ab84be7eSDavid Ahern 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
632ab84be7eSDavid Ahern 	if (!nlh)
633ab84be7eSDavid Ahern 		return -EMSGSIZE;
634ab84be7eSDavid Ahern 
635ab84be7eSDavid Ahern 	nhm = nlmsg_data(nlh);
636ab84be7eSDavid Ahern 	nhm->nh_family = AF_UNSPEC;
637ab84be7eSDavid Ahern 	nhm->nh_flags = nh->nh_flags;
638ab84be7eSDavid Ahern 	nhm->nh_protocol = nh->protocol;
639ab84be7eSDavid Ahern 	nhm->nh_scope = 0;
640ab84be7eSDavid Ahern 	nhm->resvd = 0;
641ab84be7eSDavid Ahern 
642ab84be7eSDavid Ahern 	if (nla_put_u32(skb, NHA_ID, nh->id))
643ab84be7eSDavid Ahern 		goto nla_put_failure;
644ab84be7eSDavid Ahern 
645430a0491SDavid Ahern 	if (nh->is_group) {
646430a0491SDavid Ahern 		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
647430a0491SDavid Ahern 
648ce9ac056SDavid Ahern 		if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
649ce9ac056SDavid Ahern 			goto nla_put_failure;
650430a0491SDavid Ahern 		if (nla_put_nh_group(skb, nhg))
651430a0491SDavid Ahern 			goto nla_put_failure;
652430a0491SDavid Ahern 		goto out;
653430a0491SDavid Ahern 	}
654430a0491SDavid Ahern 
655ab84be7eSDavid Ahern 	nhi = rtnl_dereference(nh->nh_info);
656ab84be7eSDavid Ahern 	nhm->nh_family = nhi->family;
657ab84be7eSDavid Ahern 	if (nhi->reject_nh) {
658ab84be7eSDavid Ahern 		if (nla_put_flag(skb, NHA_BLACKHOLE))
659ab84be7eSDavid Ahern 			goto nla_put_failure;
660ab84be7eSDavid Ahern 		goto out;
661ce9ac056SDavid Ahern 	} else if (nhi->fdb_nh) {
662ce9ac056SDavid Ahern 		if (nla_put_flag(skb, NHA_FDB))
663ce9ac056SDavid Ahern 			goto nla_put_failure;
664ce9ac056SDavid Ahern 	} else {
665597cfe4fSDavid Ahern 		const struct net_device *dev;
666597cfe4fSDavid Ahern 
667597cfe4fSDavid Ahern 		dev = nhi->fib_nhc.nhc_dev;
668597cfe4fSDavid Ahern 		if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
669597cfe4fSDavid Ahern 			goto nla_put_failure;
670597cfe4fSDavid Ahern 	}
671597cfe4fSDavid Ahern 
672597cfe4fSDavid Ahern 	nhm->nh_scope = nhi->fib_nhc.nhc_scope;
673597cfe4fSDavid Ahern 	switch (nhi->family) {
674597cfe4fSDavid Ahern 	case AF_INET:
675597cfe4fSDavid Ahern 		fib_nh = &nhi->fib_nh;
676597cfe4fSDavid Ahern 		if (fib_nh->fib_nh_gw_family &&
67733d80996SIdo Schimmel 		    nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
678597cfe4fSDavid Ahern 			goto nla_put_failure;
679597cfe4fSDavid Ahern 		break;
68053010f99SDavid Ahern 
68153010f99SDavid Ahern 	case AF_INET6:
68253010f99SDavid Ahern 		fib6_nh = &nhi->fib6_nh;
68353010f99SDavid Ahern 		if (fib6_nh->fib_nh_gw_family &&
68453010f99SDavid Ahern 		    nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6))
68553010f99SDavid Ahern 			goto nla_put_failure;
68653010f99SDavid Ahern 		break;
687ab84be7eSDavid Ahern 	}
688ab84be7eSDavid Ahern 
689b513bd03SDavid Ahern 	if (nhi->fib_nhc.nhc_lwtstate &&
690b513bd03SDavid Ahern 	    lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
691b513bd03SDavid Ahern 				NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
692b513bd03SDavid Ahern 		goto nla_put_failure;
693b513bd03SDavid Ahern 
694ab84be7eSDavid Ahern out:
695ab84be7eSDavid Ahern 	nlmsg_end(skb, nlh);
696ab84be7eSDavid Ahern 	return 0;
697ab84be7eSDavid Ahern 
698ab84be7eSDavid Ahern nla_put_failure:
699d69100b8SStephen Worley 	nlmsg_cancel(skb, nlh);
700ab84be7eSDavid Ahern 	return -EMSGSIZE;
701ab84be7eSDavid Ahern }
702ab84be7eSDavid Ahern 
703430a0491SDavid Ahern static size_t nh_nlmsg_size_grp(struct nexthop *nh)
704430a0491SDavid Ahern {
705430a0491SDavid Ahern 	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
706430a0491SDavid Ahern 	size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
707430a0491SDavid Ahern 
708430a0491SDavid Ahern 	return nla_total_size(sz) +
709430a0491SDavid Ahern 	       nla_total_size(2);  /* NHA_GROUP_TYPE */
710430a0491SDavid Ahern }
711430a0491SDavid Ahern 
712430a0491SDavid Ahern static size_t nh_nlmsg_size_single(struct nexthop *nh)
713ab84be7eSDavid Ahern {
714597cfe4fSDavid Ahern 	struct nh_info *nhi = rtnl_dereference(nh->nh_info);
715430a0491SDavid Ahern 	size_t sz;
716ab84be7eSDavid Ahern 
717ab84be7eSDavid Ahern 	/* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
718ab84be7eSDavid Ahern 	 * are mutually exclusive
719ab84be7eSDavid Ahern 	 */
720430a0491SDavid Ahern 	sz = nla_total_size(4);  /* NHA_OIF */
721ab84be7eSDavid Ahern 
722597cfe4fSDavid Ahern 	switch (nhi->family) {
723597cfe4fSDavid Ahern 	case AF_INET:
724597cfe4fSDavid Ahern 		if (nhi->fib_nh.fib_nh_gw_family)
725597cfe4fSDavid Ahern 			sz += nla_total_size(4);  /* NHA_GATEWAY */
726597cfe4fSDavid Ahern 		break;
72753010f99SDavid Ahern 
72853010f99SDavid Ahern 	case AF_INET6:
72953010f99SDavid Ahern 		/* NHA_GATEWAY */
73053010f99SDavid Ahern 		if (nhi->fib6_nh.fib_nh_gw_family)
73153010f99SDavid Ahern 			sz += nla_total_size(sizeof(const struct in6_addr));
73253010f99SDavid Ahern 		break;
733597cfe4fSDavid Ahern 	}
734597cfe4fSDavid Ahern 
735b513bd03SDavid Ahern 	if (nhi->fib_nhc.nhc_lwtstate) {
736b513bd03SDavid Ahern 		sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate);
737b513bd03SDavid Ahern 		sz += nla_total_size(2);  /* NHA_ENCAP_TYPE */
738b513bd03SDavid Ahern 	}
739b513bd03SDavid Ahern 
740ab84be7eSDavid Ahern 	return sz;
741ab84be7eSDavid Ahern }
742ab84be7eSDavid Ahern 
743430a0491SDavid Ahern static size_t nh_nlmsg_size(struct nexthop *nh)
744430a0491SDavid Ahern {
745f9e95555SStephen Worley 	size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg));
746f9e95555SStephen Worley 
747f9e95555SStephen Worley 	sz += nla_total_size(4); /* NHA_ID */
748430a0491SDavid Ahern 
749430a0491SDavid Ahern 	if (nh->is_group)
750430a0491SDavid Ahern 		sz += nh_nlmsg_size_grp(nh);
751430a0491SDavid Ahern 	else
752430a0491SDavid Ahern 		sz += nh_nlmsg_size_single(nh);
753430a0491SDavid Ahern 
754430a0491SDavid Ahern 	return sz;
755430a0491SDavid Ahern }
756430a0491SDavid Ahern 
757ab84be7eSDavid Ahern static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
758ab84be7eSDavid Ahern {
759ab84be7eSDavid Ahern 	unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
760ab84be7eSDavid Ahern 	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
761ab84be7eSDavid Ahern 	struct sk_buff *skb;
762ab84be7eSDavid Ahern 	int err = -ENOBUFS;
763ab84be7eSDavid Ahern 
764ab84be7eSDavid Ahern 	skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
765ab84be7eSDavid Ahern 	if (!skb)
766ab84be7eSDavid Ahern 		goto errout;
767ab84be7eSDavid Ahern 
768ab84be7eSDavid Ahern 	err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
769ab84be7eSDavid Ahern 	if (err < 0) {
770ab84be7eSDavid Ahern 		/* -EMSGSIZE implies BUG in nh_nlmsg_size() */
771ab84be7eSDavid Ahern 		WARN_ON(err == -EMSGSIZE);
772ab84be7eSDavid Ahern 		kfree_skb(skb);
773ab84be7eSDavid Ahern 		goto errout;
774ab84be7eSDavid Ahern 	}
775ab84be7eSDavid Ahern 
776ab84be7eSDavid Ahern 	rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP,
777ab84be7eSDavid Ahern 		    info->nlh, gfp_any());
778ab84be7eSDavid Ahern 	return;
779ab84be7eSDavid Ahern errout:
780ab84be7eSDavid Ahern 	if (err < 0)
781ab84be7eSDavid Ahern 		rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
782ab84be7eSDavid Ahern }
783ab84be7eSDavid Ahern 
784283a72a5SPetr Machata static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket)
785283a72a5SPetr Machata {
786283a72a5SPetr Machata 	return (unsigned long)atomic_long_read(&bucket->used_time);
787283a72a5SPetr Machata }
788283a72a5SPetr Machata 
789283a72a5SPetr Machata static unsigned long
790283a72a5SPetr Machata nh_res_bucket_idle_point(const struct nh_res_table *res_table,
791283a72a5SPetr Machata 			 const struct nh_res_bucket *bucket,
792283a72a5SPetr Machata 			 unsigned long now)
793283a72a5SPetr Machata {
794283a72a5SPetr Machata 	unsigned long time = nh_res_bucket_used_time(bucket);
795283a72a5SPetr Machata 
796283a72a5SPetr Machata 	/* Bucket was not used since it was migrated. The idle time is now. */
797283a72a5SPetr Machata 	if (time == bucket->migrated_time)
798283a72a5SPetr Machata 		return now;
799283a72a5SPetr Machata 
800283a72a5SPetr Machata 	return time + res_table->idle_timer;
801283a72a5SPetr Machata }
802283a72a5SPetr Machata 
803283a72a5SPetr Machata static unsigned long
804283a72a5SPetr Machata nh_res_table_unb_point(const struct nh_res_table *res_table)
805283a72a5SPetr Machata {
806283a72a5SPetr Machata 	return res_table->unbalanced_since + res_table->unbalanced_timer;
807283a72a5SPetr Machata }
808283a72a5SPetr Machata 
809283a72a5SPetr Machata static void nh_res_bucket_set_idle(const struct nh_res_table *res_table,
810283a72a5SPetr Machata 				   struct nh_res_bucket *bucket)
811283a72a5SPetr Machata {
812283a72a5SPetr Machata 	unsigned long now = jiffies;
813283a72a5SPetr Machata 
814283a72a5SPetr Machata 	atomic_long_set(&bucket->used_time, (long)now);
815283a72a5SPetr Machata 	bucket->migrated_time = now;
816283a72a5SPetr Machata }
817283a72a5SPetr Machata 
818283a72a5SPetr Machata static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket)
819283a72a5SPetr Machata {
820283a72a5SPetr Machata 	atomic_long_set(&bucket->used_time, (long)jiffies);
821283a72a5SPetr Machata }
822283a72a5SPetr Machata 
823430a0491SDavid Ahern static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
824ce9ac056SDavid Ahern 			   bool *is_fdb, struct netlink_ext_ack *extack)
825597cfe4fSDavid Ahern {
826430a0491SDavid Ahern 	if (nh->is_group) {
827430a0491SDavid Ahern 		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
828430a0491SDavid Ahern 
829283a72a5SPetr Machata 		/* Nesting groups within groups is not supported. */
830430a0491SDavid Ahern 		if (nhg->mpath) {
831430a0491SDavid Ahern 			NL_SET_ERR_MSG(extack,
832430a0491SDavid Ahern 				       "Multipath group can not be a nexthop within a group");
833430a0491SDavid Ahern 			return false;
834430a0491SDavid Ahern 		}
835283a72a5SPetr Machata 		if (nhg->resilient) {
836283a72a5SPetr Machata 			NL_SET_ERR_MSG(extack,
837283a72a5SPetr Machata 				       "Resilient group can not be a nexthop within a group");
838283a72a5SPetr Machata 			return false;
839283a72a5SPetr Machata 		}
840ce9ac056SDavid Ahern 		*is_fdb = nhg->fdb_nh;
841430a0491SDavid Ahern 	} else {
842430a0491SDavid Ahern 		struct nh_info *nhi = rtnl_dereference(nh->nh_info);
843430a0491SDavid Ahern 
844430a0491SDavid Ahern 		if (nhi->reject_nh && npaths > 1) {
845430a0491SDavid Ahern 			NL_SET_ERR_MSG(extack,
846430a0491SDavid Ahern 				       "Blackhole nexthop can not be used in a group with more than 1 path");
847430a0491SDavid Ahern 			return false;
848430a0491SDavid Ahern 		}
849ce9ac056SDavid Ahern 		*is_fdb = nhi->fdb_nh;
850430a0491SDavid Ahern 	}
851430a0491SDavid Ahern 
852430a0491SDavid Ahern 	return true;
853430a0491SDavid Ahern }
854430a0491SDavid Ahern 
85538428d68SRoopa Prabhu static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
85638428d68SRoopa Prabhu 				   struct netlink_ext_ack *extack)
85738428d68SRoopa Prabhu {
85838428d68SRoopa Prabhu 	struct nh_info *nhi;
85938428d68SRoopa Prabhu 
860ce9ac056SDavid Ahern 	nhi = rtnl_dereference(nh->nh_info);
861ce9ac056SDavid Ahern 
862ce9ac056SDavid Ahern 	if (!nhi->fdb_nh) {
86338428d68SRoopa Prabhu 		NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
86438428d68SRoopa Prabhu 		return -EINVAL;
86538428d68SRoopa Prabhu 	}
86638428d68SRoopa Prabhu 
86738428d68SRoopa Prabhu 	if (*nh_family == AF_UNSPEC) {
86838428d68SRoopa Prabhu 		*nh_family = nhi->family;
86938428d68SRoopa Prabhu 	} else if (*nh_family != nhi->family) {
87038428d68SRoopa Prabhu 		NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
87138428d68SRoopa Prabhu 		return -EINVAL;
87238428d68SRoopa Prabhu 	}
87338428d68SRoopa Prabhu 
87438428d68SRoopa Prabhu 	return 0;
87538428d68SRoopa Prabhu }
87638428d68SRoopa Prabhu 
877643d0878SPetr Machata static int nh_check_attr_group(struct net *net,
878643d0878SPetr Machata 			       struct nlattr *tb[], size_t tb_size,
879430a0491SDavid Ahern 			       struct netlink_ext_ack *extack)
880430a0491SDavid Ahern {
881430a0491SDavid Ahern 	unsigned int len = nla_len(tb[NHA_GROUP]);
88238428d68SRoopa Prabhu 	u8 nh_family = AF_UNSPEC;
883430a0491SDavid Ahern 	struct nexthop_grp *nhg;
884430a0491SDavid Ahern 	unsigned int i, j;
88538428d68SRoopa Prabhu 	u8 nhg_fdb = 0;
886430a0491SDavid Ahern 
887eeaac363SNikolay Aleksandrov 	if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
888430a0491SDavid Ahern 		NL_SET_ERR_MSG(extack,
889430a0491SDavid Ahern 			       "Invalid length for nexthop group attribute");
890430a0491SDavid Ahern 		return -EINVAL;
891430a0491SDavid Ahern 	}
892430a0491SDavid Ahern 
893430a0491SDavid Ahern 	/* convert len to number of nexthop ids */
894430a0491SDavid Ahern 	len /= sizeof(*nhg);
895430a0491SDavid Ahern 
896430a0491SDavid Ahern 	nhg = nla_data(tb[NHA_GROUP]);
897430a0491SDavid Ahern 	for (i = 0; i < len; ++i) {
898430a0491SDavid Ahern 		if (nhg[i].resvd1 || nhg[i].resvd2) {
899430a0491SDavid Ahern 			NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
900430a0491SDavid Ahern 			return -EINVAL;
901430a0491SDavid Ahern 		}
902430a0491SDavid Ahern 		if (nhg[i].weight > 254) {
903430a0491SDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid value for weight");
904430a0491SDavid Ahern 			return -EINVAL;
905430a0491SDavid Ahern 		}
906430a0491SDavid Ahern 		for (j = i + 1; j < len; ++j) {
907430a0491SDavid Ahern 			if (nhg[i].id == nhg[j].id) {
908430a0491SDavid Ahern 				NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
909430a0491SDavid Ahern 				return -EINVAL;
910430a0491SDavid Ahern 			}
911430a0491SDavid Ahern 		}
912430a0491SDavid Ahern 	}
913430a0491SDavid Ahern 
91438428d68SRoopa Prabhu 	if (tb[NHA_FDB])
91538428d68SRoopa Prabhu 		nhg_fdb = 1;
916430a0491SDavid Ahern 	nhg = nla_data(tb[NHA_GROUP]);
917430a0491SDavid Ahern 	for (i = 0; i < len; ++i) {
918430a0491SDavid Ahern 		struct nexthop *nh;
919ce9ac056SDavid Ahern 		bool is_fdb_nh;
920430a0491SDavid Ahern 
921430a0491SDavid Ahern 		nh = nexthop_find_by_id(net, nhg[i].id);
922430a0491SDavid Ahern 		if (!nh) {
923430a0491SDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid nexthop id");
924430a0491SDavid Ahern 			return -EINVAL;
925430a0491SDavid Ahern 		}
926ce9ac056SDavid Ahern 		if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
927430a0491SDavid Ahern 			return -EINVAL;
92838428d68SRoopa Prabhu 
92938428d68SRoopa Prabhu 		if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
93038428d68SRoopa Prabhu 			return -EINVAL;
93138428d68SRoopa Prabhu 
932ce9ac056SDavid Ahern 		if (!nhg_fdb && is_fdb_nh) {
93338428d68SRoopa Prabhu 			NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
93438428d68SRoopa Prabhu 			return -EINVAL;
93538428d68SRoopa Prabhu 		}
936430a0491SDavid Ahern 	}
937643d0878SPetr Machata 	for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
938430a0491SDavid Ahern 		if (!tb[i])
939430a0491SDavid Ahern 			continue;
940b19218b2SPetr Machata 		if (i == NHA_FDB)
94138428d68SRoopa Prabhu 			continue;
942430a0491SDavid Ahern 		NL_SET_ERR_MSG(extack,
943430a0491SDavid Ahern 			       "No other attributes can be set in nexthop groups");
944430a0491SDavid Ahern 		return -EINVAL;
945430a0491SDavid Ahern 	}
946430a0491SDavid Ahern 
947430a0491SDavid Ahern 	return 0;
948430a0491SDavid Ahern }
949430a0491SDavid Ahern 
950430a0491SDavid Ahern static bool ipv6_good_nh(const struct fib6_nh *nh)
951430a0491SDavid Ahern {
952430a0491SDavid Ahern 	int state = NUD_REACHABLE;
953430a0491SDavid Ahern 	struct neighbour *n;
954430a0491SDavid Ahern 
955430a0491SDavid Ahern 	rcu_read_lock_bh();
956430a0491SDavid Ahern 
957430a0491SDavid Ahern 	n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
958430a0491SDavid Ahern 	if (n)
959430a0491SDavid Ahern 		state = n->nud_state;
960430a0491SDavid Ahern 
961430a0491SDavid Ahern 	rcu_read_unlock_bh();
962430a0491SDavid Ahern 
963430a0491SDavid Ahern 	return !!(state & NUD_VALID);
964430a0491SDavid Ahern }
965430a0491SDavid Ahern 
966430a0491SDavid Ahern static bool ipv4_good_nh(const struct fib_nh *nh)
967430a0491SDavid Ahern {
968430a0491SDavid Ahern 	int state = NUD_REACHABLE;
969430a0491SDavid Ahern 	struct neighbour *n;
970430a0491SDavid Ahern 
971430a0491SDavid Ahern 	rcu_read_lock_bh();
972430a0491SDavid Ahern 
973430a0491SDavid Ahern 	n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
974430a0491SDavid Ahern 				      (__force u32)nh->fib_nh_gw4);
975430a0491SDavid Ahern 	if (n)
976430a0491SDavid Ahern 		state = n->nud_state;
977430a0491SDavid Ahern 
978430a0491SDavid Ahern 	rcu_read_unlock_bh();
979430a0491SDavid Ahern 
980430a0491SDavid Ahern 	return !!(state & NUD_VALID);
981430a0491SDavid Ahern }
982430a0491SDavid Ahern 
98379bc55e3SPetr Machata static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
984430a0491SDavid Ahern {
985430a0491SDavid Ahern 	struct nexthop *rc = NULL;
986430a0491SDavid Ahern 	int i;
987430a0491SDavid Ahern 
988430a0491SDavid Ahern 	for (i = 0; i < nhg->num_nh; ++i) {
989430a0491SDavid Ahern 		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
990430a0491SDavid Ahern 		struct nh_info *nhi;
991430a0491SDavid Ahern 
992b9bae61bSPetr Machata 		if (hash > atomic_read(&nhge->mpath.upper_bound))
993430a0491SDavid Ahern 			continue;
994430a0491SDavid Ahern 
995ce9ac056SDavid Ahern 		nhi = rcu_dereference(nhge->nh->nh_info);
996ce9ac056SDavid Ahern 		if (nhi->fdb_nh)
99738428d68SRoopa Prabhu 			return nhge->nh;
99838428d68SRoopa Prabhu 
999430a0491SDavid Ahern 		/* nexthops always check if it is good and does
1000430a0491SDavid Ahern 		 * not rely on a sysctl for this behavior
1001430a0491SDavid Ahern 		 */
1002430a0491SDavid Ahern 		switch (nhi->family) {
1003430a0491SDavid Ahern 		case AF_INET:
1004430a0491SDavid Ahern 			if (ipv4_good_nh(&nhi->fib_nh))
1005430a0491SDavid Ahern 				return nhge->nh;
1006430a0491SDavid Ahern 			break;
1007430a0491SDavid Ahern 		case AF_INET6:
1008430a0491SDavid Ahern 			if (ipv6_good_nh(&nhi->fib6_nh))
1009430a0491SDavid Ahern 				return nhge->nh;
1010430a0491SDavid Ahern 			break;
1011430a0491SDavid Ahern 		}
1012430a0491SDavid Ahern 
1013430a0491SDavid Ahern 		if (!rc)
1014430a0491SDavid Ahern 			rc = nhge->nh;
1015430a0491SDavid Ahern 	}
1016430a0491SDavid Ahern 
1017430a0491SDavid Ahern 	return rc;
1018430a0491SDavid Ahern }
101979bc55e3SPetr Machata 
1020283a72a5SPetr Machata static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
1021283a72a5SPetr Machata {
1022283a72a5SPetr Machata 	struct nh_res_table *res_table = rcu_dereference(nhg->res_table);
1023283a72a5SPetr Machata 	u16 bucket_index = hash % res_table->num_nh_buckets;
1024283a72a5SPetr Machata 	struct nh_res_bucket *bucket;
1025283a72a5SPetr Machata 	struct nh_grp_entry *nhge;
1026283a72a5SPetr Machata 
1027283a72a5SPetr Machata 	/* nexthop_select_path() is expected to return a non-NULL value, so
1028283a72a5SPetr Machata 	 * skip protocol validation and just hand out whatever there is.
1029283a72a5SPetr Machata 	 */
1030283a72a5SPetr Machata 	bucket = &res_table->nh_buckets[bucket_index];
1031283a72a5SPetr Machata 	nh_res_bucket_set_busy(bucket);
1032283a72a5SPetr Machata 	nhge = rcu_dereference(bucket->nh_entry);
1033283a72a5SPetr Machata 	return nhge->nh;
1034283a72a5SPetr Machata }
1035283a72a5SPetr Machata 
103679bc55e3SPetr Machata struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
103779bc55e3SPetr Machata {
103879bc55e3SPetr Machata 	struct nh_group *nhg;
103979bc55e3SPetr Machata 
104079bc55e3SPetr Machata 	if (!nh->is_group)
104179bc55e3SPetr Machata 		return nh;
104279bc55e3SPetr Machata 
104379bc55e3SPetr Machata 	nhg = rcu_dereference(nh->nh_grp);
104479bc55e3SPetr Machata 	if (nhg->mpath)
104579bc55e3SPetr Machata 		return nexthop_select_path_mp(nhg, hash);
1046283a72a5SPetr Machata 	else if (nhg->resilient)
1047283a72a5SPetr Machata 		return nexthop_select_path_res(nhg, hash);
104879bc55e3SPetr Machata 
104979bc55e3SPetr Machata 	/* Unreachable. */
105079bc55e3SPetr Machata 	return NULL;
105179bc55e3SPetr Machata }
1052430a0491SDavid Ahern EXPORT_SYMBOL_GPL(nexthop_select_path);
1053430a0491SDavid Ahern 
1054f88c9aa1SDavid Ahern int nexthop_for_each_fib6_nh(struct nexthop *nh,
1055f88c9aa1SDavid Ahern 			     int (*cb)(struct fib6_nh *nh, void *arg),
1056f88c9aa1SDavid Ahern 			     void *arg)
1057f88c9aa1SDavid Ahern {
1058f88c9aa1SDavid Ahern 	struct nh_info *nhi;
1059f88c9aa1SDavid Ahern 	int err;
1060f88c9aa1SDavid Ahern 
1061f88c9aa1SDavid Ahern 	if (nh->is_group) {
1062f88c9aa1SDavid Ahern 		struct nh_group *nhg;
1063f88c9aa1SDavid Ahern 		int i;
1064f88c9aa1SDavid Ahern 
1065f88c9aa1SDavid Ahern 		nhg = rcu_dereference_rtnl(nh->nh_grp);
1066f88c9aa1SDavid Ahern 		for (i = 0; i < nhg->num_nh; i++) {
1067f88c9aa1SDavid Ahern 			struct nh_grp_entry *nhge = &nhg->nh_entries[i];
1068f88c9aa1SDavid Ahern 
1069f88c9aa1SDavid Ahern 			nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
1070f88c9aa1SDavid Ahern 			err = cb(&nhi->fib6_nh, arg);
1071f88c9aa1SDavid Ahern 			if (err)
1072f88c9aa1SDavid Ahern 				return err;
1073f88c9aa1SDavid Ahern 		}
1074f88c9aa1SDavid Ahern 	} else {
1075f88c9aa1SDavid Ahern 		nhi = rcu_dereference_rtnl(nh->nh_info);
1076f88c9aa1SDavid Ahern 		err = cb(&nhi->fib6_nh, arg);
1077f88c9aa1SDavid Ahern 		if (err)
1078f88c9aa1SDavid Ahern 			return err;
1079f88c9aa1SDavid Ahern 	}
1080f88c9aa1SDavid Ahern 
1081f88c9aa1SDavid Ahern 	return 0;
1082f88c9aa1SDavid Ahern }
1083f88c9aa1SDavid Ahern EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
1084f88c9aa1SDavid Ahern 
10857bf4796dSDavid Ahern static int check_src_addr(const struct in6_addr *saddr,
10867bf4796dSDavid Ahern 			  struct netlink_ext_ack *extack)
10877bf4796dSDavid Ahern {
10887bf4796dSDavid Ahern 	if (!ipv6_addr_any(saddr)) {
10897bf4796dSDavid Ahern 		NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
10907bf4796dSDavid Ahern 		return -EINVAL;
10917bf4796dSDavid Ahern 	}
10927bf4796dSDavid Ahern 	return 0;
10937bf4796dSDavid Ahern }
10947bf4796dSDavid Ahern 
1095f88d8ea6SDavid Ahern int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
1096f88d8ea6SDavid Ahern 		       struct netlink_ext_ack *extack)
1097f88d8ea6SDavid Ahern {
1098f88d8ea6SDavid Ahern 	struct nh_info *nhi;
1099ce9ac056SDavid Ahern 	bool is_fdb_nh;
110038428d68SRoopa Prabhu 
1101f88d8ea6SDavid Ahern 	/* fib6_src is unique to a fib6_info and limits the ability to cache
1102f88d8ea6SDavid Ahern 	 * routes in fib6_nh within a nexthop that is potentially shared
1103f88d8ea6SDavid Ahern 	 * across multiple fib entries. If the config wants to use source
1104f88d8ea6SDavid Ahern 	 * routing it can not use nexthop objects. mlxsw also does not allow
1105f88d8ea6SDavid Ahern 	 * fib6_src on routes.
1106f88d8ea6SDavid Ahern 	 */
11077bf4796dSDavid Ahern 	if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
1108f88d8ea6SDavid Ahern 		return -EINVAL;
1109f88d8ea6SDavid Ahern 
1110f88d8ea6SDavid Ahern 	if (nh->is_group) {
1111f88d8ea6SDavid Ahern 		struct nh_group *nhg;
1112f88d8ea6SDavid Ahern 
1113f88d8ea6SDavid Ahern 		nhg = rtnl_dereference(nh->nh_grp);
1114f88d8ea6SDavid Ahern 		if (nhg->has_v4)
1115f88d8ea6SDavid Ahern 			goto no_v4_nh;
1116ce9ac056SDavid Ahern 		is_fdb_nh = nhg->fdb_nh;
1117f88d8ea6SDavid Ahern 	} else {
1118f88d8ea6SDavid Ahern 		nhi = rtnl_dereference(nh->nh_info);
1119f88d8ea6SDavid Ahern 		if (nhi->family == AF_INET)
1120f88d8ea6SDavid Ahern 			goto no_v4_nh;
1121ce9ac056SDavid Ahern 		is_fdb_nh = nhi->fdb_nh;
1122ce9ac056SDavid Ahern 	}
1123ce9ac056SDavid Ahern 
1124ce9ac056SDavid Ahern 	if (is_fdb_nh) {
1125ce9ac056SDavid Ahern 		NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
1126ce9ac056SDavid Ahern 		return -EINVAL;
1127f88d8ea6SDavid Ahern 	}
1128f88d8ea6SDavid Ahern 
1129f88d8ea6SDavid Ahern 	return 0;
1130f88d8ea6SDavid Ahern no_v4_nh:
1131f88d8ea6SDavid Ahern 	NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
1132f88d8ea6SDavid Ahern 	return -EINVAL;
1133f88d8ea6SDavid Ahern }
1134f88d8ea6SDavid Ahern EXPORT_SYMBOL_GPL(fib6_check_nexthop);
1135f88d8ea6SDavid Ahern 
11367bf4796dSDavid Ahern /* if existing nexthop has ipv6 routes linked to it, need
11377bf4796dSDavid Ahern  * to verify this new spec works with ipv6
11387bf4796dSDavid Ahern  */
11397bf4796dSDavid Ahern static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
11407bf4796dSDavid Ahern 			      struct netlink_ext_ack *extack)
11417bf4796dSDavid Ahern {
11427bf4796dSDavid Ahern 	struct fib6_info *f6i;
11437bf4796dSDavid Ahern 
11447bf4796dSDavid Ahern 	if (list_empty(&old->f6i_list))
11457bf4796dSDavid Ahern 		return 0;
11467bf4796dSDavid Ahern 
11477bf4796dSDavid Ahern 	list_for_each_entry(f6i, &old->f6i_list, nh_list) {
11487bf4796dSDavid Ahern 		if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
11497bf4796dSDavid Ahern 			return -EINVAL;
11507bf4796dSDavid Ahern 	}
11517bf4796dSDavid Ahern 
11527bf4796dSDavid Ahern 	return fib6_check_nexthop(new, NULL, extack);
11537bf4796dSDavid Ahern }
11547bf4796dSDavid Ahern 
1155ce9ac056SDavid Ahern static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
11564c7e8084SDavid Ahern 			       struct netlink_ext_ack *extack)
11574c7e8084SDavid Ahern {
11584c7e8084SDavid Ahern 	if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
11594c7e8084SDavid Ahern 		NL_SET_ERR_MSG(extack,
11604c7e8084SDavid Ahern 			       "Route with host scope can not have a gateway");
11614c7e8084SDavid Ahern 		return -EINVAL;
11624c7e8084SDavid Ahern 	}
11634c7e8084SDavid Ahern 
11644c7e8084SDavid Ahern 	if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) {
11654c7e8084SDavid Ahern 		NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
11664c7e8084SDavid Ahern 		return -EINVAL;
11674c7e8084SDavid Ahern 	}
11684c7e8084SDavid Ahern 
11694c7e8084SDavid Ahern 	return 0;
11704c7e8084SDavid Ahern }
11714c7e8084SDavid Ahern 
11724c7e8084SDavid Ahern /* Invoked by fib add code to verify nexthop by id is ok with
11734c7e8084SDavid Ahern  * config for prefix; parts of fib_check_nh not done when nexthop
11744c7e8084SDavid Ahern  * object is used.
11754c7e8084SDavid Ahern  */
11764c7e8084SDavid Ahern int fib_check_nexthop(struct nexthop *nh, u8 scope,
11774c7e8084SDavid Ahern 		      struct netlink_ext_ack *extack)
11784c7e8084SDavid Ahern {
1179ce9ac056SDavid Ahern 	struct nh_info *nhi;
11804c7e8084SDavid Ahern 	int err = 0;
11814c7e8084SDavid Ahern 
1182ce9ac056SDavid Ahern 	if (nh->is_group) {
1183ce9ac056SDavid Ahern 		struct nh_group *nhg;
1184ce9ac056SDavid Ahern 
1185ce9ac056SDavid Ahern 		nhg = rtnl_dereference(nh->nh_grp);
1186ce9ac056SDavid Ahern 		if (nhg->fdb_nh) {
118738428d68SRoopa Prabhu 			NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
118838428d68SRoopa Prabhu 			err = -EINVAL;
118938428d68SRoopa Prabhu 			goto out;
119038428d68SRoopa Prabhu 		}
119138428d68SRoopa Prabhu 
11924c7e8084SDavid Ahern 		if (scope == RT_SCOPE_HOST) {
11934c7e8084SDavid Ahern 			NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
11944c7e8084SDavid Ahern 			err = -EINVAL;
11954c7e8084SDavid Ahern 			goto out;
11964c7e8084SDavid Ahern 		}
11974c7e8084SDavid Ahern 
11984c7e8084SDavid Ahern 		/* all nexthops in a group have the same scope */
1199ce9ac056SDavid Ahern 		nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
1200ce9ac056SDavid Ahern 		err = nexthop_check_scope(nhi, scope, extack);
12014c7e8084SDavid Ahern 	} else {
1202ce9ac056SDavid Ahern 		nhi = rtnl_dereference(nh->nh_info);
1203ce9ac056SDavid Ahern 		if (nhi->fdb_nh) {
1204ce9ac056SDavid Ahern 			NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
1205ce9ac056SDavid Ahern 			err = -EINVAL;
1206ce9ac056SDavid Ahern 			goto out;
12074c7e8084SDavid Ahern 		}
1208ce9ac056SDavid Ahern 		err = nexthop_check_scope(nhi, scope, extack);
1209ce9ac056SDavid Ahern 	}
1210ce9ac056SDavid Ahern 
12114c7e8084SDavid Ahern out:
12124c7e8084SDavid Ahern 	return err;
12134c7e8084SDavid Ahern }
12144c7e8084SDavid Ahern 
12157bf4796dSDavid Ahern static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
12167bf4796dSDavid Ahern 			     struct netlink_ext_ack *extack)
12177bf4796dSDavid Ahern {
12187bf4796dSDavid Ahern 	struct fib_info *fi;
12197bf4796dSDavid Ahern 
12207bf4796dSDavid Ahern 	list_for_each_entry(fi, &old->fi_list, nh_list) {
12217bf4796dSDavid Ahern 		int err;
12227bf4796dSDavid Ahern 
12237bf4796dSDavid Ahern 		err = fib_check_nexthop(new, fi->fib_scope, extack);
12247bf4796dSDavid Ahern 		if (err)
12257bf4796dSDavid Ahern 			return err;
12267bf4796dSDavid Ahern 	}
12277bf4796dSDavid Ahern 	return 0;
12287bf4796dSDavid Ahern }
12297bf4796dSDavid Ahern 
1230283a72a5SPetr Machata static bool nh_res_nhge_is_balanced(const struct nh_grp_entry *nhge)
1231283a72a5SPetr Machata {
1232283a72a5SPetr Machata 	return nhge->res.count_buckets == nhge->res.wants_buckets;
1233283a72a5SPetr Machata }
1234283a72a5SPetr Machata 
1235283a72a5SPetr Machata static bool nh_res_nhge_is_ow(const struct nh_grp_entry *nhge)
1236283a72a5SPetr Machata {
1237283a72a5SPetr Machata 	return nhge->res.count_buckets > nhge->res.wants_buckets;
1238283a72a5SPetr Machata }
1239283a72a5SPetr Machata 
1240283a72a5SPetr Machata static bool nh_res_nhge_is_uw(const struct nh_grp_entry *nhge)
1241283a72a5SPetr Machata {
1242283a72a5SPetr Machata 	return nhge->res.count_buckets < nhge->res.wants_buckets;
1243283a72a5SPetr Machata }
1244283a72a5SPetr Machata 
1245283a72a5SPetr Machata static bool nh_res_table_is_balanced(const struct nh_res_table *res_table)
1246283a72a5SPetr Machata {
1247283a72a5SPetr Machata 	return list_empty(&res_table->uw_nh_entries);
1248283a72a5SPetr Machata }
1249283a72a5SPetr Machata 
1250283a72a5SPetr Machata static void nh_res_bucket_unset_nh(struct nh_res_bucket *bucket)
1251283a72a5SPetr Machata {
1252283a72a5SPetr Machata 	struct nh_grp_entry *nhge;
1253283a72a5SPetr Machata 
1254283a72a5SPetr Machata 	if (bucket->occupied) {
1255283a72a5SPetr Machata 		nhge = nh_res_dereference(bucket->nh_entry);
1256283a72a5SPetr Machata 		nhge->res.count_buckets--;
1257283a72a5SPetr Machata 		bucket->occupied = false;
1258283a72a5SPetr Machata 	}
1259283a72a5SPetr Machata }
1260283a72a5SPetr Machata 
1261283a72a5SPetr Machata static void nh_res_bucket_set_nh(struct nh_res_bucket *bucket,
1262283a72a5SPetr Machata 				 struct nh_grp_entry *nhge)
1263283a72a5SPetr Machata {
1264283a72a5SPetr Machata 	nh_res_bucket_unset_nh(bucket);
1265283a72a5SPetr Machata 
1266283a72a5SPetr Machata 	bucket->occupied = true;
1267283a72a5SPetr Machata 	rcu_assign_pointer(bucket->nh_entry, nhge);
1268283a72a5SPetr Machata 	nhge->res.count_buckets++;
1269283a72a5SPetr Machata }
1270283a72a5SPetr Machata 
1271283a72a5SPetr Machata static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table,
1272283a72a5SPetr Machata 					 struct nh_res_bucket *bucket,
1273283a72a5SPetr Machata 					 unsigned long *deadline, bool *force)
1274283a72a5SPetr Machata {
1275283a72a5SPetr Machata 	unsigned long now = jiffies;
1276283a72a5SPetr Machata 	struct nh_grp_entry *nhge;
1277283a72a5SPetr Machata 	unsigned long idle_point;
1278283a72a5SPetr Machata 
1279283a72a5SPetr Machata 	if (!bucket->occupied) {
1280283a72a5SPetr Machata 		/* The bucket is not occupied, its NHGE pointer is either
1281283a72a5SPetr Machata 		 * NULL or obsolete. We _have to_ migrate: set force.
1282283a72a5SPetr Machata 		 */
1283283a72a5SPetr Machata 		*force = true;
1284283a72a5SPetr Machata 		return true;
1285283a72a5SPetr Machata 	}
1286283a72a5SPetr Machata 
1287283a72a5SPetr Machata 	nhge = nh_res_dereference(bucket->nh_entry);
1288283a72a5SPetr Machata 
1289283a72a5SPetr Machata 	/* If the bucket is populated by an underweight or balanced
1290283a72a5SPetr Machata 	 * nexthop, do not migrate.
1291283a72a5SPetr Machata 	 */
1292283a72a5SPetr Machata 	if (!nh_res_nhge_is_ow(nhge))
1293283a72a5SPetr Machata 		return false;
1294283a72a5SPetr Machata 
1295283a72a5SPetr Machata 	/* At this point we know that the bucket is populated with an
1296283a72a5SPetr Machata 	 * overweight nexthop. It needs to be migrated to a new nexthop if
1297283a72a5SPetr Machata 	 * the idle timer of unbalanced timer expired.
1298283a72a5SPetr Machata 	 */
1299283a72a5SPetr Machata 
1300283a72a5SPetr Machata 	idle_point = nh_res_bucket_idle_point(res_table, bucket, now);
1301283a72a5SPetr Machata 	if (time_after_eq(now, idle_point)) {
1302283a72a5SPetr Machata 		/* The bucket is idle. We _can_ migrate: unset force. */
1303283a72a5SPetr Machata 		*force = false;
1304283a72a5SPetr Machata 		return true;
1305283a72a5SPetr Machata 	}
1306283a72a5SPetr Machata 
1307283a72a5SPetr Machata 	/* Unbalanced timer of 0 means "never force". */
1308283a72a5SPetr Machata 	if (res_table->unbalanced_timer) {
1309283a72a5SPetr Machata 		unsigned long unb_point;
1310283a72a5SPetr Machata 
1311283a72a5SPetr Machata 		unb_point = nh_res_table_unb_point(res_table);
1312283a72a5SPetr Machata 		if (time_after(now, unb_point)) {
1313283a72a5SPetr Machata 			/* The bucket is not idle, but the unbalanced timer
1314283a72a5SPetr Machata 			 * expired. We _can_ migrate, but set force anyway,
1315283a72a5SPetr Machata 			 * so that drivers know to ignore activity reports
1316283a72a5SPetr Machata 			 * from the HW.
1317283a72a5SPetr Machata 			 */
1318283a72a5SPetr Machata 			*force = true;
1319283a72a5SPetr Machata 			return true;
1320283a72a5SPetr Machata 		}
1321283a72a5SPetr Machata 
1322283a72a5SPetr Machata 		nh_res_time_set_deadline(unb_point, deadline);
1323283a72a5SPetr Machata 	}
1324283a72a5SPetr Machata 
1325283a72a5SPetr Machata 	nh_res_time_set_deadline(idle_point, deadline);
1326283a72a5SPetr Machata 	return false;
1327283a72a5SPetr Machata }
1328283a72a5SPetr Machata 
1329283a72a5SPetr Machata static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
1330*7c37c7e0SPetr Machata 				  u16 bucket_index, bool notify, bool force)
1331283a72a5SPetr Machata {
1332283a72a5SPetr Machata 	struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
1333283a72a5SPetr Machata 	struct nh_grp_entry *new_nhge;
1334*7c37c7e0SPetr Machata 	struct netlink_ext_ack extack;
1335*7c37c7e0SPetr Machata 	int err;
1336283a72a5SPetr Machata 
1337283a72a5SPetr Machata 	new_nhge = list_first_entry_or_null(&res_table->uw_nh_entries,
1338283a72a5SPetr Machata 					    struct nh_grp_entry,
1339283a72a5SPetr Machata 					    res.uw_nh_entry);
1340283a72a5SPetr Machata 	if (WARN_ON_ONCE(!new_nhge))
1341283a72a5SPetr Machata 		/* If this function is called, "bucket" is either not
1342283a72a5SPetr Machata 		 * occupied, or it belongs to a next hop that is
1343283a72a5SPetr Machata 		 * overweight. In either case, there ought to be a
1344283a72a5SPetr Machata 		 * corresponding underweight next hop.
1345283a72a5SPetr Machata 		 */
1346283a72a5SPetr Machata 		return false;
1347283a72a5SPetr Machata 
1348*7c37c7e0SPetr Machata 	if (notify) {
1349*7c37c7e0SPetr Machata 		struct nh_grp_entry *old_nhge;
1350*7c37c7e0SPetr Machata 
1351*7c37c7e0SPetr Machata 		old_nhge = nh_res_dereference(bucket->nh_entry);
1352*7c37c7e0SPetr Machata 		err = call_nexthop_res_bucket_notifiers(res_table->net,
1353*7c37c7e0SPetr Machata 							res_table->nhg_id,
1354*7c37c7e0SPetr Machata 							bucket_index, force,
1355*7c37c7e0SPetr Machata 							old_nhge->nh,
1356*7c37c7e0SPetr Machata 							new_nhge->nh, &extack);
1357*7c37c7e0SPetr Machata 		if (err) {
1358*7c37c7e0SPetr Machata 			pr_err_ratelimited("%s\n", extack._msg);
1359*7c37c7e0SPetr Machata 			if (!force)
1360*7c37c7e0SPetr Machata 				return false;
1361*7c37c7e0SPetr Machata 			/* It is not possible to veto a forced replacement, so
1362*7c37c7e0SPetr Machata 			 * just clear the hardware flags from the nexthop
1363*7c37c7e0SPetr Machata 			 * bucket to indicate to user space that this bucket is
1364*7c37c7e0SPetr Machata 			 * not correctly populated in hardware.
1365*7c37c7e0SPetr Machata 			 */
1366*7c37c7e0SPetr Machata 			bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
1367*7c37c7e0SPetr Machata 		}
1368*7c37c7e0SPetr Machata 	}
1369*7c37c7e0SPetr Machata 
1370283a72a5SPetr Machata 	nh_res_bucket_set_nh(bucket, new_nhge);
1371283a72a5SPetr Machata 	nh_res_bucket_set_idle(res_table, bucket);
1372283a72a5SPetr Machata 
1373283a72a5SPetr Machata 	if (nh_res_nhge_is_balanced(new_nhge))
1374283a72a5SPetr Machata 		list_del(&new_nhge->res.uw_nh_entry);
1375283a72a5SPetr Machata 	return true;
1376283a72a5SPetr Machata }
1377283a72a5SPetr Machata 
1378283a72a5SPetr Machata #define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2)
1379283a72a5SPetr Machata 
1380*7c37c7e0SPetr Machata static void nh_res_table_upkeep(struct nh_res_table *res_table, bool notify)
1381283a72a5SPetr Machata {
1382283a72a5SPetr Machata 	unsigned long now = jiffies;
1383283a72a5SPetr Machata 	unsigned long deadline;
1384283a72a5SPetr Machata 	u16 i;
1385283a72a5SPetr Machata 
1386283a72a5SPetr Machata 	/* Deadline is the next time that upkeep should be run. It is the
1387283a72a5SPetr Machata 	 * earliest time at which one of the buckets might be migrated.
1388283a72a5SPetr Machata 	 * Start at the most pessimistic estimate: either unbalanced_timer
1389283a72a5SPetr Machata 	 * from now, or if there is none, idle_timer from now. For each
1390283a72a5SPetr Machata 	 * encountered time point, call nh_res_time_set_deadline() to
1391283a72a5SPetr Machata 	 * refine the estimate.
1392283a72a5SPetr Machata 	 */
1393283a72a5SPetr Machata 	if (res_table->unbalanced_timer)
1394283a72a5SPetr Machata 		deadline = now + res_table->unbalanced_timer;
1395283a72a5SPetr Machata 	else
1396283a72a5SPetr Machata 		deadline = now + res_table->idle_timer;
1397283a72a5SPetr Machata 
1398283a72a5SPetr Machata 	for (i = 0; i < res_table->num_nh_buckets; i++) {
1399283a72a5SPetr Machata 		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
1400283a72a5SPetr Machata 		bool force;
1401283a72a5SPetr Machata 
1402283a72a5SPetr Machata 		if (nh_res_bucket_should_migrate(res_table, bucket,
1403283a72a5SPetr Machata 						 &deadline, &force)) {
1404*7c37c7e0SPetr Machata 			if (!nh_res_bucket_migrate(res_table, i, notify,
1405*7c37c7e0SPetr Machata 						   force)) {
1406283a72a5SPetr Machata 				unsigned long idle_point;
1407283a72a5SPetr Machata 
1408283a72a5SPetr Machata 				/* A driver can override the migration
1409283a72a5SPetr Machata 				 * decision if the HW reports that the
1410283a72a5SPetr Machata 				 * bucket is actually not idle. Therefore
1411283a72a5SPetr Machata 				 * remark the bucket as busy again and
1412283a72a5SPetr Machata 				 * update the deadline.
1413283a72a5SPetr Machata 				 */
1414283a72a5SPetr Machata 				nh_res_bucket_set_busy(bucket);
1415283a72a5SPetr Machata 				idle_point = nh_res_bucket_idle_point(res_table,
1416283a72a5SPetr Machata 								      bucket,
1417283a72a5SPetr Machata 								      now);
1418283a72a5SPetr Machata 				nh_res_time_set_deadline(idle_point, &deadline);
1419283a72a5SPetr Machata 			}
1420283a72a5SPetr Machata 		}
1421283a72a5SPetr Machata 	}
1422283a72a5SPetr Machata 
1423283a72a5SPetr Machata 	/* If the group is still unbalanced, schedule the next upkeep to
1424283a72a5SPetr Machata 	 * either the deadline computed above, or the minimum deadline,
1425283a72a5SPetr Machata 	 * whichever comes later.
1426283a72a5SPetr Machata 	 */
1427283a72a5SPetr Machata 	if (!nh_res_table_is_balanced(res_table)) {
1428283a72a5SPetr Machata 		unsigned long now = jiffies;
1429283a72a5SPetr Machata 		unsigned long min_deadline;
1430283a72a5SPetr Machata 
1431283a72a5SPetr Machata 		min_deadline = now + NH_RES_UPKEEP_DW_MINIMUM_INTERVAL;
1432283a72a5SPetr Machata 		if (time_before(deadline, min_deadline))
1433283a72a5SPetr Machata 			deadline = min_deadline;
1434283a72a5SPetr Machata 
1435283a72a5SPetr Machata 		queue_delayed_work(system_power_efficient_wq,
1436283a72a5SPetr Machata 				   &res_table->upkeep_dw, deadline - now);
1437283a72a5SPetr Machata 	}
1438283a72a5SPetr Machata }
1439283a72a5SPetr Machata 
1440283a72a5SPetr Machata static void nh_res_table_upkeep_dw(struct work_struct *work)
1441283a72a5SPetr Machata {
1442283a72a5SPetr Machata 	struct delayed_work *dw = to_delayed_work(work);
1443283a72a5SPetr Machata 	struct nh_res_table *res_table;
1444283a72a5SPetr Machata 
1445283a72a5SPetr Machata 	res_table = container_of(dw, struct nh_res_table, upkeep_dw);
1446*7c37c7e0SPetr Machata 	nh_res_table_upkeep(res_table, true);
1447283a72a5SPetr Machata }
1448283a72a5SPetr Machata 
1449283a72a5SPetr Machata static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
1450283a72a5SPetr Machata {
1451283a72a5SPetr Machata 	cancel_delayed_work_sync(&res_table->upkeep_dw);
1452283a72a5SPetr Machata }
1453283a72a5SPetr Machata 
1454283a72a5SPetr Machata static void nh_res_group_rebalance(struct nh_group *nhg,
1455283a72a5SPetr Machata 				   struct nh_res_table *res_table)
1456283a72a5SPetr Machata {
1457283a72a5SPetr Machata 	int prev_upper_bound = 0;
1458283a72a5SPetr Machata 	int total = 0;
1459283a72a5SPetr Machata 	int w = 0;
1460283a72a5SPetr Machata 	int i;
1461283a72a5SPetr Machata 
1462283a72a5SPetr Machata 	INIT_LIST_HEAD(&res_table->uw_nh_entries);
1463283a72a5SPetr Machata 
1464283a72a5SPetr Machata 	for (i = 0; i < nhg->num_nh; ++i)
1465283a72a5SPetr Machata 		total += nhg->nh_entries[i].weight;
1466283a72a5SPetr Machata 
1467283a72a5SPetr Machata 	for (i = 0; i < nhg->num_nh; ++i) {
1468283a72a5SPetr Machata 		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
1469283a72a5SPetr Machata 		int upper_bound;
1470283a72a5SPetr Machata 
1471283a72a5SPetr Machata 		w += nhge->weight;
1472283a72a5SPetr Machata 		upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w,
1473283a72a5SPetr Machata 						total);
1474283a72a5SPetr Machata 		nhge->res.wants_buckets = upper_bound - prev_upper_bound;
1475283a72a5SPetr Machata 		prev_upper_bound = upper_bound;
1476283a72a5SPetr Machata 
1477283a72a5SPetr Machata 		if (nh_res_nhge_is_uw(nhge)) {
1478283a72a5SPetr Machata 			if (list_empty(&res_table->uw_nh_entries))
1479283a72a5SPetr Machata 				res_table->unbalanced_since = jiffies;
1480283a72a5SPetr Machata 			list_add(&nhge->res.uw_nh_entry,
1481283a72a5SPetr Machata 				 &res_table->uw_nh_entries);
1482283a72a5SPetr Machata 		}
1483283a72a5SPetr Machata 	}
1484283a72a5SPetr Machata }
1485283a72a5SPetr Machata 
1486283a72a5SPetr Machata /* Migrate buckets in res_table so that they reference NHGE's from NHG with
1487283a72a5SPetr Machata  * the right NH ID. Set those buckets that do not have a corresponding NHGE
1488283a72a5SPetr Machata  * entry in NHG as not occupied.
1489283a72a5SPetr Machata  */
1490283a72a5SPetr Machata static void nh_res_table_migrate_buckets(struct nh_res_table *res_table,
1491283a72a5SPetr Machata 					 struct nh_group *nhg)
1492283a72a5SPetr Machata {
1493283a72a5SPetr Machata 	u16 i;
1494283a72a5SPetr Machata 
1495283a72a5SPetr Machata 	for (i = 0; i < res_table->num_nh_buckets; i++) {
1496283a72a5SPetr Machata 		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
1497283a72a5SPetr Machata 		u32 id = rtnl_dereference(bucket->nh_entry)->nh->id;
1498283a72a5SPetr Machata 		bool found = false;
1499283a72a5SPetr Machata 		int j;
1500283a72a5SPetr Machata 
1501283a72a5SPetr Machata 		for (j = 0; j < nhg->num_nh; j++) {
1502283a72a5SPetr Machata 			struct nh_grp_entry *nhge = &nhg->nh_entries[j];
1503283a72a5SPetr Machata 
1504283a72a5SPetr Machata 			if (nhge->nh->id == id) {
1505283a72a5SPetr Machata 				nh_res_bucket_set_nh(bucket, nhge);
1506283a72a5SPetr Machata 				found = true;
1507283a72a5SPetr Machata 				break;
1508283a72a5SPetr Machata 			}
1509283a72a5SPetr Machata 		}
1510283a72a5SPetr Machata 
1511283a72a5SPetr Machata 		if (!found)
1512283a72a5SPetr Machata 			nh_res_bucket_unset_nh(bucket);
1513283a72a5SPetr Machata 	}
1514283a72a5SPetr Machata }
1515283a72a5SPetr Machata 
1516283a72a5SPetr Machata static void replace_nexthop_grp_res(struct nh_group *oldg,
1517283a72a5SPetr Machata 				    struct nh_group *newg)
1518283a72a5SPetr Machata {
1519283a72a5SPetr Machata 	/* For NH group replacement, the new NHG might only have a stub
1520283a72a5SPetr Machata 	 * hash table with 0 buckets, because the number of buckets was not
1521283a72a5SPetr Machata 	 * specified. For NH removal, oldg and newg both reference the same
1522283a72a5SPetr Machata 	 * res_table. So in any case, in the following, we want to work
1523283a72a5SPetr Machata 	 * with oldg->res_table.
1524283a72a5SPetr Machata 	 */
1525283a72a5SPetr Machata 	struct nh_res_table *old_res_table = rtnl_dereference(oldg->res_table);
1526283a72a5SPetr Machata 	unsigned long prev_unbalanced_since = old_res_table->unbalanced_since;
1527283a72a5SPetr Machata 	bool prev_has_uw = !list_empty(&old_res_table->uw_nh_entries);
1528283a72a5SPetr Machata 
1529283a72a5SPetr Machata 	nh_res_table_cancel_upkeep(old_res_table);
1530283a72a5SPetr Machata 	nh_res_table_migrate_buckets(old_res_table, newg);
1531283a72a5SPetr Machata 	nh_res_group_rebalance(newg, old_res_table);
1532283a72a5SPetr Machata 	if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries))
1533283a72a5SPetr Machata 		old_res_table->unbalanced_since = prev_unbalanced_since;
1534*7c37c7e0SPetr Machata 	nh_res_table_upkeep(old_res_table, true);
1535283a72a5SPetr Machata }
1536283a72a5SPetr Machata 
1537283a72a5SPetr Machata static void nh_mp_group_rebalance(struct nh_group *nhg)
1538430a0491SDavid Ahern {
1539430a0491SDavid Ahern 	int total = 0;
1540430a0491SDavid Ahern 	int w = 0;
1541430a0491SDavid Ahern 	int i;
1542430a0491SDavid Ahern 
1543430a0491SDavid Ahern 	for (i = 0; i < nhg->num_nh; ++i)
1544430a0491SDavid Ahern 		total += nhg->nh_entries[i].weight;
1545430a0491SDavid Ahern 
1546430a0491SDavid Ahern 	for (i = 0; i < nhg->num_nh; ++i) {
1547430a0491SDavid Ahern 		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
1548430a0491SDavid Ahern 		int upper_bound;
1549430a0491SDavid Ahern 
1550430a0491SDavid Ahern 		w += nhge->weight;
1551430a0491SDavid Ahern 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
1552b9bae61bSPetr Machata 		atomic_set(&nhge->mpath.upper_bound, upper_bound);
1553430a0491SDavid Ahern 	}
1554430a0491SDavid Ahern }
1555430a0491SDavid Ahern 
1556ac21753aSDavid Ahern static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
1557430a0491SDavid Ahern 				struct nl_info *nlinfo)
1558430a0491SDavid Ahern {
155990f33bffSNikolay Aleksandrov 	struct nh_grp_entry *nhges, *new_nhges;
1560ac21753aSDavid Ahern 	struct nexthop *nhp = nhge->nh_parent;
1561833a1065SIdo Schimmel 	struct netlink_ext_ack extack;
1562430a0491SDavid Ahern 	struct nexthop *nh = nhge->nh;
156390f33bffSNikolay Aleksandrov 	struct nh_group *nhg, *newg;
1564833a1065SIdo Schimmel 	int i, j, err;
1565430a0491SDavid Ahern 
1566430a0491SDavid Ahern 	WARN_ON(!nh);
1567430a0491SDavid Ahern 
1568ac21753aSDavid Ahern 	nhg = rtnl_dereference(nhp->nh_grp);
156990f33bffSNikolay Aleksandrov 	newg = nhg->spare;
1570430a0491SDavid Ahern 
157190f33bffSNikolay Aleksandrov 	/* last entry, keep it visible and remove the parent */
157290f33bffSNikolay Aleksandrov 	if (nhg->num_nh == 1) {
157390f33bffSNikolay Aleksandrov 		remove_nexthop(net, nhp, nlinfo);
1574430a0491SDavid Ahern 		return;
157590f33bffSNikolay Aleksandrov 	}
1576430a0491SDavid Ahern 
1577863b2558SIdo Schimmel 	newg->has_v4 = false;
157890e1a9e2SPetr Machata 	newg->is_multipath = nhg->is_multipath;
157990f33bffSNikolay Aleksandrov 	newg->mpath = nhg->mpath;
1580283a72a5SPetr Machata 	newg->resilient = nhg->resilient;
1581ce9ac056SDavid Ahern 	newg->fdb_nh = nhg->fdb_nh;
158290f33bffSNikolay Aleksandrov 	newg->num_nh = nhg->num_nh;
1583430a0491SDavid Ahern 
158490f33bffSNikolay Aleksandrov 	/* copy old entries to new except the one getting removed */
158590f33bffSNikolay Aleksandrov 	nhges = nhg->nh_entries;
158690f33bffSNikolay Aleksandrov 	new_nhges = newg->nh_entries;
158790f33bffSNikolay Aleksandrov 	for (i = 0, j = 0; i < nhg->num_nh; ++i) {
1588863b2558SIdo Schimmel 		struct nh_info *nhi;
1589863b2558SIdo Schimmel 
159090f33bffSNikolay Aleksandrov 		/* current nexthop getting removed */
159190f33bffSNikolay Aleksandrov 		if (nhg->nh_entries[i].nh == nh) {
159290f33bffSNikolay Aleksandrov 			newg->num_nh--;
159390f33bffSNikolay Aleksandrov 			continue;
159490f33bffSNikolay Aleksandrov 		}
1595430a0491SDavid Ahern 
1596863b2558SIdo Schimmel 		nhi = rtnl_dereference(nhges[i].nh->nh_info);
1597863b2558SIdo Schimmel 		if (nhi->family == AF_INET)
1598863b2558SIdo Schimmel 			newg->has_v4 = true;
1599863b2558SIdo Schimmel 
160090f33bffSNikolay Aleksandrov 		list_del(&nhges[i].nh_list);
160190f33bffSNikolay Aleksandrov 		new_nhges[j].nh_parent = nhges[i].nh_parent;
160290f33bffSNikolay Aleksandrov 		new_nhges[j].nh = nhges[i].nh;
160390f33bffSNikolay Aleksandrov 		new_nhges[j].weight = nhges[i].weight;
160490f33bffSNikolay Aleksandrov 		list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list);
160590f33bffSNikolay Aleksandrov 		j++;
160690f33bffSNikolay Aleksandrov 	}
160790f33bffSNikolay Aleksandrov 
1608283a72a5SPetr Machata 	if (newg->mpath)
1609283a72a5SPetr Machata 		nh_mp_group_rebalance(newg);
1610283a72a5SPetr Machata 	else if (newg->resilient)
1611283a72a5SPetr Machata 		replace_nexthop_grp_res(nhg, newg);
1612283a72a5SPetr Machata 
161390f33bffSNikolay Aleksandrov 	rcu_assign_pointer(nhp->nh_grp, newg);
161490f33bffSNikolay Aleksandrov 
161590f33bffSNikolay Aleksandrov 	list_del(&nhge->nh_list);
161690f33bffSNikolay Aleksandrov 	nexthop_put(nhge->nh);
1617430a0491SDavid Ahern 
1618*7c37c7e0SPetr Machata 	/* Removal of a NH from a resilient group is notified through
1619*7c37c7e0SPetr Machata 	 * bucket notifications.
1620*7c37c7e0SPetr Machata 	 */
1621*7c37c7e0SPetr Machata 	if (newg->mpath) {
1622*7c37c7e0SPetr Machata 		err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
1623*7c37c7e0SPetr Machata 					     &extack);
1624833a1065SIdo Schimmel 		if (err)
1625833a1065SIdo Schimmel 			pr_err("%s\n", extack._msg);
1626*7c37c7e0SPetr Machata 	}
1627833a1065SIdo Schimmel 
1628430a0491SDavid Ahern 	if (nlinfo)
1629ac21753aSDavid Ahern 		nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
1630430a0491SDavid Ahern }
1631430a0491SDavid Ahern 
1632430a0491SDavid Ahern static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
1633430a0491SDavid Ahern 				       struct nl_info *nlinfo)
1634430a0491SDavid Ahern {
1635430a0491SDavid Ahern 	struct nh_grp_entry *nhge, *tmp;
1636430a0491SDavid Ahern 
1637ac21753aSDavid Ahern 	list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list)
1638ac21753aSDavid Ahern 		remove_nh_grp_entry(net, nhge, nlinfo);
1639430a0491SDavid Ahern 
164090f33bffSNikolay Aleksandrov 	/* make sure all see the newly published array before releasing rtnl */
1641df6afe2fSIdo Schimmel 	synchronize_net();
1642430a0491SDavid Ahern }
1643430a0491SDavid Ahern 
1644430a0491SDavid Ahern static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
1645430a0491SDavid Ahern {
1646430a0491SDavid Ahern 	struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
1647283a72a5SPetr Machata 	struct nh_res_table *res_table;
1648430a0491SDavid Ahern 	int i, num_nh = nhg->num_nh;
1649430a0491SDavid Ahern 
1650430a0491SDavid Ahern 	for (i = 0; i < num_nh; ++i) {
1651430a0491SDavid Ahern 		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
1652430a0491SDavid Ahern 
1653430a0491SDavid Ahern 		if (WARN_ON(!nhge->nh))
1654430a0491SDavid Ahern 			continue;
1655430a0491SDavid Ahern 
165690f33bffSNikolay Aleksandrov 		list_del_init(&nhge->nh_list);
1657430a0491SDavid Ahern 	}
1658283a72a5SPetr Machata 
1659283a72a5SPetr Machata 	if (nhg->resilient) {
1660283a72a5SPetr Machata 		res_table = rtnl_dereference(nhg->res_table);
1661283a72a5SPetr Machata 		nh_res_table_cancel_upkeep(res_table);
1662283a72a5SPetr Machata 	}
1663430a0491SDavid Ahern }
1664430a0491SDavid Ahern 
16657bf4796dSDavid Ahern /* not called for nexthop replace */
16664c7e8084SDavid Ahern static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
16674c7e8084SDavid Ahern {
1668f88d8ea6SDavid Ahern 	struct fib6_info *f6i, *tmp;
16694c7e8084SDavid Ahern 	bool do_flush = false;
16704c7e8084SDavid Ahern 	struct fib_info *fi;
16714c7e8084SDavid Ahern 
16724c7e8084SDavid Ahern 	list_for_each_entry(fi, &nh->fi_list, nh_list) {
16734c7e8084SDavid Ahern 		fi->fib_flags |= RTNH_F_DEAD;
16744c7e8084SDavid Ahern 		do_flush = true;
16754c7e8084SDavid Ahern 	}
16764c7e8084SDavid Ahern 	if (do_flush)
16774c7e8084SDavid Ahern 		fib_flush(net);
1678f88d8ea6SDavid Ahern 
1679f88d8ea6SDavid Ahern 	/* ip6_del_rt removes the entry from this list hence the _safe */
1680f88d8ea6SDavid Ahern 	list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
1681f88d8ea6SDavid Ahern 		/* __ip6_del_rt does a release, so do a hold here */
1682f88d8ea6SDavid Ahern 		fib6_info_hold(f6i);
16834f80116dSRoopa Prabhu 		ipv6_stub->ip6_del_rt(net, f6i,
16844f80116dSRoopa Prabhu 				      !net->ipv4.sysctl_nexthop_compat_mode);
1685f88d8ea6SDavid Ahern 	}
16864c7e8084SDavid Ahern }
16874c7e8084SDavid Ahern 
1688430a0491SDavid Ahern static void __remove_nexthop(struct net *net, struct nexthop *nh,
1689430a0491SDavid Ahern 			     struct nl_info *nlinfo)
1690430a0491SDavid Ahern {
16914c7e8084SDavid Ahern 	__remove_nexthop_fib(net, nh);
16924c7e8084SDavid Ahern 
1693430a0491SDavid Ahern 	if (nh->is_group) {
1694430a0491SDavid Ahern 		remove_nexthop_group(nh, nlinfo);
1695430a0491SDavid Ahern 	} else {
1696597cfe4fSDavid Ahern 		struct nh_info *nhi;
1697597cfe4fSDavid Ahern 
1698597cfe4fSDavid Ahern 		nhi = rtnl_dereference(nh->nh_info);
1699597cfe4fSDavid Ahern 		if (nhi->fib_nhc.nhc_dev)
1700597cfe4fSDavid Ahern 			hlist_del(&nhi->dev_hash);
1701430a0491SDavid Ahern 
1702430a0491SDavid Ahern 		remove_nexthop_from_groups(net, nh, nlinfo);
1703430a0491SDavid Ahern 	}
1704597cfe4fSDavid Ahern }
1705597cfe4fSDavid Ahern 
1706ab84be7eSDavid Ahern static void remove_nexthop(struct net *net, struct nexthop *nh,
1707430a0491SDavid Ahern 			   struct nl_info *nlinfo)
1708ab84be7eSDavid Ahern {
17093578d53dSIdo Schimmel 	call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL);
17100695564bSIdo Schimmel 
1711ab84be7eSDavid Ahern 	/* remove from the tree */
1712ab84be7eSDavid Ahern 	rb_erase(&nh->rb_node, &net->nexthop.rb_root);
1713ab84be7eSDavid Ahern 
1714ab84be7eSDavid Ahern 	if (nlinfo)
1715ab84be7eSDavid Ahern 		nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
1716ab84be7eSDavid Ahern 
1717430a0491SDavid Ahern 	__remove_nexthop(net, nh, nlinfo);
1718ab84be7eSDavid Ahern 	nh_base_seq_inc(net);
1719ab84be7eSDavid Ahern 
1720ab84be7eSDavid Ahern 	nexthop_put(nh);
1721ab84be7eSDavid Ahern }
1722ab84be7eSDavid Ahern 
17237bf4796dSDavid Ahern /* if any FIB entries reference this nexthop, any dst entries
17247bf4796dSDavid Ahern  * need to be regenerated
17257bf4796dSDavid Ahern  */
17267bf4796dSDavid Ahern static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
17277bf4796dSDavid Ahern {
17287bf4796dSDavid Ahern 	struct fib6_info *f6i;
17297bf4796dSDavid Ahern 
17307bf4796dSDavid Ahern 	if (!list_empty(&nh->fi_list))
17317bf4796dSDavid Ahern 		rt_cache_flush(net);
17327bf4796dSDavid Ahern 
17337bf4796dSDavid Ahern 	list_for_each_entry(f6i, &nh->f6i_list, nh_list)
17347bf4796dSDavid Ahern 		ipv6_stub->fib6_update_sernum(net, f6i);
17357bf4796dSDavid Ahern }
17367bf4796dSDavid Ahern 
17377bf4796dSDavid Ahern static int replace_nexthop_grp(struct net *net, struct nexthop *old,
1738597f48e4SPetr Machata 			       struct nexthop *new, const struct nh_config *cfg,
17397bf4796dSDavid Ahern 			       struct netlink_ext_ack *extack)
17407bf4796dSDavid Ahern {
1741283a72a5SPetr Machata 	struct nh_res_table *tmp_table = NULL;
1742283a72a5SPetr Machata 	struct nh_res_table *new_res_table;
1743283a72a5SPetr Machata 	struct nh_res_table *old_res_table;
17447bf4796dSDavid Ahern 	struct nh_group *oldg, *newg;
1745d144cc5fSIdo Schimmel 	int i, err;
17467bf4796dSDavid Ahern 
17477bf4796dSDavid Ahern 	if (!new->is_group) {
17487bf4796dSDavid Ahern 		NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
17497bf4796dSDavid Ahern 		return -EINVAL;
17507bf4796dSDavid Ahern 	}
17517bf4796dSDavid Ahern 
17527bf4796dSDavid Ahern 	oldg = rtnl_dereference(old->nh_grp);
17537bf4796dSDavid Ahern 	newg = rtnl_dereference(new->nh_grp);
17547bf4796dSDavid Ahern 
1755283a72a5SPetr Machata 	if (newg->mpath != oldg->mpath) {
1756283a72a5SPetr Machata 		NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type.");
1757283a72a5SPetr Machata 		return -EINVAL;
1758283a72a5SPetr Machata 	}
1759283a72a5SPetr Machata 
1760283a72a5SPetr Machata 	if (newg->mpath) {
1761283a72a5SPetr Machata 		err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new,
1762283a72a5SPetr Machata 					     extack);
1763283a72a5SPetr Machata 		if (err)
1764283a72a5SPetr Machata 			return err;
1765283a72a5SPetr Machata 	} else if (newg->resilient) {
1766283a72a5SPetr Machata 		new_res_table = rtnl_dereference(newg->res_table);
1767283a72a5SPetr Machata 		old_res_table = rtnl_dereference(oldg->res_table);
1768283a72a5SPetr Machata 
1769283a72a5SPetr Machata 		/* Accept if num_nh_buckets was not given, but if it was
1770283a72a5SPetr Machata 		 * given, demand that the value be correct.
1771283a72a5SPetr Machata 		 */
1772283a72a5SPetr Machata 		if (cfg->nh_grp_res_has_num_buckets &&
1773283a72a5SPetr Machata 		    cfg->nh_grp_res_num_buckets !=
1774283a72a5SPetr Machata 		    old_res_table->num_nh_buckets) {
1775283a72a5SPetr Machata 			NL_SET_ERR_MSG(extack, "Can not change number of buckets of a resilient nexthop group.");
1776283a72a5SPetr Machata 			return -EINVAL;
1777283a72a5SPetr Machata 		}
1778283a72a5SPetr Machata 
1779*7c37c7e0SPetr Machata 		/* Emit a pre-replace notification so that listeners could veto
1780*7c37c7e0SPetr Machata 		 * a potentially unsupported configuration. Otherwise,
1781*7c37c7e0SPetr Machata 		 * individual bucket replacement notifications would need to be
1782*7c37c7e0SPetr Machata 		 * vetoed, which is something that should only happen if the
1783*7c37c7e0SPetr Machata 		 * bucket is currently active.
1784*7c37c7e0SPetr Machata 		 */
1785*7c37c7e0SPetr Machata 		err = call_nexthop_res_table_notifiers(net, new, extack);
1786*7c37c7e0SPetr Machata 		if (err)
1787*7c37c7e0SPetr Machata 			return err;
1788*7c37c7e0SPetr Machata 
1789283a72a5SPetr Machata 		if (cfg->nh_grp_res_has_idle_timer)
1790283a72a5SPetr Machata 			old_res_table->idle_timer = cfg->nh_grp_res_idle_timer;
1791283a72a5SPetr Machata 		if (cfg->nh_grp_res_has_unbalanced_timer)
1792283a72a5SPetr Machata 			old_res_table->unbalanced_timer =
1793283a72a5SPetr Machata 				cfg->nh_grp_res_unbalanced_timer;
1794283a72a5SPetr Machata 
1795283a72a5SPetr Machata 		replace_nexthop_grp_res(oldg, newg);
1796283a72a5SPetr Machata 
1797283a72a5SPetr Machata 		tmp_table = new_res_table;
1798283a72a5SPetr Machata 		rcu_assign_pointer(newg->res_table, old_res_table);
1799283a72a5SPetr Machata 		rcu_assign_pointer(newg->spare->res_table, old_res_table);
1800283a72a5SPetr Machata 	}
1801283a72a5SPetr Machata 
18027bf4796dSDavid Ahern 	/* update parents - used by nexthop code for cleanup */
18037bf4796dSDavid Ahern 	for (i = 0; i < newg->num_nh; i++)
18047bf4796dSDavid Ahern 		newg->nh_entries[i].nh_parent = old;
18057bf4796dSDavid Ahern 
18067bf4796dSDavid Ahern 	rcu_assign_pointer(old->nh_grp, newg);
18077bf4796dSDavid Ahern 
1808283a72a5SPetr Machata 	if (newg->resilient) {
1809283a72a5SPetr Machata 		rcu_assign_pointer(oldg->res_table, tmp_table);
1810283a72a5SPetr Machata 		rcu_assign_pointer(oldg->spare->res_table, tmp_table);
1811283a72a5SPetr Machata 	}
1812283a72a5SPetr Machata 
18137bf4796dSDavid Ahern 	for (i = 0; i < oldg->num_nh; i++)
18147bf4796dSDavid Ahern 		oldg->nh_entries[i].nh_parent = new;
18157bf4796dSDavid Ahern 
18167bf4796dSDavid Ahern 	rcu_assign_pointer(new->nh_grp, oldg);
18177bf4796dSDavid Ahern 
18187bf4796dSDavid Ahern 	return 0;
18197bf4796dSDavid Ahern }
18207bf4796dSDavid Ahern 
1821885a3b15SIdo Schimmel static void nh_group_v4_update(struct nh_group *nhg)
1822885a3b15SIdo Schimmel {
1823885a3b15SIdo Schimmel 	struct nh_grp_entry *nhges;
1824885a3b15SIdo Schimmel 	bool has_v4 = false;
1825885a3b15SIdo Schimmel 	int i;
1826885a3b15SIdo Schimmel 
1827885a3b15SIdo Schimmel 	nhges = nhg->nh_entries;
1828885a3b15SIdo Schimmel 	for (i = 0; i < nhg->num_nh; i++) {
1829885a3b15SIdo Schimmel 		struct nh_info *nhi;
1830885a3b15SIdo Schimmel 
1831885a3b15SIdo Schimmel 		nhi = rtnl_dereference(nhges[i].nh->nh_info);
1832885a3b15SIdo Schimmel 		if (nhi->family == AF_INET)
1833885a3b15SIdo Schimmel 			has_v4 = true;
1834885a3b15SIdo Schimmel 	}
1835885a3b15SIdo Schimmel 	nhg->has_v4 = has_v4;
1836885a3b15SIdo Schimmel }
1837885a3b15SIdo Schimmel 
1838*7c37c7e0SPetr Machata static int replace_nexthop_single_notify_res(struct net *net,
1839*7c37c7e0SPetr Machata 					     struct nh_res_table *res_table,
1840*7c37c7e0SPetr Machata 					     struct nexthop *old,
1841*7c37c7e0SPetr Machata 					     struct nh_info *oldi,
1842*7c37c7e0SPetr Machata 					     struct nh_info *newi,
1843*7c37c7e0SPetr Machata 					     struct netlink_ext_ack *extack)
1844*7c37c7e0SPetr Machata {
1845*7c37c7e0SPetr Machata 	u32 nhg_id = res_table->nhg_id;
1846*7c37c7e0SPetr Machata 	int err;
1847*7c37c7e0SPetr Machata 	u16 i;
1848*7c37c7e0SPetr Machata 
1849*7c37c7e0SPetr Machata 	for (i = 0; i < res_table->num_nh_buckets; i++) {
1850*7c37c7e0SPetr Machata 		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
1851*7c37c7e0SPetr Machata 		struct nh_grp_entry *nhge;
1852*7c37c7e0SPetr Machata 
1853*7c37c7e0SPetr Machata 		nhge = rtnl_dereference(bucket->nh_entry);
1854*7c37c7e0SPetr Machata 		if (nhge->nh == old) {
1855*7c37c7e0SPetr Machata 			err = __call_nexthop_res_bucket_notifiers(net, nhg_id,
1856*7c37c7e0SPetr Machata 								  i, true,
1857*7c37c7e0SPetr Machata 								  oldi, newi,
1858*7c37c7e0SPetr Machata 								  extack);
1859*7c37c7e0SPetr Machata 			if (err)
1860*7c37c7e0SPetr Machata 				goto err_notify;
1861*7c37c7e0SPetr Machata 		}
1862*7c37c7e0SPetr Machata 	}
1863*7c37c7e0SPetr Machata 
1864*7c37c7e0SPetr Machata 	return 0;
1865*7c37c7e0SPetr Machata 
1866*7c37c7e0SPetr Machata err_notify:
1867*7c37c7e0SPetr Machata 	while (i-- > 0) {
1868*7c37c7e0SPetr Machata 		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
1869*7c37c7e0SPetr Machata 		struct nh_grp_entry *nhge;
1870*7c37c7e0SPetr Machata 
1871*7c37c7e0SPetr Machata 		nhge = rtnl_dereference(bucket->nh_entry);
1872*7c37c7e0SPetr Machata 		if (nhge->nh == old)
1873*7c37c7e0SPetr Machata 			__call_nexthop_res_bucket_notifiers(net, nhg_id, i,
1874*7c37c7e0SPetr Machata 							    true, newi, oldi,
1875*7c37c7e0SPetr Machata 							    extack);
1876*7c37c7e0SPetr Machata 	}
1877*7c37c7e0SPetr Machata 	return err;
1878*7c37c7e0SPetr Machata }
1879*7c37c7e0SPetr Machata 
1880*7c37c7e0SPetr Machata static int replace_nexthop_single_notify(struct net *net,
1881*7c37c7e0SPetr Machata 					 struct nexthop *group_nh,
1882*7c37c7e0SPetr Machata 					 struct nexthop *old,
1883*7c37c7e0SPetr Machata 					 struct nh_info *oldi,
1884*7c37c7e0SPetr Machata 					 struct nh_info *newi,
1885*7c37c7e0SPetr Machata 					 struct netlink_ext_ack *extack)
1886*7c37c7e0SPetr Machata {
1887*7c37c7e0SPetr Machata 	struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp);
1888*7c37c7e0SPetr Machata 	struct nh_res_table *res_table;
1889*7c37c7e0SPetr Machata 
1890*7c37c7e0SPetr Machata 	if (nhg->mpath) {
1891*7c37c7e0SPetr Machata 		return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE,
1892*7c37c7e0SPetr Machata 					      group_nh, extack);
1893*7c37c7e0SPetr Machata 	} else if (nhg->resilient) {
1894*7c37c7e0SPetr Machata 		res_table = rtnl_dereference(nhg->res_table);
1895*7c37c7e0SPetr Machata 		return replace_nexthop_single_notify_res(net, res_table,
1896*7c37c7e0SPetr Machata 							 old, oldi, newi,
1897*7c37c7e0SPetr Machata 							 extack);
1898*7c37c7e0SPetr Machata 	}
1899*7c37c7e0SPetr Machata 
1900*7c37c7e0SPetr Machata 	return -EINVAL;
1901*7c37c7e0SPetr Machata }
1902*7c37c7e0SPetr Machata 
19037bf4796dSDavid Ahern static int replace_nexthop_single(struct net *net, struct nexthop *old,
19047bf4796dSDavid Ahern 				  struct nexthop *new,
19057bf4796dSDavid Ahern 				  struct netlink_ext_ack *extack)
19067bf4796dSDavid Ahern {
1907f17bc33dSIdo Schimmel 	u8 old_protocol, old_nh_flags;
19087bf4796dSDavid Ahern 	struct nh_info *oldi, *newi;
1909f17bc33dSIdo Schimmel 	struct nh_grp_entry *nhge;
19108c09c9f9SIdo Schimmel 	int err;
19117bf4796dSDavid Ahern 
19127bf4796dSDavid Ahern 	if (new->is_group) {
19137bf4796dSDavid Ahern 		NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
19147bf4796dSDavid Ahern 		return -EINVAL;
19157bf4796dSDavid Ahern 	}
19167bf4796dSDavid Ahern 
19178c09c9f9SIdo Schimmel 	err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
19188c09c9f9SIdo Schimmel 	if (err)
19198c09c9f9SIdo Schimmel 		return err;
19208c09c9f9SIdo Schimmel 
19218c09c9f9SIdo Schimmel 	/* Hardware flags were set on 'old' as 'new' is not in the red-black
19228c09c9f9SIdo Schimmel 	 * tree. Therefore, inherit the flags from 'old' to 'new'.
19238c09c9f9SIdo Schimmel 	 */
19248c09c9f9SIdo Schimmel 	new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP);
19258c09c9f9SIdo Schimmel 
19267bf4796dSDavid Ahern 	oldi = rtnl_dereference(old->nh_info);
19277bf4796dSDavid Ahern 	newi = rtnl_dereference(new->nh_info);
19287bf4796dSDavid Ahern 
19297bf4796dSDavid Ahern 	newi->nh_parent = old;
19307bf4796dSDavid Ahern 	oldi->nh_parent = new;
19317bf4796dSDavid Ahern 
1932f17bc33dSIdo Schimmel 	old_protocol = old->protocol;
1933f17bc33dSIdo Schimmel 	old_nh_flags = old->nh_flags;
1934f17bc33dSIdo Schimmel 
19357bf4796dSDavid Ahern 	old->protocol = new->protocol;
19367bf4796dSDavid Ahern 	old->nh_flags = new->nh_flags;
19377bf4796dSDavid Ahern 
19387bf4796dSDavid Ahern 	rcu_assign_pointer(old->nh_info, newi);
19397bf4796dSDavid Ahern 	rcu_assign_pointer(new->nh_info, oldi);
19407bf4796dSDavid Ahern 
1941f17bc33dSIdo Schimmel 	/* Send a replace notification for all the groups using the nexthop. */
1942f17bc33dSIdo Schimmel 	list_for_each_entry(nhge, &old->grp_list, nh_list) {
1943f17bc33dSIdo Schimmel 		struct nexthop *nhp = nhge->nh_parent;
1944f17bc33dSIdo Schimmel 
1945*7c37c7e0SPetr Machata 		err = replace_nexthop_single_notify(net, nhp, old, oldi, newi,
1946f17bc33dSIdo Schimmel 						    extack);
1947f17bc33dSIdo Schimmel 		if (err)
1948f17bc33dSIdo Schimmel 			goto err_notify;
1949f17bc33dSIdo Schimmel 	}
1950f17bc33dSIdo Schimmel 
1951885a3b15SIdo Schimmel 	/* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
1952885a3b15SIdo Schimmel 	 * update IPv4 indication in all the groups using the nexthop.
1953885a3b15SIdo Schimmel 	 */
1954885a3b15SIdo Schimmel 	if (oldi->family == AF_INET && newi->family == AF_INET6) {
1955885a3b15SIdo Schimmel 		list_for_each_entry(nhge, &old->grp_list, nh_list) {
1956885a3b15SIdo Schimmel 			struct nexthop *nhp = nhge->nh_parent;
1957885a3b15SIdo Schimmel 			struct nh_group *nhg;
1958885a3b15SIdo Schimmel 
1959885a3b15SIdo Schimmel 			nhg = rtnl_dereference(nhp->nh_grp);
1960885a3b15SIdo Schimmel 			nh_group_v4_update(nhg);
1961885a3b15SIdo Schimmel 		}
1962885a3b15SIdo Schimmel 	}
1963885a3b15SIdo Schimmel 
19647bf4796dSDavid Ahern 	return 0;
1965f17bc33dSIdo Schimmel 
1966f17bc33dSIdo Schimmel err_notify:
1967f17bc33dSIdo Schimmel 	rcu_assign_pointer(new->nh_info, newi);
1968f17bc33dSIdo Schimmel 	rcu_assign_pointer(old->nh_info, oldi);
1969f17bc33dSIdo Schimmel 	old->nh_flags = old_nh_flags;
1970f17bc33dSIdo Schimmel 	old->protocol = old_protocol;
1971f17bc33dSIdo Schimmel 	oldi->nh_parent = old;
1972f17bc33dSIdo Schimmel 	newi->nh_parent = new;
1973f17bc33dSIdo Schimmel 	list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
1974f17bc33dSIdo Schimmel 		struct nexthop *nhp = nhge->nh_parent;
1975f17bc33dSIdo Schimmel 
1976*7c37c7e0SPetr Machata 		replace_nexthop_single_notify(net, nhp, old, newi, oldi, NULL);
1977f17bc33dSIdo Schimmel 	}
1978f17bc33dSIdo Schimmel 	call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
1979f17bc33dSIdo Schimmel 	return err;
19807bf4796dSDavid Ahern }
19817bf4796dSDavid Ahern 
19827bf4796dSDavid Ahern static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
19837bf4796dSDavid Ahern 				     struct nl_info *info)
19847bf4796dSDavid Ahern {
19857bf4796dSDavid Ahern 	struct fib6_info *f6i;
19867bf4796dSDavid Ahern 
19877bf4796dSDavid Ahern 	if (!list_empty(&nh->fi_list)) {
19887bf4796dSDavid Ahern 		struct fib_info *fi;
19897bf4796dSDavid Ahern 
19907bf4796dSDavid Ahern 		/* expectation is a few fib_info per nexthop and then
19917bf4796dSDavid Ahern 		 * a lot of routes per fib_info. So mark the fib_info
19927bf4796dSDavid Ahern 		 * and then walk the fib tables once
19937bf4796dSDavid Ahern 		 */
19947bf4796dSDavid Ahern 		list_for_each_entry(fi, &nh->fi_list, nh_list)
19957bf4796dSDavid Ahern 			fi->nh_updated = true;
19967bf4796dSDavid Ahern 
19977bf4796dSDavid Ahern 		fib_info_notify_update(net, info);
19987bf4796dSDavid Ahern 
19997bf4796dSDavid Ahern 		list_for_each_entry(fi, &nh->fi_list, nh_list)
20007bf4796dSDavid Ahern 			fi->nh_updated = false;
20017bf4796dSDavid Ahern 	}
20027bf4796dSDavid Ahern 
20037bf4796dSDavid Ahern 	list_for_each_entry(f6i, &nh->f6i_list, nh_list)
20047bf4796dSDavid Ahern 		ipv6_stub->fib6_rt_update(net, f6i, info);
20057bf4796dSDavid Ahern }
20067bf4796dSDavid Ahern 
20077bf4796dSDavid Ahern /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
20087bf4796dSDavid Ahern  * linked to this nexthop and for all groups that the nexthop
20097bf4796dSDavid Ahern  * is a member of
20107bf4796dSDavid Ahern  */
20117bf4796dSDavid Ahern static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
20127bf4796dSDavid Ahern 				   struct nl_info *info)
20137bf4796dSDavid Ahern {
20147bf4796dSDavid Ahern 	struct nh_grp_entry *nhge;
20157bf4796dSDavid Ahern 
20167bf4796dSDavid Ahern 	__nexthop_replace_notify(net, nh, info);
20177bf4796dSDavid Ahern 
20187bf4796dSDavid Ahern 	list_for_each_entry(nhge, &nh->grp_list, nh_list)
20197bf4796dSDavid Ahern 		__nexthop_replace_notify(net, nhge->nh_parent, info);
20207bf4796dSDavid Ahern }
20217bf4796dSDavid Ahern 
2022ab84be7eSDavid Ahern static int replace_nexthop(struct net *net, struct nexthop *old,
2023597f48e4SPetr Machata 			   struct nexthop *new, const struct nh_config *cfg,
2024597f48e4SPetr Machata 			   struct netlink_ext_ack *extack)
2025ab84be7eSDavid Ahern {
20267bf4796dSDavid Ahern 	bool new_is_reject = false;
20277bf4796dSDavid Ahern 	struct nh_grp_entry *nhge;
20287bf4796dSDavid Ahern 	int err;
20297bf4796dSDavid Ahern 
20307bf4796dSDavid Ahern 	/* check that existing FIB entries are ok with the
20317bf4796dSDavid Ahern 	 * new nexthop definition
20327bf4796dSDavid Ahern 	 */
20337bf4796dSDavid Ahern 	err = fib_check_nh_list(old, new, extack);
20347bf4796dSDavid Ahern 	if (err)
20357bf4796dSDavid Ahern 		return err;
20367bf4796dSDavid Ahern 
20377bf4796dSDavid Ahern 	err = fib6_check_nh_list(old, new, extack);
20387bf4796dSDavid Ahern 	if (err)
20397bf4796dSDavid Ahern 		return err;
20407bf4796dSDavid Ahern 
20417bf4796dSDavid Ahern 	if (!new->is_group) {
20427bf4796dSDavid Ahern 		struct nh_info *nhi = rtnl_dereference(new->nh_info);
20437bf4796dSDavid Ahern 
20447bf4796dSDavid Ahern 		new_is_reject = nhi->reject_nh;
20457bf4796dSDavid Ahern 	}
20467bf4796dSDavid Ahern 
20477bf4796dSDavid Ahern 	list_for_each_entry(nhge, &old->grp_list, nh_list) {
20487bf4796dSDavid Ahern 		/* if new nexthop is a blackhole, any groups using this
20497bf4796dSDavid Ahern 		 * nexthop cannot have more than 1 path
20507bf4796dSDavid Ahern 		 */
20517bf4796dSDavid Ahern 		if (new_is_reject &&
20527bf4796dSDavid Ahern 		    nexthop_num_path(nhge->nh_parent) > 1) {
20537bf4796dSDavid Ahern 			NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
20547bf4796dSDavid Ahern 			return -EINVAL;
20557bf4796dSDavid Ahern 		}
20567bf4796dSDavid Ahern 
20577bf4796dSDavid Ahern 		err = fib_check_nh_list(nhge->nh_parent, new, extack);
20587bf4796dSDavid Ahern 		if (err)
20597bf4796dSDavid Ahern 			return err;
20607bf4796dSDavid Ahern 
20617bf4796dSDavid Ahern 		err = fib6_check_nh_list(nhge->nh_parent, new, extack);
20627bf4796dSDavid Ahern 		if (err)
20637bf4796dSDavid Ahern 			return err;
20647bf4796dSDavid Ahern 	}
20657bf4796dSDavid Ahern 
20667bf4796dSDavid Ahern 	if (old->is_group)
2067597f48e4SPetr Machata 		err = replace_nexthop_grp(net, old, new, cfg, extack);
20687bf4796dSDavid Ahern 	else
20697bf4796dSDavid Ahern 		err = replace_nexthop_single(net, old, new, extack);
20707bf4796dSDavid Ahern 
20717bf4796dSDavid Ahern 	if (!err) {
20727bf4796dSDavid Ahern 		nh_rt_cache_flush(net, old);
20737bf4796dSDavid Ahern 
20747bf4796dSDavid Ahern 		__remove_nexthop(net, new, NULL);
20757bf4796dSDavid Ahern 		nexthop_put(new);
20767bf4796dSDavid Ahern 	}
20777bf4796dSDavid Ahern 
20787bf4796dSDavid Ahern 	return err;
2079ab84be7eSDavid Ahern }
2080ab84be7eSDavid Ahern 
2081ab84be7eSDavid Ahern /* called with rtnl_lock held */
2082ab84be7eSDavid Ahern static int insert_nexthop(struct net *net, struct nexthop *new_nh,
2083ab84be7eSDavid Ahern 			  struct nh_config *cfg, struct netlink_ext_ack *extack)
2084ab84be7eSDavid Ahern {
2085ab84be7eSDavid Ahern 	struct rb_node **pp, *parent = NULL, *next;
2086ab84be7eSDavid Ahern 	struct rb_root *root = &net->nexthop.rb_root;
2087ab84be7eSDavid Ahern 	bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
2088ab84be7eSDavid Ahern 	bool create = !!(cfg->nlflags & NLM_F_CREATE);
2089ab84be7eSDavid Ahern 	u32 new_id = new_nh->id;
20907bf4796dSDavid Ahern 	int replace_notify = 0;
2091ab84be7eSDavid Ahern 	int rc = -EEXIST;
2092ab84be7eSDavid Ahern 
2093ab84be7eSDavid Ahern 	pp = &root->rb_node;
2094ab84be7eSDavid Ahern 	while (1) {
2095ab84be7eSDavid Ahern 		struct nexthop *nh;
2096ab84be7eSDavid Ahern 
2097233c6378SIdo Schimmel 		next = *pp;
2098ab84be7eSDavid Ahern 		if (!next)
2099ab84be7eSDavid Ahern 			break;
2100ab84be7eSDavid Ahern 
2101ab84be7eSDavid Ahern 		parent = next;
2102ab84be7eSDavid Ahern 
2103ab84be7eSDavid Ahern 		nh = rb_entry(parent, struct nexthop, rb_node);
2104ab84be7eSDavid Ahern 		if (new_id < nh->id) {
2105ab84be7eSDavid Ahern 			pp = &next->rb_left;
2106ab84be7eSDavid Ahern 		} else if (new_id > nh->id) {
2107ab84be7eSDavid Ahern 			pp = &next->rb_right;
2108ab84be7eSDavid Ahern 		} else if (replace) {
2109597f48e4SPetr Machata 			rc = replace_nexthop(net, nh, new_nh, cfg, extack);
21107bf4796dSDavid Ahern 			if (!rc) {
2111ab84be7eSDavid Ahern 				new_nh = nh; /* send notification with old nh */
21127bf4796dSDavid Ahern 				replace_notify = 1;
21137bf4796dSDavid Ahern 			}
2114ab84be7eSDavid Ahern 			goto out;
2115ab84be7eSDavid Ahern 		} else {
2116ab84be7eSDavid Ahern 			/* id already exists and not a replace */
2117ab84be7eSDavid Ahern 			goto out;
2118ab84be7eSDavid Ahern 		}
2119ab84be7eSDavid Ahern 	}
2120ab84be7eSDavid Ahern 
2121ab84be7eSDavid Ahern 	if (replace && !create) {
2122ab84be7eSDavid Ahern 		NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
2123ab84be7eSDavid Ahern 		rc = -ENOENT;
2124ab84be7eSDavid Ahern 		goto out;
2125ab84be7eSDavid Ahern 	}
2126ab84be7eSDavid Ahern 
2127283a72a5SPetr Machata 	if (new_nh->is_group) {
2128283a72a5SPetr Machata 		struct nh_group *nhg = rtnl_dereference(new_nh->nh_grp);
2129283a72a5SPetr Machata 		struct nh_res_table *res_table;
2130283a72a5SPetr Machata 
2131283a72a5SPetr Machata 		if (nhg->resilient) {
2132283a72a5SPetr Machata 			res_table = rtnl_dereference(nhg->res_table);
2133283a72a5SPetr Machata 
2134283a72a5SPetr Machata 			/* Not passing the number of buckets is OK when
2135283a72a5SPetr Machata 			 * replacing, but not when creating a new group.
2136283a72a5SPetr Machata 			 */
2137283a72a5SPetr Machata 			if (!cfg->nh_grp_res_has_num_buckets) {
2138283a72a5SPetr Machata 				NL_SET_ERR_MSG(extack, "Number of buckets not specified for nexthop group insertion");
2139283a72a5SPetr Machata 				rc = -EINVAL;
2140283a72a5SPetr Machata 				goto out;
2141283a72a5SPetr Machata 			}
2142283a72a5SPetr Machata 
2143283a72a5SPetr Machata 			nh_res_group_rebalance(nhg, res_table);
2144*7c37c7e0SPetr Machata 
2145*7c37c7e0SPetr Machata 			/* Do not send bucket notifications, we do full
2146*7c37c7e0SPetr Machata 			 * notification below.
2147*7c37c7e0SPetr Machata 			 */
2148*7c37c7e0SPetr Machata 			nh_res_table_upkeep(res_table, false);
2149283a72a5SPetr Machata 		}
2150283a72a5SPetr Machata 	}
2151283a72a5SPetr Machata 
2152ab84be7eSDavid Ahern 	rb_link_node_rcu(&new_nh->rb_node, parent, pp);
2153ab84be7eSDavid Ahern 	rb_insert_color(&new_nh->rb_node, root);
2154732d167bSIdo Schimmel 
2155*7c37c7e0SPetr Machata 	/* The initial insertion is a full notification for mpath as well
2156*7c37c7e0SPetr Machata 	 * as resilient groups.
2157*7c37c7e0SPetr Machata 	 */
2158732d167bSIdo Schimmel 	rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
2159732d167bSIdo Schimmel 	if (rc)
2160732d167bSIdo Schimmel 		rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
2161732d167bSIdo Schimmel 
2162ab84be7eSDavid Ahern out:
2163ab84be7eSDavid Ahern 	if (!rc) {
2164ab84be7eSDavid Ahern 		nh_base_seq_inc(net);
2165ab84be7eSDavid Ahern 		nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
21664f80116dSRoopa Prabhu 		if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
21677bf4796dSDavid Ahern 			nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
2168ab84be7eSDavid Ahern 	}
2169ab84be7eSDavid Ahern 
2170ab84be7eSDavid Ahern 	return rc;
2171ab84be7eSDavid Ahern }
2172ab84be7eSDavid Ahern 
2173597cfe4fSDavid Ahern /* rtnl */
2174597cfe4fSDavid Ahern /* remove all nexthops tied to a device being deleted */
217576c03bf8SIdo Schimmel static void nexthop_flush_dev(struct net_device *dev, unsigned long event)
2176597cfe4fSDavid Ahern {
2177597cfe4fSDavid Ahern 	unsigned int hash = nh_dev_hashfn(dev->ifindex);
2178597cfe4fSDavid Ahern 	struct net *net = dev_net(dev);
2179597cfe4fSDavid Ahern 	struct hlist_head *head = &net->nexthop.devhash[hash];
2180597cfe4fSDavid Ahern 	struct hlist_node *n;
2181597cfe4fSDavid Ahern 	struct nh_info *nhi;
2182597cfe4fSDavid Ahern 
2183597cfe4fSDavid Ahern 	hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
2184597cfe4fSDavid Ahern 		if (nhi->fib_nhc.nhc_dev != dev)
2185597cfe4fSDavid Ahern 			continue;
2186597cfe4fSDavid Ahern 
218776c03bf8SIdo Schimmel 		if (nhi->reject_nh &&
218876c03bf8SIdo Schimmel 		    (event == NETDEV_DOWN || event == NETDEV_CHANGE))
218976c03bf8SIdo Schimmel 			continue;
219076c03bf8SIdo Schimmel 
2191430a0491SDavid Ahern 		remove_nexthop(net, nhi->nh_parent, NULL);
2192597cfe4fSDavid Ahern 	}
2193597cfe4fSDavid Ahern }
2194597cfe4fSDavid Ahern 
2195ab84be7eSDavid Ahern /* rtnl; called when net namespace is deleted */
2196ab84be7eSDavid Ahern static void flush_all_nexthops(struct net *net)
2197ab84be7eSDavid Ahern {
2198ab84be7eSDavid Ahern 	struct rb_root *root = &net->nexthop.rb_root;
2199ab84be7eSDavid Ahern 	struct rb_node *node;
2200ab84be7eSDavid Ahern 	struct nexthop *nh;
2201ab84be7eSDavid Ahern 
2202ab84be7eSDavid Ahern 	while ((node = rb_first(root))) {
2203ab84be7eSDavid Ahern 		nh = rb_entry(node, struct nexthop, rb_node);
2204430a0491SDavid Ahern 		remove_nexthop(net, nh, NULL);
2205ab84be7eSDavid Ahern 		cond_resched();
2206ab84be7eSDavid Ahern 	}
2207ab84be7eSDavid Ahern }
2208ab84be7eSDavid Ahern 
2209430a0491SDavid Ahern static struct nexthop *nexthop_create_group(struct net *net,
2210430a0491SDavid Ahern 					    struct nh_config *cfg)
2211430a0491SDavid Ahern {
2212430a0491SDavid Ahern 	struct nlattr *grps_attr = cfg->nh_grp;
2213430a0491SDavid Ahern 	struct nexthop_grp *entry = nla_data(grps_attr);
221490f33bffSNikolay Aleksandrov 	u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
2215430a0491SDavid Ahern 	struct nh_group *nhg;
2216430a0491SDavid Ahern 	struct nexthop *nh;
2217283a72a5SPetr Machata 	int err;
2218430a0491SDavid Ahern 	int i;
2219430a0491SDavid Ahern 
2220eeaac363SNikolay Aleksandrov 	if (WARN_ON(!num_nh))
2221eeaac363SNikolay Aleksandrov 		return ERR_PTR(-EINVAL);
2222eeaac363SNikolay Aleksandrov 
2223430a0491SDavid Ahern 	nh = nexthop_alloc();
2224430a0491SDavid Ahern 	if (!nh)
2225430a0491SDavid Ahern 		return ERR_PTR(-ENOMEM);
2226430a0491SDavid Ahern 
2227430a0491SDavid Ahern 	nh->is_group = 1;
2228430a0491SDavid Ahern 
222990f33bffSNikolay Aleksandrov 	nhg = nexthop_grp_alloc(num_nh);
2230430a0491SDavid Ahern 	if (!nhg) {
2231430a0491SDavid Ahern 		kfree(nh);
2232430a0491SDavid Ahern 		return ERR_PTR(-ENOMEM);
2233430a0491SDavid Ahern 	}
2234430a0491SDavid Ahern 
223590f33bffSNikolay Aleksandrov 	/* spare group used for removals */
223690f33bffSNikolay Aleksandrov 	nhg->spare = nexthop_grp_alloc(num_nh);
2237dafe2078SPatrick Eigensatz 	if (!nhg->spare) {
223890f33bffSNikolay Aleksandrov 		kfree(nhg);
223990f33bffSNikolay Aleksandrov 		kfree(nh);
2240dafe2078SPatrick Eigensatz 		return ERR_PTR(-ENOMEM);
224190f33bffSNikolay Aleksandrov 	}
224290f33bffSNikolay Aleksandrov 	nhg->spare->spare = nhg;
224390f33bffSNikolay Aleksandrov 
2244430a0491SDavid Ahern 	for (i = 0; i < nhg->num_nh; ++i) {
2245430a0491SDavid Ahern 		struct nexthop *nhe;
2246430a0491SDavid Ahern 		struct nh_info *nhi;
2247430a0491SDavid Ahern 
2248430a0491SDavid Ahern 		nhe = nexthop_find_by_id(net, entry[i].id);
2249283a72a5SPetr Machata 		if (!nexthop_get(nhe)) {
2250283a72a5SPetr Machata 			err = -ENOENT;
2251430a0491SDavid Ahern 			goto out_no_nh;
2252283a72a5SPetr Machata 		}
2253430a0491SDavid Ahern 
2254430a0491SDavid Ahern 		nhi = rtnl_dereference(nhe->nh_info);
2255430a0491SDavid Ahern 		if (nhi->family == AF_INET)
2256430a0491SDavid Ahern 			nhg->has_v4 = true;
2257430a0491SDavid Ahern 
2258430a0491SDavid Ahern 		nhg->nh_entries[i].nh = nhe;
2259430a0491SDavid Ahern 		nhg->nh_entries[i].weight = entry[i].weight + 1;
2260430a0491SDavid Ahern 		list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
2261430a0491SDavid Ahern 		nhg->nh_entries[i].nh_parent = nh;
2262430a0491SDavid Ahern 	}
2263430a0491SDavid Ahern 
226490e1a9e2SPetr Machata 	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
2265430a0491SDavid Ahern 		nhg->mpath = 1;
226690e1a9e2SPetr Machata 		nhg->is_multipath = true;
2267710ec562SIdo Schimmel 	} else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) {
2268283a72a5SPetr Machata 		struct nh_res_table *res_table;
2269283a72a5SPetr Machata 
2270283a72a5SPetr Machata 		/* Bounce resilient groups for now. */
2271283a72a5SPetr Machata 		err = -EINVAL;
2272283a72a5SPetr Machata 		goto out_no_nh;
2273283a72a5SPetr Machata 
2274283a72a5SPetr Machata 		res_table = nexthop_res_table_alloc(net, cfg->nh_id, cfg);
2275283a72a5SPetr Machata 		if (!res_table) {
2276283a72a5SPetr Machata 			err = -ENOMEM;
2277710ec562SIdo Schimmel 			goto out_no_nh;
227890e1a9e2SPetr Machata 		}
2279720ccd9aSPetr Machata 
2280283a72a5SPetr Machata 		rcu_assign_pointer(nhg->spare->res_table, res_table);
2281283a72a5SPetr Machata 		rcu_assign_pointer(nhg->res_table, res_table);
2282283a72a5SPetr Machata 		nhg->resilient = true;
2283283a72a5SPetr Machata 		nhg->is_multipath = true;
2284283a72a5SPetr Machata 	}
2285283a72a5SPetr Machata 
2286283a72a5SPetr Machata 	WARN_ON_ONCE(nhg->mpath + nhg->resilient != 1);
2287720ccd9aSPetr Machata 
2288720ccd9aSPetr Machata 	if (nhg->mpath)
2289283a72a5SPetr Machata 		nh_mp_group_rebalance(nhg);
2290430a0491SDavid Ahern 
229138428d68SRoopa Prabhu 	if (cfg->nh_fdb)
2292ce9ac056SDavid Ahern 		nhg->fdb_nh = 1;
229338428d68SRoopa Prabhu 
2294430a0491SDavid Ahern 	rcu_assign_pointer(nh->nh_grp, nhg);
2295430a0491SDavid Ahern 
2296430a0491SDavid Ahern 	return nh;
2297430a0491SDavid Ahern 
2298430a0491SDavid Ahern out_no_nh:
22997b01e53eSIdo Schimmel 	for (i--; i >= 0; --i) {
23007b01e53eSIdo Schimmel 		list_del(&nhg->nh_entries[i].nh_list);
2301430a0491SDavid Ahern 		nexthop_put(nhg->nh_entries[i].nh);
23027b01e53eSIdo Schimmel 	}
2303430a0491SDavid Ahern 
230490f33bffSNikolay Aleksandrov 	kfree(nhg->spare);
2305430a0491SDavid Ahern 	kfree(nhg);
2306430a0491SDavid Ahern 	kfree(nh);
2307430a0491SDavid Ahern 
2308283a72a5SPetr Machata 	return ERR_PTR(err);
2309430a0491SDavid Ahern }
2310430a0491SDavid Ahern 
2311597cfe4fSDavid Ahern static int nh_create_ipv4(struct net *net, struct nexthop *nh,
2312597cfe4fSDavid Ahern 			  struct nh_info *nhi, struct nh_config *cfg,
2313597cfe4fSDavid Ahern 			  struct netlink_ext_ack *extack)
2314597cfe4fSDavid Ahern {
2315597cfe4fSDavid Ahern 	struct fib_nh *fib_nh = &nhi->fib_nh;
2316597cfe4fSDavid Ahern 	struct fib_config fib_cfg = {
2317597cfe4fSDavid Ahern 		.fc_oif   = cfg->nh_ifindex,
2318597cfe4fSDavid Ahern 		.fc_gw4   = cfg->gw.ipv4,
2319597cfe4fSDavid Ahern 		.fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
2320597cfe4fSDavid Ahern 		.fc_flags = cfg->nh_flags,
2321b513bd03SDavid Ahern 		.fc_encap = cfg->nh_encap,
2322b513bd03SDavid Ahern 		.fc_encap_type = cfg->nh_encap_type,
2323597cfe4fSDavid Ahern 	};
232438428d68SRoopa Prabhu 	u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
2325c76c9925SColin Ian King 	int err;
2326597cfe4fSDavid Ahern 
2327597cfe4fSDavid Ahern 	err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
2328597cfe4fSDavid Ahern 	if (err) {
2329597cfe4fSDavid Ahern 		fib_nh_release(net, fib_nh);
2330597cfe4fSDavid Ahern 		goto out;
2331597cfe4fSDavid Ahern 	}
2332597cfe4fSDavid Ahern 
2333ce9ac056SDavid Ahern 	if (nhi->fdb_nh)
233438428d68SRoopa Prabhu 		goto out;
233538428d68SRoopa Prabhu 
2336597cfe4fSDavid Ahern 	/* sets nh_dev if successful */
2337597cfe4fSDavid Ahern 	err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
2338597cfe4fSDavid Ahern 	if (!err) {
2339597cfe4fSDavid Ahern 		nh->nh_flags = fib_nh->fib_nh_flags;
2340dcb1ecb5SDavid Ahern 		fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
2341dcb1ecb5SDavid Ahern 					  fib_nh->fib_nh_scope);
2342597cfe4fSDavid Ahern 	} else {
2343597cfe4fSDavid Ahern 		fib_nh_release(net, fib_nh);
2344597cfe4fSDavid Ahern 	}
2345597cfe4fSDavid Ahern out:
2346597cfe4fSDavid Ahern 	return err;
2347597cfe4fSDavid Ahern }
2348597cfe4fSDavid Ahern 
234953010f99SDavid Ahern static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
235053010f99SDavid Ahern 			  struct nh_info *nhi, struct nh_config *cfg,
235153010f99SDavid Ahern 			  struct netlink_ext_ack *extack)
235253010f99SDavid Ahern {
235353010f99SDavid Ahern 	struct fib6_nh *fib6_nh = &nhi->fib6_nh;
235453010f99SDavid Ahern 	struct fib6_config fib6_cfg = {
235553010f99SDavid Ahern 		.fc_table = l3mdev_fib_table(cfg->dev),
235653010f99SDavid Ahern 		.fc_ifindex = cfg->nh_ifindex,
235753010f99SDavid Ahern 		.fc_gateway = cfg->gw.ipv6,
235853010f99SDavid Ahern 		.fc_flags = cfg->nh_flags,
2359b513bd03SDavid Ahern 		.fc_encap = cfg->nh_encap,
2360b513bd03SDavid Ahern 		.fc_encap_type = cfg->nh_encap_type,
236138428d68SRoopa Prabhu 		.fc_is_fdb = cfg->nh_fdb,
236253010f99SDavid Ahern 	};
23636f43e525SColin Ian King 	int err;
236453010f99SDavid Ahern 
236553010f99SDavid Ahern 	if (!ipv6_addr_any(&cfg->gw.ipv6))
236653010f99SDavid Ahern 		fib6_cfg.fc_flags |= RTF_GATEWAY;
236753010f99SDavid Ahern 
236853010f99SDavid Ahern 	/* sets nh_dev if successful */
236953010f99SDavid Ahern 	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
237053010f99SDavid Ahern 				      extack);
237153010f99SDavid Ahern 	if (err)
237253010f99SDavid Ahern 		ipv6_stub->fib6_nh_release(fib6_nh);
237353010f99SDavid Ahern 	else
237453010f99SDavid Ahern 		nh->nh_flags = fib6_nh->fib_nh_flags;
237553010f99SDavid Ahern 
237653010f99SDavid Ahern 	return err;
237753010f99SDavid Ahern }
237853010f99SDavid Ahern 
2379ab84be7eSDavid Ahern static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
2380ab84be7eSDavid Ahern 				      struct netlink_ext_ack *extack)
2381ab84be7eSDavid Ahern {
2382ab84be7eSDavid Ahern 	struct nh_info *nhi;
2383ab84be7eSDavid Ahern 	struct nexthop *nh;
2384ab84be7eSDavid Ahern 	int err = 0;
2385ab84be7eSDavid Ahern 
2386ab84be7eSDavid Ahern 	nh = nexthop_alloc();
2387ab84be7eSDavid Ahern 	if (!nh)
2388ab84be7eSDavid Ahern 		return ERR_PTR(-ENOMEM);
2389ab84be7eSDavid Ahern 
2390ab84be7eSDavid Ahern 	nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
2391ab84be7eSDavid Ahern 	if (!nhi) {
2392ab84be7eSDavid Ahern 		kfree(nh);
2393ab84be7eSDavid Ahern 		return ERR_PTR(-ENOMEM);
2394ab84be7eSDavid Ahern 	}
2395ab84be7eSDavid Ahern 
2396ab84be7eSDavid Ahern 	nh->nh_flags = cfg->nh_flags;
2397ab84be7eSDavid Ahern 	nh->net = net;
2398ab84be7eSDavid Ahern 
2399ab84be7eSDavid Ahern 	nhi->nh_parent = nh;
2400ab84be7eSDavid Ahern 	nhi->family = cfg->nh_family;
2401ab84be7eSDavid Ahern 	nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
2402ab84be7eSDavid Ahern 
240338428d68SRoopa Prabhu 	if (cfg->nh_fdb)
2404ce9ac056SDavid Ahern 		nhi->fdb_nh = 1;
240538428d68SRoopa Prabhu 
2406ab84be7eSDavid Ahern 	if (cfg->nh_blackhole) {
2407ab84be7eSDavid Ahern 		nhi->reject_nh = 1;
2408ab84be7eSDavid Ahern 		cfg->nh_ifindex = net->loopback_dev->ifindex;
2409ab84be7eSDavid Ahern 	}
2410ab84be7eSDavid Ahern 
2411597cfe4fSDavid Ahern 	switch (cfg->nh_family) {
2412597cfe4fSDavid Ahern 	case AF_INET:
2413597cfe4fSDavid Ahern 		err = nh_create_ipv4(net, nh, nhi, cfg, extack);
2414597cfe4fSDavid Ahern 		break;
241553010f99SDavid Ahern 	case AF_INET6:
241653010f99SDavid Ahern 		err = nh_create_ipv6(net, nh, nhi, cfg, extack);
241753010f99SDavid Ahern 		break;
2418597cfe4fSDavid Ahern 	}
2419597cfe4fSDavid Ahern 
2420ab84be7eSDavid Ahern 	if (err) {
2421ab84be7eSDavid Ahern 		kfree(nhi);
2422ab84be7eSDavid Ahern 		kfree(nh);
2423ab84be7eSDavid Ahern 		return ERR_PTR(err);
2424ab84be7eSDavid Ahern 	}
2425ab84be7eSDavid Ahern 
2426597cfe4fSDavid Ahern 	/* add the entry to the device based hash */
2427ce9ac056SDavid Ahern 	if (!nhi->fdb_nh)
2428597cfe4fSDavid Ahern 		nexthop_devhash_add(net, nhi);
2429597cfe4fSDavid Ahern 
2430ab84be7eSDavid Ahern 	rcu_assign_pointer(nh->nh_info, nhi);
2431ab84be7eSDavid Ahern 
2432ab84be7eSDavid Ahern 	return nh;
2433ab84be7eSDavid Ahern }
2434ab84be7eSDavid Ahern 
2435ab84be7eSDavid Ahern /* called with rtnl lock held */
2436ab84be7eSDavid Ahern static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
2437ab84be7eSDavid Ahern 				   struct netlink_ext_ack *extack)
2438ab84be7eSDavid Ahern {
2439ab84be7eSDavid Ahern 	struct nexthop *nh;
2440ab84be7eSDavid Ahern 	int err;
2441ab84be7eSDavid Ahern 
2442ab84be7eSDavid Ahern 	if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
2443ab84be7eSDavid Ahern 		NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
2444ab84be7eSDavid Ahern 		return ERR_PTR(-EINVAL);
2445ab84be7eSDavid Ahern 	}
2446ab84be7eSDavid Ahern 
2447ab84be7eSDavid Ahern 	if (!cfg->nh_id) {
2448ab84be7eSDavid Ahern 		cfg->nh_id = nh_find_unused_id(net);
2449ab84be7eSDavid Ahern 		if (!cfg->nh_id) {
2450ab84be7eSDavid Ahern 			NL_SET_ERR_MSG(extack, "No unused id");
2451ab84be7eSDavid Ahern 			return ERR_PTR(-EINVAL);
2452ab84be7eSDavid Ahern 		}
2453ab84be7eSDavid Ahern 	}
2454ab84be7eSDavid Ahern 
2455430a0491SDavid Ahern 	if (cfg->nh_grp)
2456430a0491SDavid Ahern 		nh = nexthop_create_group(net, cfg);
2457430a0491SDavid Ahern 	else
2458ab84be7eSDavid Ahern 		nh = nexthop_create(net, cfg, extack);
2459430a0491SDavid Ahern 
2460ab84be7eSDavid Ahern 	if (IS_ERR(nh))
2461ab84be7eSDavid Ahern 		return nh;
2462ab84be7eSDavid Ahern 
2463ab84be7eSDavid Ahern 	refcount_set(&nh->refcnt, 1);
2464ab84be7eSDavid Ahern 	nh->id = cfg->nh_id;
2465ab84be7eSDavid Ahern 	nh->protocol = cfg->nh_protocol;
2466ab84be7eSDavid Ahern 	nh->net = net;
2467ab84be7eSDavid Ahern 
2468ab84be7eSDavid Ahern 	err = insert_nexthop(net, nh, cfg, extack);
2469ab84be7eSDavid Ahern 	if (err) {
2470430a0491SDavid Ahern 		__remove_nexthop(net, nh, NULL);
2471ab84be7eSDavid Ahern 		nexthop_put(nh);
2472ab84be7eSDavid Ahern 		nh = ERR_PTR(err);
2473ab84be7eSDavid Ahern 	}
2474ab84be7eSDavid Ahern 
2475ab84be7eSDavid Ahern 	return nh;
2476ab84be7eSDavid Ahern }
2477ab84be7eSDavid Ahern 
2478ab84be7eSDavid Ahern static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
2479ab84be7eSDavid Ahern 			    struct nlmsghdr *nlh, struct nh_config *cfg,
2480ab84be7eSDavid Ahern 			    struct netlink_ext_ack *extack)
2481ab84be7eSDavid Ahern {
2482ab84be7eSDavid Ahern 	struct nhmsg *nhm = nlmsg_data(nlh);
2483643d0878SPetr Machata 	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
2484ab84be7eSDavid Ahern 	int err;
2485ab84be7eSDavid Ahern 
2486643d0878SPetr Machata 	err = nlmsg_parse(nlh, sizeof(*nhm), tb,
2487643d0878SPetr Machata 			  ARRAY_SIZE(rtm_nh_policy_new) - 1,
2488643d0878SPetr Machata 			  rtm_nh_policy_new, extack);
2489ab84be7eSDavid Ahern 	if (err < 0)
2490ab84be7eSDavid Ahern 		return err;
2491ab84be7eSDavid Ahern 
2492ab84be7eSDavid Ahern 	err = -EINVAL;
2493ab84be7eSDavid Ahern 	if (nhm->resvd || nhm->nh_scope) {
2494ab84be7eSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
2495ab84be7eSDavid Ahern 		goto out;
2496ab84be7eSDavid Ahern 	}
2497ab84be7eSDavid Ahern 	if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
2498ab84be7eSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
2499ab84be7eSDavid Ahern 		goto out;
2500ab84be7eSDavid Ahern 	}
2501ab84be7eSDavid Ahern 
2502ab84be7eSDavid Ahern 	switch (nhm->nh_family) {
2503597cfe4fSDavid Ahern 	case AF_INET:
250453010f99SDavid Ahern 	case AF_INET6:
2505597cfe4fSDavid Ahern 		break;
2506430a0491SDavid Ahern 	case AF_UNSPEC:
2507430a0491SDavid Ahern 		if (tb[NHA_GROUP])
2508430a0491SDavid Ahern 			break;
2509a8eceea8SJoe Perches 		fallthrough;
2510ab84be7eSDavid Ahern 	default:
2511ab84be7eSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid address family");
2512ab84be7eSDavid Ahern 		goto out;
2513ab84be7eSDavid Ahern 	}
2514ab84be7eSDavid Ahern 
2515ab84be7eSDavid Ahern 	memset(cfg, 0, sizeof(*cfg));
2516ab84be7eSDavid Ahern 	cfg->nlflags = nlh->nlmsg_flags;
2517ab84be7eSDavid Ahern 	cfg->nlinfo.portid = NETLINK_CB(skb).portid;
2518ab84be7eSDavid Ahern 	cfg->nlinfo.nlh = nlh;
2519ab84be7eSDavid Ahern 	cfg->nlinfo.nl_net = net;
2520ab84be7eSDavid Ahern 
2521ab84be7eSDavid Ahern 	cfg->nh_family = nhm->nh_family;
2522ab84be7eSDavid Ahern 	cfg->nh_protocol = nhm->nh_protocol;
2523ab84be7eSDavid Ahern 	cfg->nh_flags = nhm->nh_flags;
2524ab84be7eSDavid Ahern 
2525ab84be7eSDavid Ahern 	if (tb[NHA_ID])
2526ab84be7eSDavid Ahern 		cfg->nh_id = nla_get_u32(tb[NHA_ID]);
2527ab84be7eSDavid Ahern 
252838428d68SRoopa Prabhu 	if (tb[NHA_FDB]) {
252938428d68SRoopa Prabhu 		if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
253038428d68SRoopa Prabhu 		    tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE]) {
253138428d68SRoopa Prabhu 			NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
253238428d68SRoopa Prabhu 			goto out;
253338428d68SRoopa Prabhu 		}
253438428d68SRoopa Prabhu 		if (nhm->nh_flags) {
253538428d68SRoopa Prabhu 			NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
253638428d68SRoopa Prabhu 			goto out;
253738428d68SRoopa Prabhu 		}
253838428d68SRoopa Prabhu 		cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
253938428d68SRoopa Prabhu 	}
254038428d68SRoopa Prabhu 
2541430a0491SDavid Ahern 	if (tb[NHA_GROUP]) {
2542430a0491SDavid Ahern 		if (nhm->nh_family != AF_UNSPEC) {
2543430a0491SDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid family for group");
2544430a0491SDavid Ahern 			goto out;
2545430a0491SDavid Ahern 		}
2546430a0491SDavid Ahern 		cfg->nh_grp = tb[NHA_GROUP];
2547430a0491SDavid Ahern 
2548430a0491SDavid Ahern 		cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
2549430a0491SDavid Ahern 		if (tb[NHA_GROUP_TYPE])
2550430a0491SDavid Ahern 			cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
2551430a0491SDavid Ahern 
2552430a0491SDavid Ahern 		if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
2553430a0491SDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid group type");
2554430a0491SDavid Ahern 			goto out;
2555430a0491SDavid Ahern 		}
2556643d0878SPetr Machata 		err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack);
2557430a0491SDavid Ahern 
2558430a0491SDavid Ahern 		/* no other attributes should be set */
2559430a0491SDavid Ahern 		goto out;
2560430a0491SDavid Ahern 	}
2561430a0491SDavid Ahern 
2562ab84be7eSDavid Ahern 	if (tb[NHA_BLACKHOLE]) {
2563b513bd03SDavid Ahern 		if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
256438428d68SRoopa Prabhu 		    tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
256538428d68SRoopa Prabhu 			NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
2566ab84be7eSDavid Ahern 			goto out;
2567ab84be7eSDavid Ahern 		}
2568ab84be7eSDavid Ahern 
2569ab84be7eSDavid Ahern 		cfg->nh_blackhole = 1;
2570ab84be7eSDavid Ahern 		err = 0;
2571ab84be7eSDavid Ahern 		goto out;
2572ab84be7eSDavid Ahern 	}
2573ab84be7eSDavid Ahern 
257438428d68SRoopa Prabhu 	if (!cfg->nh_fdb && !tb[NHA_OIF]) {
257538428d68SRoopa Prabhu 		NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
2576ab84be7eSDavid Ahern 		goto out;
2577ab84be7eSDavid Ahern 	}
2578ab84be7eSDavid Ahern 
257938428d68SRoopa Prabhu 	if (!cfg->nh_fdb && tb[NHA_OIF]) {
2580ab84be7eSDavid Ahern 		cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
2581ab84be7eSDavid Ahern 		if (cfg->nh_ifindex)
2582ab84be7eSDavid Ahern 			cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
2583ab84be7eSDavid Ahern 
2584ab84be7eSDavid Ahern 		if (!cfg->dev) {
2585ab84be7eSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid device index");
2586ab84be7eSDavid Ahern 			goto out;
2587ab84be7eSDavid Ahern 		} else if (!(cfg->dev->flags & IFF_UP)) {
2588ab84be7eSDavid Ahern 			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2589ab84be7eSDavid Ahern 			err = -ENETDOWN;
2590ab84be7eSDavid Ahern 			goto out;
2591ab84be7eSDavid Ahern 		} else if (!netif_carrier_ok(cfg->dev)) {
2592ab84be7eSDavid Ahern 			NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
2593ab84be7eSDavid Ahern 			err = -ENETDOWN;
2594ab84be7eSDavid Ahern 			goto out;
2595ab84be7eSDavid Ahern 		}
259638428d68SRoopa Prabhu 	}
2597ab84be7eSDavid Ahern 
2598597cfe4fSDavid Ahern 	err = -EINVAL;
2599597cfe4fSDavid Ahern 	if (tb[NHA_GATEWAY]) {
2600597cfe4fSDavid Ahern 		struct nlattr *gwa = tb[NHA_GATEWAY];
2601597cfe4fSDavid Ahern 
2602597cfe4fSDavid Ahern 		switch (cfg->nh_family) {
2603597cfe4fSDavid Ahern 		case AF_INET:
2604597cfe4fSDavid Ahern 			if (nla_len(gwa) != sizeof(u32)) {
2605597cfe4fSDavid Ahern 				NL_SET_ERR_MSG(extack, "Invalid gateway");
2606597cfe4fSDavid Ahern 				goto out;
2607597cfe4fSDavid Ahern 			}
2608597cfe4fSDavid Ahern 			cfg->gw.ipv4 = nla_get_be32(gwa);
2609597cfe4fSDavid Ahern 			break;
261053010f99SDavid Ahern 		case AF_INET6:
261153010f99SDavid Ahern 			if (nla_len(gwa) != sizeof(struct in6_addr)) {
261253010f99SDavid Ahern 				NL_SET_ERR_MSG(extack, "Invalid gateway");
261353010f99SDavid Ahern 				goto out;
261453010f99SDavid Ahern 			}
261553010f99SDavid Ahern 			cfg->gw.ipv6 = nla_get_in6_addr(gwa);
261653010f99SDavid Ahern 			break;
2617597cfe4fSDavid Ahern 		default:
2618597cfe4fSDavid Ahern 			NL_SET_ERR_MSG(extack,
2619597cfe4fSDavid Ahern 				       "Unknown address family for gateway");
2620597cfe4fSDavid Ahern 			goto out;
2621597cfe4fSDavid Ahern 		}
2622597cfe4fSDavid Ahern 	} else {
2623597cfe4fSDavid Ahern 		/* device only nexthop (no gateway) */
2624597cfe4fSDavid Ahern 		if (cfg->nh_flags & RTNH_F_ONLINK) {
2625597cfe4fSDavid Ahern 			NL_SET_ERR_MSG(extack,
2626597cfe4fSDavid Ahern 				       "ONLINK flag can not be set for nexthop without a gateway");
2627597cfe4fSDavid Ahern 			goto out;
2628597cfe4fSDavid Ahern 		}
2629597cfe4fSDavid Ahern 	}
2630597cfe4fSDavid Ahern 
2631b513bd03SDavid Ahern 	if (tb[NHA_ENCAP]) {
2632b513bd03SDavid Ahern 		cfg->nh_encap = tb[NHA_ENCAP];
2633b513bd03SDavid Ahern 
2634b513bd03SDavid Ahern 		if (!tb[NHA_ENCAP_TYPE]) {
2635b513bd03SDavid Ahern 			NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
2636b513bd03SDavid Ahern 			goto out;
2637b513bd03SDavid Ahern 		}
2638b513bd03SDavid Ahern 
2639b513bd03SDavid Ahern 		cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
2640b513bd03SDavid Ahern 		err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
2641b513bd03SDavid Ahern 		if (err < 0)
2642b513bd03SDavid Ahern 			goto out;
2643b513bd03SDavid Ahern 
2644b513bd03SDavid Ahern 	} else if (tb[NHA_ENCAP_TYPE]) {
2645b513bd03SDavid Ahern 		NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
2646b513bd03SDavid Ahern 		goto out;
2647b513bd03SDavid Ahern 	}
2648b513bd03SDavid Ahern 
2649b513bd03SDavid Ahern 
2650ab84be7eSDavid Ahern 	err = 0;
2651ab84be7eSDavid Ahern out:
2652ab84be7eSDavid Ahern 	return err;
2653ab84be7eSDavid Ahern }
2654ab84be7eSDavid Ahern 
2655ab84be7eSDavid Ahern /* rtnl */
2656ab84be7eSDavid Ahern static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
2657ab84be7eSDavid Ahern 			   struct netlink_ext_ack *extack)
2658ab84be7eSDavid Ahern {
2659ab84be7eSDavid Ahern 	struct net *net = sock_net(skb->sk);
2660ab84be7eSDavid Ahern 	struct nh_config cfg;
2661ab84be7eSDavid Ahern 	struct nexthop *nh;
2662ab84be7eSDavid Ahern 	int err;
2663ab84be7eSDavid Ahern 
2664ab84be7eSDavid Ahern 	err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
2665ab84be7eSDavid Ahern 	if (!err) {
2666ab84be7eSDavid Ahern 		nh = nexthop_add(net, &cfg, extack);
2667ab84be7eSDavid Ahern 		if (IS_ERR(nh))
2668ab84be7eSDavid Ahern 			err = PTR_ERR(nh);
2669ab84be7eSDavid Ahern 	}
2670ab84be7eSDavid Ahern 
2671ab84be7eSDavid Ahern 	return err;
2672ab84be7eSDavid Ahern }
2673ab84be7eSDavid Ahern 
26740bccf8edSPetr Machata static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
26750bccf8edSPetr Machata 				  struct nlattr **tb, u32 *id,
2676ab84be7eSDavid Ahern 				  struct netlink_ext_ack *extack)
2677ab84be7eSDavid Ahern {
2678ab84be7eSDavid Ahern 	struct nhmsg *nhm = nlmsg_data(nlh);
26790bccf8edSPetr Machata 
26800bccf8edSPetr Machata 	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
26810bccf8edSPetr Machata 		NL_SET_ERR_MSG(extack, "Invalid values in header");
26820bccf8edSPetr Machata 		return -EINVAL;
26830bccf8edSPetr Machata 	}
26840bccf8edSPetr Machata 
26850bccf8edSPetr Machata 	if (!tb[NHA_ID]) {
26860bccf8edSPetr Machata 		NL_SET_ERR_MSG(extack, "Nexthop id is missing");
26870bccf8edSPetr Machata 		return -EINVAL;
26880bccf8edSPetr Machata 	}
26890bccf8edSPetr Machata 
26900bccf8edSPetr Machata 	*id = nla_get_u32(tb[NHA_ID]);
26910bccf8edSPetr Machata 	if (!(*id)) {
26920bccf8edSPetr Machata 		NL_SET_ERR_MSG(extack, "Invalid nexthop id");
26930bccf8edSPetr Machata 		return -EINVAL;
26940bccf8edSPetr Machata 	}
26950bccf8edSPetr Machata 
26960bccf8edSPetr Machata 	return 0;
26970bccf8edSPetr Machata }
26980bccf8edSPetr Machata 
26990bccf8edSPetr Machata static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id,
27000bccf8edSPetr Machata 				struct netlink_ext_ack *extack)
27010bccf8edSPetr Machata {
270260f5ad5eSPetr Machata 	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
270360f5ad5eSPetr Machata 	int err;
2704ab84be7eSDavid Ahern 
27050bccf8edSPetr Machata 	err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
270660f5ad5eSPetr Machata 			  ARRAY_SIZE(rtm_nh_policy_get) - 1,
270760f5ad5eSPetr Machata 			  rtm_nh_policy_get, extack);
2708ab84be7eSDavid Ahern 	if (err < 0)
2709ab84be7eSDavid Ahern 		return err;
2710ab84be7eSDavid Ahern 
27110bccf8edSPetr Machata 	return __nh_valid_get_del_req(nlh, tb, id, extack);
2712ab84be7eSDavid Ahern }
2713ab84be7eSDavid Ahern 
2714ab84be7eSDavid Ahern /* rtnl */
2715ab84be7eSDavid Ahern static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
2716ab84be7eSDavid Ahern 			   struct netlink_ext_ack *extack)
2717ab84be7eSDavid Ahern {
2718ab84be7eSDavid Ahern 	struct net *net = sock_net(skb->sk);
2719ab84be7eSDavid Ahern 	struct nl_info nlinfo = {
2720ab84be7eSDavid Ahern 		.nlh = nlh,
2721ab84be7eSDavid Ahern 		.nl_net = net,
2722ab84be7eSDavid Ahern 		.portid = NETLINK_CB(skb).portid,
2723ab84be7eSDavid Ahern 	};
2724ab84be7eSDavid Ahern 	struct nexthop *nh;
2725ab84be7eSDavid Ahern 	int err;
2726ab84be7eSDavid Ahern 	u32 id;
2727ab84be7eSDavid Ahern 
2728ab84be7eSDavid Ahern 	err = nh_valid_get_del_req(nlh, &id, extack);
2729ab84be7eSDavid Ahern 	if (err)
2730ab84be7eSDavid Ahern 		return err;
2731ab84be7eSDavid Ahern 
2732ab84be7eSDavid Ahern 	nh = nexthop_find_by_id(net, id);
2733ab84be7eSDavid Ahern 	if (!nh)
2734ab84be7eSDavid Ahern 		return -ENOENT;
2735ab84be7eSDavid Ahern 
2736430a0491SDavid Ahern 	remove_nexthop(net, nh, &nlinfo);
2737ab84be7eSDavid Ahern 
2738ab84be7eSDavid Ahern 	return 0;
2739ab84be7eSDavid Ahern }
2740ab84be7eSDavid Ahern 
2741ab84be7eSDavid Ahern /* rtnl */
2742ab84be7eSDavid Ahern static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2743ab84be7eSDavid Ahern 			   struct netlink_ext_ack *extack)
2744ab84be7eSDavid Ahern {
2745ab84be7eSDavid Ahern 	struct net *net = sock_net(in_skb->sk);
2746ab84be7eSDavid Ahern 	struct sk_buff *skb = NULL;
2747ab84be7eSDavid Ahern 	struct nexthop *nh;
2748ab84be7eSDavid Ahern 	int err;
2749ab84be7eSDavid Ahern 	u32 id;
2750ab84be7eSDavid Ahern 
2751ab84be7eSDavid Ahern 	err = nh_valid_get_del_req(nlh, &id, extack);
2752ab84be7eSDavid Ahern 	if (err)
2753ab84be7eSDavid Ahern 		return err;
2754ab84be7eSDavid Ahern 
2755ab84be7eSDavid Ahern 	err = -ENOBUFS;
2756ab84be7eSDavid Ahern 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2757ab84be7eSDavid Ahern 	if (!skb)
2758ab84be7eSDavid Ahern 		goto out;
2759ab84be7eSDavid Ahern 
2760ab84be7eSDavid Ahern 	err = -ENOENT;
2761ab84be7eSDavid Ahern 	nh = nexthop_find_by_id(net, id);
2762ab84be7eSDavid Ahern 	if (!nh)
2763ab84be7eSDavid Ahern 		goto errout_free;
2764ab84be7eSDavid Ahern 
2765ab84be7eSDavid Ahern 	err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
2766ab84be7eSDavid Ahern 			   nlh->nlmsg_seq, 0);
2767ab84be7eSDavid Ahern 	if (err < 0) {
2768ab84be7eSDavid Ahern 		WARN_ON(err == -EMSGSIZE);
2769ab84be7eSDavid Ahern 		goto errout_free;
2770ab84be7eSDavid Ahern 	}
2771ab84be7eSDavid Ahern 
2772ab84be7eSDavid Ahern 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2773ab84be7eSDavid Ahern out:
2774ab84be7eSDavid Ahern 	return err;
2775ab84be7eSDavid Ahern errout_free:
2776ab84be7eSDavid Ahern 	kfree_skb(skb);
2777ab84be7eSDavid Ahern 	goto out;
2778ab84be7eSDavid Ahern }
2779ab84be7eSDavid Ahern 
278056450ec6SPetr Machata struct nh_dump_filter {
278156450ec6SPetr Machata 	int dev_idx;
278256450ec6SPetr Machata 	int master_idx;
278356450ec6SPetr Machata 	bool group_filter;
278456450ec6SPetr Machata 	bool fdb_filter;
278556450ec6SPetr Machata };
278656450ec6SPetr Machata 
278756450ec6SPetr Machata static bool nh_dump_filtered(struct nexthop *nh,
278856450ec6SPetr Machata 			     struct nh_dump_filter *filter, u8 family)
2789ab84be7eSDavid Ahern {
2790ab84be7eSDavid Ahern 	const struct net_device *dev;
2791ab84be7eSDavid Ahern 	const struct nh_info *nhi;
2792ab84be7eSDavid Ahern 
279356450ec6SPetr Machata 	if (filter->group_filter && !nh->is_group)
2794430a0491SDavid Ahern 		return true;
2795430a0491SDavid Ahern 
279656450ec6SPetr Machata 	if (!filter->dev_idx && !filter->master_idx && !family)
2797ab84be7eSDavid Ahern 		return false;
2798ab84be7eSDavid Ahern 
2799430a0491SDavid Ahern 	if (nh->is_group)
2800430a0491SDavid Ahern 		return true;
2801430a0491SDavid Ahern 
2802ab84be7eSDavid Ahern 	nhi = rtnl_dereference(nh->nh_info);
2803ab84be7eSDavid Ahern 	if (family && nhi->family != family)
2804ab84be7eSDavid Ahern 		return true;
2805ab84be7eSDavid Ahern 
2806ab84be7eSDavid Ahern 	dev = nhi->fib_nhc.nhc_dev;
280756450ec6SPetr Machata 	if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx))
2808ab84be7eSDavid Ahern 		return true;
2809ab84be7eSDavid Ahern 
281056450ec6SPetr Machata 	if (filter->master_idx) {
2811ab84be7eSDavid Ahern 		struct net_device *master;
2812ab84be7eSDavid Ahern 
2813ab84be7eSDavid Ahern 		if (!dev)
2814ab84be7eSDavid Ahern 			return true;
2815ab84be7eSDavid Ahern 
2816ab84be7eSDavid Ahern 		master = netdev_master_upper_dev_get((struct net_device *)dev);
281756450ec6SPetr Machata 		if (!master || master->ifindex != filter->master_idx)
2818ab84be7eSDavid Ahern 			return true;
2819ab84be7eSDavid Ahern 	}
2820ab84be7eSDavid Ahern 
2821ab84be7eSDavid Ahern 	return false;
2822ab84be7eSDavid Ahern }
2823ab84be7eSDavid Ahern 
2824b9ebea12SPetr Machata static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb,
282556450ec6SPetr Machata 			       struct nh_dump_filter *filter,
2826b9ebea12SPetr Machata 			       struct netlink_ext_ack *extack)
2827ab84be7eSDavid Ahern {
2828ab84be7eSDavid Ahern 	struct nhmsg *nhm;
2829ab84be7eSDavid Ahern 	u32 idx;
2830ab84be7eSDavid Ahern 
283144551bffSPetr Machata 	if (tb[NHA_OIF]) {
283244551bffSPetr Machata 		idx = nla_get_u32(tb[NHA_OIF]);
2833ab84be7eSDavid Ahern 		if (idx > INT_MAX) {
2834ab84be7eSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid device index");
2835ab84be7eSDavid Ahern 			return -EINVAL;
2836ab84be7eSDavid Ahern 		}
283756450ec6SPetr Machata 		filter->dev_idx = idx;
283844551bffSPetr Machata 	}
283944551bffSPetr Machata 	if (tb[NHA_MASTER]) {
284044551bffSPetr Machata 		idx = nla_get_u32(tb[NHA_MASTER]);
2841ab84be7eSDavid Ahern 		if (idx > INT_MAX) {
2842ab84be7eSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid master device index");
2843ab84be7eSDavid Ahern 			return -EINVAL;
2844ab84be7eSDavid Ahern 		}
284556450ec6SPetr Machata 		filter->master_idx = idx;
2846ab84be7eSDavid Ahern 	}
284756450ec6SPetr Machata 	filter->group_filter = nla_get_flag(tb[NHA_GROUPS]);
284856450ec6SPetr Machata 	filter->fdb_filter = nla_get_flag(tb[NHA_FDB]);
2849ab84be7eSDavid Ahern 
2850ab84be7eSDavid Ahern 	nhm = nlmsg_data(nlh);
2851ab84be7eSDavid Ahern 	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
2852ab84be7eSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
2853ab84be7eSDavid Ahern 		return -EINVAL;
2854ab84be7eSDavid Ahern 	}
2855ab84be7eSDavid Ahern 
2856ab84be7eSDavid Ahern 	return 0;
2857ab84be7eSDavid Ahern }
2858ab84be7eSDavid Ahern 
2859b9ebea12SPetr Machata static int nh_valid_dump_req(const struct nlmsghdr *nlh,
2860b9ebea12SPetr Machata 			     struct nh_dump_filter *filter,
2861b9ebea12SPetr Machata 			     struct netlink_callback *cb)
2862b9ebea12SPetr Machata {
2863b9ebea12SPetr Machata 	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)];
2864b9ebea12SPetr Machata 	int err;
2865b9ebea12SPetr Machata 
2866b9ebea12SPetr Machata 	err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
2867b9ebea12SPetr Machata 			  ARRAY_SIZE(rtm_nh_policy_dump) - 1,
2868b9ebea12SPetr Machata 			  rtm_nh_policy_dump, cb->extack);
2869b9ebea12SPetr Machata 	if (err < 0)
2870b9ebea12SPetr Machata 		return err;
2871b9ebea12SPetr Machata 
2872b9ebea12SPetr Machata 	return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
2873b9ebea12SPetr Machata }
2874b9ebea12SPetr Machata 
2875a6fbbaa6SPetr Machata struct rtm_dump_nh_ctx {
2876a6fbbaa6SPetr Machata 	u32 idx;
2877a6fbbaa6SPetr Machata };
2878a6fbbaa6SPetr Machata 
2879a6fbbaa6SPetr Machata static struct rtm_dump_nh_ctx *
2880a6fbbaa6SPetr Machata rtm_dump_nh_ctx(struct netlink_callback *cb)
2881a6fbbaa6SPetr Machata {
2882a6fbbaa6SPetr Machata 	struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx;
2883a6fbbaa6SPetr Machata 
2884a6fbbaa6SPetr Machata 	BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
2885a6fbbaa6SPetr Machata 	return ctx;
2886a6fbbaa6SPetr Machata }
2887a6fbbaa6SPetr Machata 
2888cbee1807SPetr Machata static int rtm_dump_walk_nexthops(struct sk_buff *skb,
2889cbee1807SPetr Machata 				  struct netlink_callback *cb,
2890cbee1807SPetr Machata 				  struct rb_root *root,
2891cbee1807SPetr Machata 				  struct rtm_dump_nh_ctx *ctx,
2892e948217dSPetr Machata 				  int (*nh_cb)(struct sk_buff *skb,
2893e948217dSPetr Machata 					       struct netlink_callback *cb,
2894e948217dSPetr Machata 					       struct nexthop *nh, void *data),
2895e948217dSPetr Machata 				  void *data)
2896ab84be7eSDavid Ahern {
2897ab84be7eSDavid Ahern 	struct rb_node *node;
2898ab84be7eSDavid Ahern 	int idx = 0, s_idx;
2899ab84be7eSDavid Ahern 	int err;
2900ab84be7eSDavid Ahern 
2901a6fbbaa6SPetr Machata 	s_idx = ctx->idx;
2902ab84be7eSDavid Ahern 	for (node = rb_first(root); node; node = rb_next(node)) {
2903ab84be7eSDavid Ahern 		struct nexthop *nh;
2904ab84be7eSDavid Ahern 
2905ab84be7eSDavid Ahern 		if (idx < s_idx)
2906ab84be7eSDavid Ahern 			goto cont;
2907ab84be7eSDavid Ahern 
2908ab84be7eSDavid Ahern 		nh = rb_entry(node, struct nexthop, rb_node);
2909cbee1807SPetr Machata 		ctx->idx = idx;
2910e948217dSPetr Machata 		err = nh_cb(skb, cb, nh, data);
2911e948217dSPetr Machata 		if (err)
2912cbee1807SPetr Machata 			return err;
2913cbee1807SPetr Machata cont:
2914cbee1807SPetr Machata 		idx++;
2915cbee1807SPetr Machata 	}
2916cbee1807SPetr Machata 
2917cbee1807SPetr Machata 	ctx->idx = idx;
2918cbee1807SPetr Machata 	return 0;
2919cbee1807SPetr Machata }
2920cbee1807SPetr Machata 
2921e948217dSPetr Machata static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb,
2922e948217dSPetr Machata 			       struct nexthop *nh, void *data)
2923e948217dSPetr Machata {
2924e948217dSPetr Machata 	struct nhmsg *nhm = nlmsg_data(cb->nlh);
2925e948217dSPetr Machata 	struct nh_dump_filter *filter = data;
2926e948217dSPetr Machata 
2927e948217dSPetr Machata 	if (nh_dump_filtered(nh, filter, nhm->nh_family))
2928e948217dSPetr Machata 		return 0;
2929e948217dSPetr Machata 
2930e948217dSPetr Machata 	return nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
2931e948217dSPetr Machata 			    NETLINK_CB(cb->skb).portid,
2932e948217dSPetr Machata 			    cb->nlh->nlmsg_seq, NLM_F_MULTI);
2933e948217dSPetr Machata }
2934e948217dSPetr Machata 
2935cbee1807SPetr Machata /* rtnl */
2936cbee1807SPetr Machata static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
2937cbee1807SPetr Machata {
2938cbee1807SPetr Machata 	struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb);
2939cbee1807SPetr Machata 	struct net *net = sock_net(skb->sk);
2940cbee1807SPetr Machata 	struct rb_root *root = &net->nexthop.rb_root;
2941cbee1807SPetr Machata 	struct nh_dump_filter filter = {};
2942cbee1807SPetr Machata 	int err;
2943cbee1807SPetr Machata 
2944cbee1807SPetr Machata 	err = nh_valid_dump_req(cb->nlh, &filter, cb);
2945cbee1807SPetr Machata 	if (err < 0)
2946cbee1807SPetr Machata 		return err;
2947cbee1807SPetr Machata 
2948e948217dSPetr Machata 	err = rtm_dump_walk_nexthops(skb, cb, root, ctx,
2949e948217dSPetr Machata 				     &rtm_dump_nexthop_cb, &filter);
2950ab84be7eSDavid Ahern 	if (err < 0) {
2951ab84be7eSDavid Ahern 		if (likely(skb->len))
2952ab84be7eSDavid Ahern 			goto out;
2953ab84be7eSDavid Ahern 		goto out_err;
2954ab84be7eSDavid Ahern 	}
2955ab84be7eSDavid Ahern 
2956ab84be7eSDavid Ahern out:
2957ab84be7eSDavid Ahern 	err = skb->len;
2958ab84be7eSDavid Ahern out_err:
2959ab84be7eSDavid Ahern 	cb->seq = net->nexthop.seq;
2960ab84be7eSDavid Ahern 	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2961ab84be7eSDavid Ahern 	return err;
2962ab84be7eSDavid Ahern }
2963ab84be7eSDavid Ahern 
2964597cfe4fSDavid Ahern static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
2965597cfe4fSDavid Ahern {
2966597cfe4fSDavid Ahern 	unsigned int hash = nh_dev_hashfn(dev->ifindex);
2967597cfe4fSDavid Ahern 	struct net *net = dev_net(dev);
2968597cfe4fSDavid Ahern 	struct hlist_head *head = &net->nexthop.devhash[hash];
2969597cfe4fSDavid Ahern 	struct hlist_node *n;
2970597cfe4fSDavid Ahern 	struct nh_info *nhi;
2971597cfe4fSDavid Ahern 
2972597cfe4fSDavid Ahern 	hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
2973597cfe4fSDavid Ahern 		if (nhi->fib_nhc.nhc_dev == dev) {
2974597cfe4fSDavid Ahern 			if (nhi->family == AF_INET)
2975597cfe4fSDavid Ahern 				fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
2976597cfe4fSDavid Ahern 						   orig_mtu);
2977597cfe4fSDavid Ahern 		}
2978597cfe4fSDavid Ahern 	}
2979597cfe4fSDavid Ahern }
2980597cfe4fSDavid Ahern 
2981597cfe4fSDavid Ahern /* rtnl */
2982597cfe4fSDavid Ahern static int nh_netdev_event(struct notifier_block *this,
2983597cfe4fSDavid Ahern 			   unsigned long event, void *ptr)
2984597cfe4fSDavid Ahern {
2985597cfe4fSDavid Ahern 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2986597cfe4fSDavid Ahern 	struct netdev_notifier_info_ext *info_ext;
2987597cfe4fSDavid Ahern 
2988597cfe4fSDavid Ahern 	switch (event) {
2989597cfe4fSDavid Ahern 	case NETDEV_DOWN:
2990597cfe4fSDavid Ahern 	case NETDEV_UNREGISTER:
299176c03bf8SIdo Schimmel 		nexthop_flush_dev(dev, event);
2992597cfe4fSDavid Ahern 		break;
2993597cfe4fSDavid Ahern 	case NETDEV_CHANGE:
2994597cfe4fSDavid Ahern 		if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
299576c03bf8SIdo Schimmel 			nexthop_flush_dev(dev, event);
2996597cfe4fSDavid Ahern 		break;
2997597cfe4fSDavid Ahern 	case NETDEV_CHANGEMTU:
2998597cfe4fSDavid Ahern 		info_ext = ptr;
2999597cfe4fSDavid Ahern 		nexthop_sync_mtu(dev, info_ext->ext.mtu);
3000597cfe4fSDavid Ahern 		rt_cache_flush(dev_net(dev));
3001597cfe4fSDavid Ahern 		break;
3002597cfe4fSDavid Ahern 	}
3003597cfe4fSDavid Ahern 	return NOTIFY_DONE;
3004597cfe4fSDavid Ahern }
3005597cfe4fSDavid Ahern 
3006597cfe4fSDavid Ahern static struct notifier_block nh_netdev_notifier = {
3007597cfe4fSDavid Ahern 	.notifier_call = nh_netdev_event,
3008597cfe4fSDavid Ahern };
3009597cfe4fSDavid Ahern 
3010975ff7f3SIdo Schimmel static int nexthops_dump(struct net *net, struct notifier_block *nb,
3011975ff7f3SIdo Schimmel 			 struct netlink_ext_ack *extack)
3012975ff7f3SIdo Schimmel {
3013975ff7f3SIdo Schimmel 	struct rb_root *root = &net->nexthop.rb_root;
3014975ff7f3SIdo Schimmel 	struct rb_node *node;
3015975ff7f3SIdo Schimmel 	int err = 0;
3016975ff7f3SIdo Schimmel 
3017975ff7f3SIdo Schimmel 	for (node = rb_first(root); node; node = rb_next(node)) {
3018975ff7f3SIdo Schimmel 		struct nexthop *nh;
3019975ff7f3SIdo Schimmel 
3020975ff7f3SIdo Schimmel 		nh = rb_entry(node, struct nexthop, rb_node);
3021975ff7f3SIdo Schimmel 		err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
3022975ff7f3SIdo Schimmel 					    extack);
3023975ff7f3SIdo Schimmel 		if (err)
3024975ff7f3SIdo Schimmel 			break;
3025975ff7f3SIdo Schimmel 	}
3026975ff7f3SIdo Schimmel 
3027975ff7f3SIdo Schimmel 	return err;
3028975ff7f3SIdo Schimmel }
3029975ff7f3SIdo Schimmel 
3030ce7e9c8aSIdo Schimmel int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
3031ce7e9c8aSIdo Schimmel 			      struct netlink_ext_ack *extack)
30328590ceedSRoopa Prabhu {
3033975ff7f3SIdo Schimmel 	int err;
3034975ff7f3SIdo Schimmel 
3035975ff7f3SIdo Schimmel 	rtnl_lock();
3036975ff7f3SIdo Schimmel 	err = nexthops_dump(net, nb, extack);
3037975ff7f3SIdo Schimmel 	if (err)
3038975ff7f3SIdo Schimmel 		goto unlock;
3039975ff7f3SIdo Schimmel 	err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
304080690ec6SIdo Schimmel 					       nb);
3041975ff7f3SIdo Schimmel unlock:
3042975ff7f3SIdo Schimmel 	rtnl_unlock();
3043975ff7f3SIdo Schimmel 	return err;
30448590ceedSRoopa Prabhu }
30458590ceedSRoopa Prabhu EXPORT_SYMBOL(register_nexthop_notifier);
30468590ceedSRoopa Prabhu 
30478590ceedSRoopa Prabhu int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
30488590ceedSRoopa Prabhu {
304980690ec6SIdo Schimmel 	return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
30508590ceedSRoopa Prabhu 						  nb);
30518590ceedSRoopa Prabhu }
30528590ceedSRoopa Prabhu EXPORT_SYMBOL(unregister_nexthop_notifier);
30538590ceedSRoopa Prabhu 
3054e95f2592SIdo Schimmel void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap)
3055e95f2592SIdo Schimmel {
3056e95f2592SIdo Schimmel 	struct nexthop *nexthop;
3057e95f2592SIdo Schimmel 
3058e95f2592SIdo Schimmel 	rcu_read_lock();
3059e95f2592SIdo Schimmel 
3060e95f2592SIdo Schimmel 	nexthop = nexthop_find_by_id(net, id);
3061e95f2592SIdo Schimmel 	if (!nexthop)
3062e95f2592SIdo Schimmel 		goto out;
3063e95f2592SIdo Schimmel 
3064e95f2592SIdo Schimmel 	nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
3065e95f2592SIdo Schimmel 	if (offload)
3066e95f2592SIdo Schimmel 		nexthop->nh_flags |= RTNH_F_OFFLOAD;
3067e95f2592SIdo Schimmel 	if (trap)
3068e95f2592SIdo Schimmel 		nexthop->nh_flags |= RTNH_F_TRAP;
3069e95f2592SIdo Schimmel 
3070e95f2592SIdo Schimmel out:
3071e95f2592SIdo Schimmel 	rcu_read_unlock();
3072e95f2592SIdo Schimmel }
3073e95f2592SIdo Schimmel EXPORT_SYMBOL(nexthop_set_hw_flags);
3074e95f2592SIdo Schimmel 
3075ab84be7eSDavid Ahern static void __net_exit nexthop_net_exit(struct net *net)
3076ab84be7eSDavid Ahern {
3077ab84be7eSDavid Ahern 	rtnl_lock();
3078ab84be7eSDavid Ahern 	flush_all_nexthops(net);
3079ab84be7eSDavid Ahern 	rtnl_unlock();
3080597cfe4fSDavid Ahern 	kfree(net->nexthop.devhash);
3081ab84be7eSDavid Ahern }
3082ab84be7eSDavid Ahern 
3083ab84be7eSDavid Ahern static int __net_init nexthop_net_init(struct net *net)
3084ab84be7eSDavid Ahern {
3085597cfe4fSDavid Ahern 	size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;
3086597cfe4fSDavid Ahern 
3087ab84be7eSDavid Ahern 	net->nexthop.rb_root = RB_ROOT;
3088597cfe4fSDavid Ahern 	net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
3089597cfe4fSDavid Ahern 	if (!net->nexthop.devhash)
3090597cfe4fSDavid Ahern 		return -ENOMEM;
309180690ec6SIdo Schimmel 	BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain);
3092ab84be7eSDavid Ahern 
3093ab84be7eSDavid Ahern 	return 0;
3094ab84be7eSDavid Ahern }
3095ab84be7eSDavid Ahern 
3096ab84be7eSDavid Ahern static struct pernet_operations nexthop_net_ops = {
3097ab84be7eSDavid Ahern 	.init = nexthop_net_init,
3098ab84be7eSDavid Ahern 	.exit = nexthop_net_exit,
3099ab84be7eSDavid Ahern };
3100ab84be7eSDavid Ahern 
3101ab84be7eSDavid Ahern static int __init nexthop_init(void)
3102ab84be7eSDavid Ahern {
3103ab84be7eSDavid Ahern 	register_pernet_subsys(&nexthop_net_ops);
3104ab84be7eSDavid Ahern 
3105597cfe4fSDavid Ahern 	register_netdevice_notifier(&nh_netdev_notifier);
3106597cfe4fSDavid Ahern 
3107ab84be7eSDavid Ahern 	rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
3108ab84be7eSDavid Ahern 	rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
3109ab84be7eSDavid Ahern 	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
3110ab84be7eSDavid Ahern 		      rtm_dump_nexthop, 0);
3111ab84be7eSDavid Ahern 
3112ab84be7eSDavid Ahern 	rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
3113ab84be7eSDavid Ahern 	rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
3114ab84be7eSDavid Ahern 
3115ab84be7eSDavid Ahern 	rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
3116ab84be7eSDavid Ahern 	rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
3117ab84be7eSDavid Ahern 
3118ab84be7eSDavid Ahern 	return 0;
3119ab84be7eSDavid Ahern }
3120ab84be7eSDavid Ahern subsys_initcall(nexthop_init);
3121