xref: /linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c (revision cdd30ebb1b9f36159d66f088b61aee264e649d7a)
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3 
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/if_macvlan.h>
17 #include <linux/refcount.h>
18 #include <linux/jhash.h>
19 #include <linux/net_namespace.h>
20 #include <linux/mutex.h>
21 #include <linux/genalloc.h>
22 #include <linux/xarray.h>
23 #include <net/netevent.h>
24 #include <net/neighbour.h>
25 #include <net/arp.h>
26 #include <net/inet_dscp.h>
27 #include <net/ip_fib.h>
28 #include <net/ip6_fib.h>
29 #include <net/nexthop.h>
30 #include <net/fib_rules.h>
31 #include <net/ip_tunnels.h>
32 #include <net/l3mdev.h>
33 #include <net/addrconf.h>
34 #include <net/ndisc.h>
35 #include <net/ipv6.h>
36 #include <net/fib_notifier.h>
37 #include <net/switchdev.h>
38 
39 #include "spectrum.h"
40 #include "core.h"
41 #include "reg.h"
42 #include "spectrum_cnt.h"
43 #include "spectrum_dpipe.h"
44 #include "spectrum_ipip.h"
45 #include "spectrum_mr.h"
46 #include "spectrum_mr_tcam.h"
47 #include "spectrum_router.h"
48 #include "spectrum_span.h"
49 
50 struct mlxsw_sp_fib;
51 struct mlxsw_sp_vr;
52 struct mlxsw_sp_lpm_tree;
53 struct mlxsw_sp_rif_ops;
54 
/* Lookup key of a CRIF. The whole structure is hashed (see
 * mlxsw_sp_crif_ht_params), so it consists of the netdevice pointer only.
 */
struct mlxsw_sp_crif_key {
	struct net_device *dev;	/* netdevice the CRIF is keyed by */
};
58 
59 struct mlxsw_sp_crif {
60 	struct mlxsw_sp_crif_key key;
61 	struct rhash_head ht_node;
62 	bool can_destroy;
63 	struct list_head nexthop_list;
64 	struct mlxsw_sp_rif *rif;
65 };
66 
67 static const struct rhashtable_params mlxsw_sp_crif_ht_params = {
68 	.key_offset = offsetof(struct mlxsw_sp_crif, key),
69 	.key_len = sizeof_field(struct mlxsw_sp_crif, key),
70 	.head_offset = offsetof(struct mlxsw_sp_crif, ht_node),
71 };
72 
73 struct mlxsw_sp_rif {
74 	struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */
75 	netdevice_tracker dev_tracker;
76 	struct list_head neigh_list;
77 	struct mlxsw_sp_fid *fid;
78 	unsigned char addr[ETH_ALEN];
79 	int mtu;
80 	u16 rif_index;
81 	u8 mac_profile_id;
82 	u8 rif_entries;
83 	u16 vr_id;
84 	const struct mlxsw_sp_rif_ops *ops;
85 	struct mlxsw_sp *mlxsw_sp;
86 
87 	unsigned int counter_ingress;
88 	bool counter_ingress_valid;
89 	unsigned int counter_egress;
90 	bool counter_egress_valid;
91 };
92 
93 static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
94 {
95 	if (!rif->crif)
96 		return NULL;
97 	return rif->crif->key.dev;
98 }
99 
100 struct mlxsw_sp_rif_params {
101 	struct net_device *dev;
102 	union {
103 		u16 system_port;
104 		u16 lag_id;
105 	};
106 	u16 vid;
107 	bool lag;
108 	bool double_entry;
109 };
110 
111 struct mlxsw_sp_rif_subport {
112 	struct mlxsw_sp_rif common;
113 	refcount_t ref_count;
114 	union {
115 		u16 system_port;
116 		u16 lag_id;
117 	};
118 	u16 vid;
119 	bool lag;
120 };
121 
122 struct mlxsw_sp_rif_ipip_lb {
123 	struct mlxsw_sp_rif common;
124 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
125 	u16 ul_vr_id;	/* Spectrum-1. */
126 	u16 ul_rif_id;	/* Spectrum-2+. */
127 };
128 
129 struct mlxsw_sp_rif_params_ipip_lb {
130 	struct mlxsw_sp_rif_params common;
131 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
132 };
133 
134 struct mlxsw_sp_rif_ops {
135 	enum mlxsw_sp_rif_type type;
136 	size_t rif_size;
137 
138 	void (*setup)(struct mlxsw_sp_rif *rif,
139 		      const struct mlxsw_sp_rif_params *params);
140 	int (*configure)(struct mlxsw_sp_rif *rif,
141 			 struct netlink_ext_ack *extack);
142 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
143 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
144 					 const struct mlxsw_sp_rif_params *params,
145 					 struct netlink_ext_ack *extack);
146 	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
147 };
148 
149 struct mlxsw_sp_rif_mac_profile {
150 	unsigned char mac_prefix[ETH_ALEN];
151 	refcount_t ref_count;
152 	u8 id;
153 };
154 
/* Router-level initialization hooks; each returns an int status. */
struct mlxsw_sp_router_ops {
	int (*init)(struct mlxsw_sp *mlxsw_sp);
	int (*ipips_init)(struct mlxsw_sp *mlxsw_sp);
};
159 
160 static struct mlxsw_sp_rif *
161 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
162 			 const struct net_device *dev);
163 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
164 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
165 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
166 				  struct mlxsw_sp_lpm_tree *lpm_tree);
167 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
168 				     const struct mlxsw_sp_fib *fib,
169 				     u8 tree_id);
170 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
171 				       const struct mlxsw_sp_fib *fib);
172 
173 static unsigned int *
174 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
175 			   enum mlxsw_sp_rif_counter_dir dir)
176 {
177 	switch (dir) {
178 	case MLXSW_SP_RIF_COUNTER_EGRESS:
179 		return &rif->counter_egress;
180 	case MLXSW_SP_RIF_COUNTER_INGRESS:
181 		return &rif->counter_ingress;
182 	}
183 	return NULL;
184 }
185 
186 static bool
187 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
188 			       enum mlxsw_sp_rif_counter_dir dir)
189 {
190 	switch (dir) {
191 	case MLXSW_SP_RIF_COUNTER_EGRESS:
192 		return rif->counter_egress_valid;
193 	case MLXSW_SP_RIF_COUNTER_INGRESS:
194 		return rif->counter_ingress_valid;
195 	}
196 	return false;
197 }
198 
199 static void
200 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
201 			       enum mlxsw_sp_rif_counter_dir dir,
202 			       bool valid)
203 {
204 	switch (dir) {
205 	case MLXSW_SP_RIF_COUNTER_EGRESS:
206 		rif->counter_egress_valid = valid;
207 		break;
208 	case MLXSW_SP_RIF_COUNTER_INGRESS:
209 		rif->counter_ingress_valid = valid;
210 		break;
211 	}
212 }
213 
214 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
215 				     unsigned int counter_index, bool enable,
216 				     enum mlxsw_sp_rif_counter_dir dir)
217 {
218 	char ritr_pl[MLXSW_REG_RITR_LEN];
219 	bool is_egress = false;
220 	int err;
221 
222 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
223 		is_egress = true;
224 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
225 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
226 	if (err)
227 		return err;
228 
229 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
230 				    is_egress);
231 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
232 }
233 
234 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
235 				   struct mlxsw_sp_rif *rif,
236 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
237 {
238 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
239 	unsigned int *p_counter_index;
240 	bool valid;
241 	int err;
242 
243 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
244 	if (!valid)
245 		return -EINVAL;
246 
247 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
248 	if (!p_counter_index)
249 		return -EINVAL;
250 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
251 			     MLXSW_REG_RICNT_OPCODE_NOP);
252 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
253 	if (err)
254 		return err;
255 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
256 	return 0;
257 }
258 
259 struct mlxsw_sp_rif_counter_set_basic {
260 	u64 good_unicast_packets;
261 	u64 good_multicast_packets;
262 	u64 good_broadcast_packets;
263 	u64 good_unicast_bytes;
264 	u64 good_multicast_bytes;
265 	u64 good_broadcast_bytes;
266 	u64 error_packets;
267 	u64 discard_packets;
268 	u64 error_bytes;
269 	u64 discard_bytes;
270 };
271 
272 static int
273 mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif,
274 				 enum mlxsw_sp_rif_counter_dir dir,
275 				 struct mlxsw_sp_rif_counter_set_basic *set)
276 {
277 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
278 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
279 	unsigned int *p_counter_index;
280 	int err;
281 
282 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
283 		return -EINVAL;
284 
285 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
286 	if (!p_counter_index)
287 		return -EINVAL;
288 
289 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
290 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
291 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
292 	if (err)
293 		return err;
294 
295 	if (!set)
296 		return 0;
297 
298 #define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME)				\
299 		(set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl))
300 
301 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets);
302 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets);
303 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets);
304 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes);
305 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes);
306 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes);
307 	MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets);
308 	MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets);
309 	MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes);
310 	MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes);
311 
312 #undef MLXSW_SP_RIF_COUNTER_EXTRACT
313 
314 	return 0;
315 }
316 
317 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
318 				      unsigned int counter_index)
319 {
320 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
321 
322 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
323 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
324 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
325 }
326 
327 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif,
328 			       enum mlxsw_sp_rif_counter_dir dir)
329 {
330 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
331 	unsigned int *p_counter_index;
332 	int err;
333 
334 	if (mlxsw_sp_rif_counter_valid_get(rif, dir))
335 		return 0;
336 
337 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
338 	if (!p_counter_index)
339 		return -EINVAL;
340 
341 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
342 				     p_counter_index);
343 	if (err)
344 		return err;
345 
346 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
347 	if (err)
348 		goto err_counter_clear;
349 
350 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
351 					*p_counter_index, true, dir);
352 	if (err)
353 		goto err_counter_edit;
354 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
355 	return 0;
356 
357 err_counter_edit:
358 err_counter_clear:
359 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
360 			      *p_counter_index);
361 	return err;
362 }
363 
364 void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif,
365 			       enum mlxsw_sp_rif_counter_dir dir)
366 {
367 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
368 	unsigned int *p_counter_index;
369 
370 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
371 		return;
372 
373 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
374 	if (WARN_ON(!p_counter_index))
375 		return;
376 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
377 				  *p_counter_index, false, dir);
378 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
379 			      *p_counter_index);
380 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
381 }
382 
383 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
384 {
385 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
386 	struct devlink *devlink;
387 
388 	devlink = priv_to_devlink(mlxsw_sp->core);
389 	if (!devlink_dpipe_table_counter_enabled(devlink,
390 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
391 		return;
392 	mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
393 }
394 
395 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
396 {
397 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
398 }
399 
400 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
401 
402 struct mlxsw_sp_prefix_usage {
403 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
404 };
405 
406 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
407 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
408 
409 static bool
410 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
411 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
412 {
413 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
414 }
415 
416 static void
417 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
418 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
419 {
420 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
421 }
422 
423 static void
424 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
425 			  unsigned char prefix_len)
426 {
427 	set_bit(prefix_len, prefix_usage->b);
428 }
429 
430 static void
431 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
432 			    unsigned char prefix_len)
433 {
434 	clear_bit(prefix_len, prefix_usage->b);
435 }
436 
437 struct mlxsw_sp_fib_key {
438 	unsigned char addr[sizeof(struct in6_addr)];
439 	unsigned char prefix_len;
440 };
441 
/* Kind of a FIB entry. */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
	MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};
458 
459 struct mlxsw_sp_nexthop_group_info;
460 struct mlxsw_sp_nexthop_group;
461 struct mlxsw_sp_fib_entry;
462 
463 struct mlxsw_sp_fib_node {
464 	struct mlxsw_sp_fib_entry *fib_entry;
465 	struct list_head list;
466 	struct rhash_head ht_node;
467 	struct mlxsw_sp_fib *fib;
468 	struct mlxsw_sp_fib_key key;
469 };
470 
471 struct mlxsw_sp_fib_entry_decap {
472 	struct mlxsw_sp_ipip_entry *ipip_entry;
473 	u32 tunnel_index;
474 };
475 
476 struct mlxsw_sp_fib_entry {
477 	struct mlxsw_sp_fib_node *fib_node;
478 	enum mlxsw_sp_fib_entry_type type;
479 	struct list_head nexthop_group_node;
480 	struct mlxsw_sp_nexthop_group *nh_group;
481 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
482 };
483 
484 struct mlxsw_sp_fib4_entry {
485 	struct mlxsw_sp_fib_entry common;
486 	struct fib_info *fi;
487 	u32 tb_id;
488 	dscp_t dscp;
489 	u8 type;
490 };
491 
492 struct mlxsw_sp_fib6_entry {
493 	struct mlxsw_sp_fib_entry common;
494 	struct list_head rt6_list;
495 	unsigned int nrt6;
496 };
497 
498 struct mlxsw_sp_rt6 {
499 	struct list_head list;
500 	struct fib6_info *rt;
501 };
502 
503 struct mlxsw_sp_lpm_tree {
504 	u8 id; /* tree ID */
505 	refcount_t ref_count;
506 	enum mlxsw_sp_l3proto proto;
507 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
508 	struct mlxsw_sp_prefix_usage prefix_usage;
509 };
510 
511 struct mlxsw_sp_fib {
512 	struct rhashtable ht;
513 	struct list_head node_list;
514 	struct mlxsw_sp_vr *vr;
515 	struct mlxsw_sp_lpm_tree *lpm_tree;
516 	enum mlxsw_sp_l3proto proto;
517 };
518 
519 struct mlxsw_sp_vr {
520 	u16 id; /* virtual router ID */
521 	u32 tb_id; /* kernel fib table id */
522 	unsigned int rif_count;
523 	struct mlxsw_sp_fib *fib4;
524 	struct mlxsw_sp_fib *fib6;
525 	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
526 	struct mlxsw_sp_rif *ul_rif;
527 	refcount_t ul_rif_refcnt;
528 };
529 
530 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
531 
532 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
533 						struct mlxsw_sp_vr *vr,
534 						enum mlxsw_sp_l3proto proto)
535 {
536 	struct mlxsw_sp_lpm_tree *lpm_tree;
537 	struct mlxsw_sp_fib *fib;
538 	int err;
539 
540 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
541 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
542 	if (!fib)
543 		return ERR_PTR(-ENOMEM);
544 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
545 	if (err)
546 		goto err_rhashtable_init;
547 	INIT_LIST_HEAD(&fib->node_list);
548 	fib->proto = proto;
549 	fib->vr = vr;
550 	fib->lpm_tree = lpm_tree;
551 	mlxsw_sp_lpm_tree_hold(lpm_tree);
552 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
553 	if (err)
554 		goto err_lpm_tree_bind;
555 	return fib;
556 
557 err_lpm_tree_bind:
558 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
559 err_rhashtable_init:
560 	kfree(fib);
561 	return ERR_PTR(err);
562 }
563 
564 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
565 				 struct mlxsw_sp_fib *fib)
566 {
567 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
568 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
569 	WARN_ON(!list_empty(&fib->node_list));
570 	rhashtable_destroy(&fib->ht);
571 	kfree(fib);
572 }
573 
574 static struct mlxsw_sp_lpm_tree *
575 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
576 {
577 	static struct mlxsw_sp_lpm_tree *lpm_tree;
578 	int i;
579 
580 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
581 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
582 		if (refcount_read(&lpm_tree->ref_count) == 0)
583 			return lpm_tree;
584 	}
585 	return NULL;
586 }
587 
588 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
589 				   struct mlxsw_sp_lpm_tree *lpm_tree)
590 {
591 	char ralta_pl[MLXSW_REG_RALTA_LEN];
592 
593 	mlxsw_reg_ralta_pack(ralta_pl, true,
594 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
595 			     lpm_tree->id);
596 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
597 }
598 
599 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
600 				   struct mlxsw_sp_lpm_tree *lpm_tree)
601 {
602 	char ralta_pl[MLXSW_REG_RALTA_LEN];
603 
604 	mlxsw_reg_ralta_pack(ralta_pl, false,
605 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
606 			     lpm_tree->id);
607 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
608 }
609 
610 static int
611 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
612 				  struct mlxsw_sp_prefix_usage *prefix_usage,
613 				  struct mlxsw_sp_lpm_tree *lpm_tree)
614 {
615 	char ralst_pl[MLXSW_REG_RALST_LEN];
616 	u8 root_bin = 0;
617 	u8 prefix;
618 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
619 
620 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
621 		root_bin = prefix;
622 
623 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
624 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
625 		if (prefix == 0)
626 			continue;
627 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
628 					 MLXSW_REG_RALST_BIN_NO_CHILD);
629 		last_prefix = prefix;
630 	}
631 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
632 }
633 
634 static struct mlxsw_sp_lpm_tree *
635 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
636 			 struct mlxsw_sp_prefix_usage *prefix_usage,
637 			 enum mlxsw_sp_l3proto proto)
638 {
639 	struct mlxsw_sp_lpm_tree *lpm_tree;
640 	int err;
641 
642 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
643 	if (!lpm_tree)
644 		return ERR_PTR(-EBUSY);
645 	lpm_tree->proto = proto;
646 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
647 	if (err)
648 		return ERR_PTR(err);
649 
650 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
651 						lpm_tree);
652 	if (err)
653 		goto err_left_struct_set;
654 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
655 	       sizeof(lpm_tree->prefix_usage));
656 	memset(&lpm_tree->prefix_ref_count, 0,
657 	       sizeof(lpm_tree->prefix_ref_count));
658 	refcount_set(&lpm_tree->ref_count, 1);
659 	return lpm_tree;
660 
661 err_left_struct_set:
662 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
663 	return ERR_PTR(err);
664 }
665 
/* Destroy @lpm_tree; this amounts to freeing the tree via RALTA. */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
671 
672 static struct mlxsw_sp_lpm_tree *
673 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
674 		      struct mlxsw_sp_prefix_usage *prefix_usage,
675 		      enum mlxsw_sp_l3proto proto)
676 {
677 	struct mlxsw_sp_lpm_tree *lpm_tree;
678 	int i;
679 
680 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
681 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
682 		if (refcount_read(&lpm_tree->ref_count) &&
683 		    lpm_tree->proto == proto &&
684 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
685 					     prefix_usage)) {
686 			mlxsw_sp_lpm_tree_hold(lpm_tree);
687 			return lpm_tree;
688 		}
689 	}
690 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
691 }
692 
693 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
694 {
695 	refcount_inc(&lpm_tree->ref_count);
696 }
697 
698 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
699 				  struct mlxsw_sp_lpm_tree *lpm_tree)
700 {
701 	if (!refcount_dec_and_test(&lpm_tree->ref_count))
702 		return;
703 	mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
704 }
705 
/* Lowest usable LPM tree ID; tree 0 is reserved. */
#define MLXSW_SP_LPM_TREE_MIN 1
707 
708 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
709 {
710 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
711 	struct mlxsw_sp_lpm_tree *lpm_tree;
712 	u64 max_trees;
713 	int err, i;
714 
715 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
716 		return -EIO;
717 
718 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
719 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
720 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
721 					     sizeof(struct mlxsw_sp_lpm_tree),
722 					     GFP_KERNEL);
723 	if (!mlxsw_sp->router->lpm.trees)
724 		return -ENOMEM;
725 
726 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
727 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
728 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
729 	}
730 
731 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
732 					 MLXSW_SP_L3_PROTO_IPV4);
733 	if (IS_ERR(lpm_tree)) {
734 		err = PTR_ERR(lpm_tree);
735 		goto err_ipv4_tree_get;
736 	}
737 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
738 
739 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
740 					 MLXSW_SP_L3_PROTO_IPV6);
741 	if (IS_ERR(lpm_tree)) {
742 		err = PTR_ERR(lpm_tree);
743 		goto err_ipv6_tree_get;
744 	}
745 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
746 
747 	return 0;
748 
749 err_ipv6_tree_get:
750 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
751 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
752 err_ipv4_tree_get:
753 	kfree(mlxsw_sp->router->lpm.trees);
754 	return err;
755 }
756 
757 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
758 {
759 	struct mlxsw_sp_lpm_tree *lpm_tree;
760 
761 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
762 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
763 
764 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
765 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
766 
767 	kfree(mlxsw_sp->router->lpm.trees);
768 }
769 
770 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
771 {
772 	return !!vr->fib4 || !!vr->fib6 ||
773 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
774 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
775 }
776 
777 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
778 {
779 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
780 	struct mlxsw_sp_vr *vr;
781 	int i;
782 
783 	for (i = 0; i < max_vrs; i++) {
784 		vr = &mlxsw_sp->router->vrs[i];
785 		if (!mlxsw_sp_vr_is_used(vr))
786 			return vr;
787 	}
788 	return NULL;
789 }
790 
791 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
792 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
793 {
794 	char raltb_pl[MLXSW_REG_RALTB_LEN];
795 
796 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
797 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
798 			     tree_id);
799 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
800 }
801 
802 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
803 				       const struct mlxsw_sp_fib *fib)
804 {
805 	char raltb_pl[MLXSW_REG_RALTB_LEN];
806 
807 	/* Bind to tree 0 which is default */
808 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
809 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
810 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
811 }
812 
813 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
814 {
815 	/* For our purpose, squash main, default and local tables into one */
816 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
817 		tb_id = RT_TABLE_MAIN;
818 	return tb_id;
819 }
820 
821 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
822 					    u32 tb_id)
823 {
824 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
825 	struct mlxsw_sp_vr *vr;
826 	int i;
827 
828 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
829 
830 	for (i = 0; i < max_vrs; i++) {
831 		vr = &mlxsw_sp->router->vrs[i];
832 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
833 			return vr;
834 	}
835 	return NULL;
836 }
837 
838 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
839 				u16 *vr_id)
840 {
841 	struct mlxsw_sp_vr *vr;
842 	int err = 0;
843 
844 	mutex_lock(&mlxsw_sp->router->lock);
845 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
846 	if (!vr) {
847 		err = -ESRCH;
848 		goto out;
849 	}
850 	*vr_id = vr->id;
851 out:
852 	mutex_unlock(&mlxsw_sp->router->lock);
853 	return err;
854 }
855 
856 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
857 					    enum mlxsw_sp_l3proto proto)
858 {
859 	switch (proto) {
860 	case MLXSW_SP_L3_PROTO_IPV4:
861 		return vr->fib4;
862 	case MLXSW_SP_L3_PROTO_IPV6:
863 		return vr->fib6;
864 	}
865 	return NULL;
866 }
867 
868 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
869 					      u32 tb_id,
870 					      struct netlink_ext_ack *extack)
871 {
872 	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
873 	struct mlxsw_sp_fib *fib4;
874 	struct mlxsw_sp_fib *fib6;
875 	struct mlxsw_sp_vr *vr;
876 	int err;
877 
878 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
879 	if (!vr) {
880 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
881 		return ERR_PTR(-EBUSY);
882 	}
883 	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
884 	if (IS_ERR(fib4))
885 		return ERR_CAST(fib4);
886 	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
887 	if (IS_ERR(fib6)) {
888 		err = PTR_ERR(fib6);
889 		goto err_fib6_create;
890 	}
891 	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
892 					     MLXSW_SP_L3_PROTO_IPV4);
893 	if (IS_ERR(mr4_table)) {
894 		err = PTR_ERR(mr4_table);
895 		goto err_mr4_table_create;
896 	}
897 	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
898 					     MLXSW_SP_L3_PROTO_IPV6);
899 	if (IS_ERR(mr6_table)) {
900 		err = PTR_ERR(mr6_table);
901 		goto err_mr6_table_create;
902 	}
903 
904 	vr->fib4 = fib4;
905 	vr->fib6 = fib6;
906 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
907 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
908 	vr->tb_id = tb_id;
909 	return vr;
910 
911 err_mr6_table_create:
912 	mlxsw_sp_mr_table_destroy(mr4_table);
913 err_mr4_table_create:
914 	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
915 err_fib6_create:
916 	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
917 	return ERR_PTR(err);
918 }
919 
920 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
921 				struct mlxsw_sp_vr *vr)
922 {
923 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
924 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
925 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
926 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
927 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
928 	vr->fib6 = NULL;
929 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
930 	vr->fib4 = NULL;
931 }
932 
933 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
934 					   struct netlink_ext_ack *extack)
935 {
936 	struct mlxsw_sp_vr *vr;
937 
938 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
939 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
940 	if (!vr)
941 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
942 	return vr;
943 }
944 
945 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
946 {
947 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
948 	    list_empty(&vr->fib6->node_list) &&
949 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
950 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
951 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
952 }
953 
954 static bool
955 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
956 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
957 {
958 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
959 
960 	if (!mlxsw_sp_vr_is_used(vr))
961 		return false;
962 	if (fib->lpm_tree->id == tree_id)
963 		return true;
964 	return false;
965 }
966 
967 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
968 					struct mlxsw_sp_fib *fib,
969 					struct mlxsw_sp_lpm_tree *new_tree)
970 {
971 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
972 	int err;
973 
974 	fib->lpm_tree = new_tree;
975 	mlxsw_sp_lpm_tree_hold(new_tree);
976 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
977 	if (err)
978 		goto err_tree_bind;
979 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
980 	return 0;
981 
982 err_tree_bind:
983 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
984 	fib->lpm_tree = old_tree;
985 	return err;
986 }
987 
988 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
989 					 struct mlxsw_sp_fib *fib,
990 					 struct mlxsw_sp_lpm_tree *new_tree)
991 {
992 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
993 	enum mlxsw_sp_l3proto proto = fib->proto;
994 	struct mlxsw_sp_lpm_tree *old_tree;
995 	u8 old_id, new_id = new_tree->id;
996 	struct mlxsw_sp_vr *vr;
997 	int i, err;
998 
999 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
1000 	old_id = old_tree->id;
1001 
1002 	for (i = 0; i < max_vrs; i++) {
1003 		vr = &mlxsw_sp->router->vrs[i];
1004 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
1005 			continue;
1006 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
1007 						   mlxsw_sp_vr_fib(vr, proto),
1008 						   new_tree);
1009 		if (err)
1010 			goto err_tree_replace;
1011 	}
1012 
1013 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
1014 	       sizeof(new_tree->prefix_ref_count));
1015 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
1016 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
1017 
1018 	return 0;
1019 
1020 err_tree_replace:
1021 	for (i--; i >= 0; i--) {
1022 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
1023 			continue;
1024 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
1025 					     mlxsw_sp_vr_fib(vr, proto),
1026 					     old_tree);
1027 	}
1028 	return err;
1029 }
1030 
1031 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
1032 {
1033 	struct mlxsw_sp_vr *vr;
1034 	u64 max_vrs;
1035 	int i;
1036 
1037 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
1038 		return -EIO;
1039 
1040 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
1041 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
1042 					GFP_KERNEL);
1043 	if (!mlxsw_sp->router->vrs)
1044 		return -ENOMEM;
1045 
1046 	for (i = 0; i < max_vrs; i++) {
1047 		vr = &mlxsw_sp->router->vrs[i];
1048 		vr->id = i;
1049 	}
1050 
1051 	return 0;
1052 }
1053 
1054 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
1055 
1056 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
1057 {
1058 	/* At this stage we're guaranteed not to have new incoming
1059 	 * FIB notifications and the work queue is free from FIBs
1060 	 * sitting on top of mlxsw netdevs. However, we can still
1061 	 * have other FIBs queued. Flush the queue before flushing
1062 	 * the device's tables. No need for locks, as we're the only
1063 	 * writer.
1064 	 */
1065 	mlxsw_core_flush_owq();
1066 	mlxsw_sp_router_fib_flush(mlxsw_sp);
1067 	kfree(mlxsw_sp->router->vrs);
1068 }
1069 
1070 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
1071 {
1072 	struct net_device *d;
1073 	u32 tb_id;
1074 
1075 	rcu_read_lock();
1076 	d = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1077 	if (d)
1078 		tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1079 	else
1080 		tb_id = RT_TABLE_MAIN;
1081 	rcu_read_unlock();
1082 
1083 	return tb_id;
1084 }
1085 
1086 static void
1087 mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev)
1088 {
1089 	crif->key.dev = dev;
1090 	INIT_LIST_HEAD(&crif->nexthop_list);
1091 }
1092 
1093 static struct mlxsw_sp_crif *
1094 mlxsw_sp_crif_alloc(struct net_device *dev)
1095 {
1096 	struct mlxsw_sp_crif *crif;
1097 
1098 	crif = kzalloc(sizeof(*crif), GFP_KERNEL);
1099 	if (!crif)
1100 		return NULL;
1101 
1102 	mlxsw_sp_crif_init(crif, dev);
1103 	return crif;
1104 }
1105 
1106 static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif)
1107 {
1108 	if (WARN_ON(crif->rif))
1109 		return;
1110 
1111 	WARN_ON(!list_empty(&crif->nexthop_list));
1112 	kfree(crif);
1113 }
1114 
1115 static int mlxsw_sp_crif_insert(struct mlxsw_sp_router *router,
1116 				struct mlxsw_sp_crif *crif)
1117 {
1118 	return rhashtable_insert_fast(&router->crif_ht, &crif->ht_node,
1119 				      mlxsw_sp_crif_ht_params);
1120 }
1121 
1122 static void mlxsw_sp_crif_remove(struct mlxsw_sp_router *router,
1123 				 struct mlxsw_sp_crif *crif)
1124 {
1125 	rhashtable_remove_fast(&router->crif_ht, &crif->ht_node,
1126 			       mlxsw_sp_crif_ht_params);
1127 }
1128 
1129 static struct mlxsw_sp_crif *
1130 mlxsw_sp_crif_lookup(struct mlxsw_sp_router *router,
1131 		     const struct net_device *dev)
1132 {
1133 	struct mlxsw_sp_crif_key key = {
1134 		.dev = (struct net_device *)dev,
1135 	};
1136 
1137 	return rhashtable_lookup_fast(&router->crif_ht, &key,
1138 				      mlxsw_sp_crif_ht_params);
1139 }
1140 
1141 static struct mlxsw_sp_rif *
1142 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1143 		    const struct mlxsw_sp_rif_params *params,
1144 		    struct netlink_ext_ack *extack);
1145 
1146 static struct mlxsw_sp_rif_ipip_lb *
1147 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1148 				enum mlxsw_sp_ipip_type ipipt,
1149 				struct net_device *ol_dev,
1150 				struct netlink_ext_ack *extack)
1151 {
1152 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
1153 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1154 	struct mlxsw_sp_rif *rif;
1155 
1156 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1157 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1158 		.common.dev = ol_dev,
1159 		.common.lag = false,
1160 		.common.double_entry = ipip_ops->double_rif_entry,
1161 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1162 	};
1163 
1164 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1165 	if (IS_ERR(rif))
1166 		return ERR_CAST(rif);
1167 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1168 }
1169 
1170 static struct mlxsw_sp_ipip_entry *
1171 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1172 			  enum mlxsw_sp_ipip_type ipipt,
1173 			  struct net_device *ol_dev)
1174 {
1175 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1176 	struct mlxsw_sp_ipip_entry *ipip_entry;
1177 	struct mlxsw_sp_ipip_entry *ret = NULL;
1178 	int err;
1179 
1180 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1181 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1182 	if (!ipip_entry)
1183 		return ERR_PTR(-ENOMEM);
1184 
1185 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1186 							    ol_dev, NULL);
1187 	if (IS_ERR(ipip_entry->ol_lb)) {
1188 		ret = ERR_CAST(ipip_entry->ol_lb);
1189 		goto err_ol_ipip_lb_create;
1190 	}
1191 
1192 	ipip_entry->ipipt = ipipt;
1193 	ipip_entry->ol_dev = ol_dev;
1194 	ipip_entry->parms = ipip_ops->parms_init(ol_dev);
1195 
1196 	err = ipip_ops->rem_ip_addr_set(mlxsw_sp, ipip_entry);
1197 	if (err) {
1198 		ret = ERR_PTR(err);
1199 		goto err_rem_ip_addr_set;
1200 	}
1201 
1202 	return ipip_entry;
1203 
1204 err_rem_ip_addr_set:
1205 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1206 err_ol_ipip_lb_create:
1207 	kfree(ipip_entry);
1208 	return ret;
1209 }
1210 
1211 static void mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp *mlxsw_sp,
1212 					struct mlxsw_sp_ipip_entry *ipip_entry)
1213 {
1214 	const struct mlxsw_sp_ipip_ops *ipip_ops =
1215 		mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1216 
1217 	ipip_ops->rem_ip_addr_unset(mlxsw_sp, ipip_entry);
1218 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1219 	kfree(ipip_entry);
1220 }
1221 
1222 static bool
1223 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1224 				  const enum mlxsw_sp_l3proto ul_proto,
1225 				  union mlxsw_sp_l3addr saddr,
1226 				  u32 ul_tb_id,
1227 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1228 {
1229 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1230 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1231 	union mlxsw_sp_l3addr tun_saddr;
1232 
1233 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1234 		return false;
1235 
1236 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1237 	return tun_ul_tb_id == ul_tb_id &&
1238 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1239 }
1240 
1241 static int mlxsw_sp_ipip_decap_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp,
1242 						 enum mlxsw_sp_ipip_type ipipt)
1243 {
1244 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1245 
1246 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1247 
1248 	/* Not all tunnels require to increase the default pasing depth
1249 	 * (96 bytes).
1250 	 */
1251 	if (ipip_ops->inc_parsing_depth)
1252 		return mlxsw_sp_parsing_depth_inc(mlxsw_sp);
1253 
1254 	return 0;
1255 }
1256 
1257 static void mlxsw_sp_ipip_decap_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp,
1258 						  enum mlxsw_sp_ipip_type ipipt)
1259 {
1260 	const struct mlxsw_sp_ipip_ops *ipip_ops =
1261 		mlxsw_sp->router->ipip_ops_arr[ipipt];
1262 
1263 	if (ipip_ops->inc_parsing_depth)
1264 		mlxsw_sp_parsing_depth_dec(mlxsw_sp);
1265 }
1266 
1267 static int
1268 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1269 			      struct mlxsw_sp_fib_entry *fib_entry,
1270 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1271 {
1272 	u32 tunnel_index;
1273 	int err;
1274 
1275 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1276 				  1, &tunnel_index);
1277 	if (err)
1278 		return err;
1279 
1280 	err = mlxsw_sp_ipip_decap_parsing_depth_inc(mlxsw_sp,
1281 						    ipip_entry->ipipt);
1282 	if (err)
1283 		goto err_parsing_depth_inc;
1284 
1285 	ipip_entry->decap_fib_entry = fib_entry;
1286 	fib_entry->decap.ipip_entry = ipip_entry;
1287 	fib_entry->decap.tunnel_index = tunnel_index;
1288 
1289 	return 0;
1290 
1291 err_parsing_depth_inc:
1292 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
1293 			   fib_entry->decap.tunnel_index);
1294 	return err;
1295 }
1296 
1297 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1298 					  struct mlxsw_sp_fib_entry *fib_entry)
1299 {
1300 	enum mlxsw_sp_ipip_type ipipt = fib_entry->decap.ipip_entry->ipipt;
1301 
1302 	/* Unlink this node from the IPIP entry that it's the decap entry of. */
1303 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1304 	fib_entry->decap.ipip_entry = NULL;
1305 	mlxsw_sp_ipip_decap_parsing_depth_dec(mlxsw_sp, ipipt);
1306 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1307 			   1, fib_entry->decap.tunnel_index);
1308 }
1309 
1310 static struct mlxsw_sp_fib_node *
1311 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1312 			 size_t addr_len, unsigned char prefix_len);
1313 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1314 				     struct mlxsw_sp_fib_entry *fib_entry);
1315 
1316 static void
1317 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1318 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1319 {
1320 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1321 
1322 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1323 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1324 
1325 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1326 }
1327 
1328 static void
1329 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1330 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1331 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1332 {
1333 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1334 					  ipip_entry))
1335 		return;
1336 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1337 
1338 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1339 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1340 }
1341 
1342 static struct mlxsw_sp_fib_entry *
1343 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1344 				     enum mlxsw_sp_l3proto proto,
1345 				     const union mlxsw_sp_l3addr *addr,
1346 				     enum mlxsw_sp_fib_entry_type type)
1347 {
1348 	struct mlxsw_sp_fib_node *fib_node;
1349 	unsigned char addr_prefix_len;
1350 	struct mlxsw_sp_fib *fib;
1351 	struct mlxsw_sp_vr *vr;
1352 	const void *addrp;
1353 	size_t addr_len;
1354 	u32 addr4;
1355 
1356 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1357 	if (!vr)
1358 		return NULL;
1359 	fib = mlxsw_sp_vr_fib(vr, proto);
1360 
1361 	switch (proto) {
1362 	case MLXSW_SP_L3_PROTO_IPV4:
1363 		addr4 = be32_to_cpu(addr->addr4);
1364 		addrp = &addr4;
1365 		addr_len = 4;
1366 		addr_prefix_len = 32;
1367 		break;
1368 	case MLXSW_SP_L3_PROTO_IPV6:
1369 		addrp = &addr->addr6;
1370 		addr_len = 16;
1371 		addr_prefix_len = 128;
1372 		break;
1373 	default:
1374 		WARN_ON(1);
1375 		return NULL;
1376 	}
1377 
1378 	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1379 					    addr_prefix_len);
1380 	if (!fib_node || fib_node->fib_entry->type != type)
1381 		return NULL;
1382 
1383 	return fib_node->fib_entry;
1384 }
1385 
1386 /* Given an IPIP entry, find the corresponding decap route. */
1387 static struct mlxsw_sp_fib_entry *
1388 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1389 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1390 {
1391 	static struct mlxsw_sp_fib_node *fib_node;
1392 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1393 	unsigned char saddr_prefix_len;
1394 	union mlxsw_sp_l3addr saddr;
1395 	struct mlxsw_sp_fib *ul_fib;
1396 	struct mlxsw_sp_vr *ul_vr;
1397 	const void *saddrp;
1398 	size_t saddr_len;
1399 	u32 ul_tb_id;
1400 	u32 saddr4;
1401 
1402 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1403 
1404 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1405 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1406 	if (!ul_vr)
1407 		return NULL;
1408 
1409 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1410 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1411 					   ipip_entry->ol_dev);
1412 
1413 	switch (ipip_ops->ul_proto) {
1414 	case MLXSW_SP_L3_PROTO_IPV4:
1415 		saddr4 = be32_to_cpu(saddr.addr4);
1416 		saddrp = &saddr4;
1417 		saddr_len = 4;
1418 		saddr_prefix_len = 32;
1419 		break;
1420 	case MLXSW_SP_L3_PROTO_IPV6:
1421 		saddrp = &saddr.addr6;
1422 		saddr_len = 16;
1423 		saddr_prefix_len = 128;
1424 		break;
1425 	default:
1426 		WARN_ON(1);
1427 		return NULL;
1428 	}
1429 
1430 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1431 					    saddr_prefix_len);
1432 	if (!fib_node ||
1433 	    fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1434 		return NULL;
1435 
1436 	return fib_node->fib_entry;
1437 }
1438 
1439 static struct mlxsw_sp_ipip_entry *
1440 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1441 			   enum mlxsw_sp_ipip_type ipipt,
1442 			   struct net_device *ol_dev)
1443 {
1444 	struct mlxsw_sp_ipip_entry *ipip_entry;
1445 
1446 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1447 	if (IS_ERR(ipip_entry))
1448 		return ipip_entry;
1449 
1450 	list_add_tail(&ipip_entry->ipip_list_node,
1451 		      &mlxsw_sp->router->ipip_list);
1452 
1453 	return ipip_entry;
1454 }
1455 
1456 static void
1457 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1458 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1459 {
1460 	list_del(&ipip_entry->ipip_list_node);
1461 	mlxsw_sp_ipip_entry_dealloc(mlxsw_sp, ipip_entry);
1462 }
1463 
1464 static bool
1465 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1466 				  const struct net_device *ul_dev,
1467 				  enum mlxsw_sp_l3proto ul_proto,
1468 				  union mlxsw_sp_l3addr ul_dip,
1469 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1470 {
1471 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1472 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1473 
1474 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1475 		return false;
1476 
1477 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1478 						 ul_tb_id, ipip_entry);
1479 }
1480 
1481 /* Given decap parameters, find the corresponding IPIP entry. */
1482 static struct mlxsw_sp_ipip_entry *
1483 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
1484 				  enum mlxsw_sp_l3proto ul_proto,
1485 				  union mlxsw_sp_l3addr ul_dip)
1486 {
1487 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1488 	struct net_device *ul_dev;
1489 
1490 	rcu_read_lock();
1491 
1492 	ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
1493 	if (!ul_dev)
1494 		goto out_unlock;
1495 
1496 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1497 			    ipip_list_node)
1498 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1499 						      ul_proto, ul_dip,
1500 						      ipip_entry))
1501 			goto out_unlock;
1502 
1503 	rcu_read_unlock();
1504 
1505 	return NULL;
1506 
1507 out_unlock:
1508 	rcu_read_unlock();
1509 	return ipip_entry;
1510 }
1511 
1512 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1513 				      const struct net_device *dev,
1514 				      enum mlxsw_sp_ipip_type *p_type)
1515 {
1516 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1517 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1518 	enum mlxsw_sp_ipip_type ipipt;
1519 
1520 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1521 		ipip_ops = router->ipip_ops_arr[ipipt];
1522 		if (dev->type == ipip_ops->dev_type) {
1523 			if (p_type)
1524 				*p_type = ipipt;
1525 			return true;
1526 		}
1527 	}
1528 	return false;
1529 }
1530 
1531 static bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1532 				       const struct net_device *dev)
1533 {
1534 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1535 }
1536 
1537 static struct mlxsw_sp_ipip_entry *
1538 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1539 				   const struct net_device *ol_dev)
1540 {
1541 	struct mlxsw_sp_ipip_entry *ipip_entry;
1542 
1543 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1544 			    ipip_list_node)
1545 		if (ipip_entry->ol_dev == ol_dev)
1546 			return ipip_entry;
1547 
1548 	return NULL;
1549 }
1550 
1551 static struct mlxsw_sp_ipip_entry *
1552 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1553 				   const struct net_device *ul_dev,
1554 				   struct mlxsw_sp_ipip_entry *start)
1555 {
1556 	struct mlxsw_sp_ipip_entry *ipip_entry;
1557 
1558 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1559 					ipip_list_node);
1560 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1561 				     ipip_list_node) {
1562 		struct net_device *ol_dev = ipip_entry->ol_dev;
1563 		struct net_device *ipip_ul_dev;
1564 
1565 		rcu_read_lock();
1566 		ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1567 		rcu_read_unlock();
1568 
1569 		if (ipip_ul_dev == ul_dev)
1570 			return ipip_entry;
1571 	}
1572 
1573 	return NULL;
1574 }
1575 
1576 static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
1577 				       const struct net_device *dev)
1578 {
1579 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1580 }
1581 
1582 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1583 						const struct net_device *ol_dev,
1584 						enum mlxsw_sp_ipip_type ipipt)
1585 {
1586 	const struct mlxsw_sp_ipip_ops *ops
1587 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1588 
1589 	return ops->can_offload(mlxsw_sp, ol_dev);
1590 }
1591 
1592 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1593 						struct net_device *ol_dev)
1594 {
1595 	enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
1596 	struct mlxsw_sp_ipip_entry *ipip_entry;
1597 	enum mlxsw_sp_l3proto ul_proto;
1598 	union mlxsw_sp_l3addr saddr;
1599 	u32 ul_tb_id;
1600 
1601 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1602 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1603 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1604 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1605 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1606 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1607 							  saddr, ul_tb_id,
1608 							  NULL)) {
1609 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1610 								ol_dev);
1611 			if (IS_ERR(ipip_entry))
1612 				return PTR_ERR(ipip_entry);
1613 		}
1614 	}
1615 
1616 	return 0;
1617 }
1618 
/* Handle unregistration of overlay device @ol_dev: destroy its IPIP entry,
 * if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, entry);
}
1628 
/* The overlay device of @ipip_entry went up: if a matching trap route
 * exists, promote it to a decap route.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_entry;

	decap_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_entry)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, decap_entry);
}
1640 
1641 static int
1642 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1643 			u16 ul_rif_id, bool enable)
1644 {
1645 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1646 	struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
1647 	enum mlxsw_reg_ritr_loopback_ipip_options ipip_options;
1648 	struct mlxsw_sp_rif *rif = &lb_rif->common;
1649 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1650 	char ritr_pl[MLXSW_REG_RITR_LEN];
1651 	struct in6_addr *saddr6;
1652 	u32 saddr4;
1653 
1654 	ipip_options = MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET;
1655 	switch (lb_cf.ul_protocol) {
1656 	case MLXSW_SP_L3_PROTO_IPV4:
1657 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1658 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1659 				    rif->rif_index, rif->vr_id, dev->mtu);
1660 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1661 						   ipip_options, ul_vr_id,
1662 						   ul_rif_id, saddr4,
1663 						   lb_cf.okey);
1664 		break;
1665 
1666 	case MLXSW_SP_L3_PROTO_IPV6:
1667 		saddr6 = &lb_cf.saddr.addr6;
1668 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1669 				    rif->rif_index, rif->vr_id, dev->mtu);
1670 		mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt,
1671 						   ipip_options, ul_vr_id,
1672 						   ul_rif_id, saddr6,
1673 						   lb_cf.okey);
1674 		break;
1675 	}
1676 
1677 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1678 }
1679 
1680 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1681 						 struct net_device *ol_dev)
1682 {
1683 	struct mlxsw_sp_ipip_entry *ipip_entry;
1684 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1685 	int err = 0;
1686 
1687 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1688 	if (ipip_entry) {
1689 		lb_rif = ipip_entry->ol_lb;
1690 		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1691 					      lb_rif->ul_rif_id, true);
1692 		if (err)
1693 			goto out;
1694 		lb_rif->common.mtu = ol_dev->mtu;
1695 	}
1696 
1697 out:
1698 	return err;
1699 }
1700 
/* Overlay device @ol_dev went up: forward the event to its IPIP entry, if it
 * has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, entry);
}
1710 
1711 static void
1712 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1713 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1714 {
1715 	if (ipip_entry->decap_fib_entry)
1716 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1717 }
1718 
/* Overlay device @ol_dev went down: forward the event to its IPIP entry, if
 * it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, entry);
}
1728 
1729 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1730 					struct mlxsw_sp_rif *rif);
1731 
1732 static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp,
1733 					 struct mlxsw_sp_rif *old_rif,
1734 					 struct mlxsw_sp_rif *new_rif,
1735 					 bool migrate_nhs)
1736 {
1737 	struct mlxsw_sp_crif *crif = old_rif->crif;
1738 	struct mlxsw_sp_crif mock_crif = {};
1739 
1740 	if (migrate_nhs)
1741 		mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
1742 
1743 	/* Plant a mock CRIF so that destroying the old RIF doesn't unoffload
1744 	 * our nexthops and IPIP tunnels, and doesn't sever the crif->rif link.
1745 	 */
1746 	mlxsw_sp_crif_init(&mock_crif, crif->key.dev);
1747 	old_rif->crif = &mock_crif;
1748 	mock_crif.rif = old_rif;
1749 	mlxsw_sp_rif_destroy(old_rif);
1750 }
1751 
1752 static int
1753 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1754 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1755 				 bool keep_encap,
1756 				 struct netlink_ext_ack *extack)
1757 {
1758 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1759 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1760 
1761 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1762 						     ipip_entry->ipipt,
1763 						     ipip_entry->ol_dev,
1764 						     extack);
1765 	if (IS_ERR(new_lb_rif))
1766 		return PTR_ERR(new_lb_rif);
1767 	ipip_entry->ol_lb = new_lb_rif;
1768 
1769 	mlxsw_sp_rif_migrate_destroy(mlxsw_sp, &old_lb_rif->common,
1770 				     &new_lb_rif->common, keep_encap);
1771 	return 0;
1772 }
1773 
1774 /**
1775  * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
1776  * @mlxsw_sp: mlxsw_sp.
1777  * @ipip_entry: IPIP entry.
1778  * @recreate_loopback: Recreates the associated loopback RIF.
1779  * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
1780  *              relevant when recreate_loopback is true.
1781  * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
1782  *                   is only relevant when recreate_loopback is false.
1783  * @extack: extack.
1784  *
1785  * Return: Non-zero value on failure.
1786  */
1787 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1788 					struct mlxsw_sp_ipip_entry *ipip_entry,
1789 					bool recreate_loopback,
1790 					bool keep_encap,
1791 					bool update_nexthops,
1792 					struct netlink_ext_ack *extack)
1793 {
1794 	int err;
1795 
1796 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1797 	 * recreate it. That creates a window of opportunity where RALUE and
1798 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1799 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1800 	 * of RALUE, demote the decap route back.
1801 	 */
1802 	if (ipip_entry->decap_fib_entry)
1803 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1804 
1805 	if (recreate_loopback) {
1806 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1807 						       keep_encap, extack);
1808 		if (err)
1809 			return err;
1810 	} else if (update_nexthops) {
1811 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1812 					    &ipip_entry->ol_lb->common);
1813 	}
1814 
1815 	if (ipip_entry->ol_dev->flags & IFF_UP)
1816 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1817 
1818 	return 0;
1819 }
1820 
1821 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1822 						struct net_device *ol_dev,
1823 						struct netlink_ext_ack *extack)
1824 {
1825 	struct mlxsw_sp_ipip_entry *ipip_entry =
1826 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1827 
1828 	if (!ipip_entry)
1829 		return 0;
1830 
1831 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1832 						   true, false, false, extack);
1833 }
1834 
1835 static int
1836 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1837 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1838 				     struct net_device *ul_dev,
1839 				     bool *demote_this,
1840 				     struct netlink_ext_ack *extack)
1841 {
1842 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1843 	enum mlxsw_sp_l3proto ul_proto;
1844 	union mlxsw_sp_l3addr saddr;
1845 
1846 	/* Moving underlay to a different VRF might cause local address
1847 	 * conflict, and the conflicting tunnels need to be demoted.
1848 	 */
1849 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1850 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1851 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1852 						 saddr, ul_tb_id,
1853 						 ipip_entry)) {
1854 		*demote_this = true;
1855 		return 0;
1856 	}
1857 
1858 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1859 						   true, true, false, extack);
1860 }
1861 
1862 static int
1863 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1864 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1865 				    struct net_device *ul_dev)
1866 {
1867 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1868 						   false, false, true, NULL);
1869 }
1870 
1871 static int
1872 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1873 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1874 				      struct net_device *ul_dev)
1875 {
1876 	/* A down underlay device causes encapsulated packets to not be
1877 	 * forwarded, but decap still works. So refresh next hops without
1878 	 * touching anything else.
1879 	 */
1880 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1881 						   false, false, true, NULL);
1882 }
1883 
1884 static int
1885 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1886 					struct net_device *ol_dev,
1887 					struct netlink_ext_ack *extack)
1888 {
1889 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1890 	struct mlxsw_sp_ipip_entry *ipip_entry;
1891 	int err;
1892 
1893 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1894 	if (!ipip_entry)
1895 		/* A change might make a tunnel eligible for offloading, but
1896 		 * that is currently not implemented. What falls to slow path
1897 		 * stays there.
1898 		 */
1899 		return 0;
1900 
1901 	/* A change might make a tunnel not eligible for offloading. */
1902 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1903 						 ipip_entry->ipipt)) {
1904 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1905 		return 0;
1906 	}
1907 
1908 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1909 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1910 	return err;
1911 }
1912 
1913 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1914 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1915 {
1916 	struct net_device *ol_dev = ipip_entry->ol_dev;
1917 
1918 	if (ol_dev->flags & IFF_UP)
1919 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1920 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1921 }
1922 
1923 /* The configuration where several tunnels have the same local address in the
1924  * same underlay table needs special treatment in the HW. That is currently not
1925  * implemented in the driver. This function finds and demotes the first tunnel
1926  * with a given source address, except the one passed in the argument
1927  * `except'.
1928  */
1929 bool
1930 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1931 				     enum mlxsw_sp_l3proto ul_proto,
1932 				     union mlxsw_sp_l3addr saddr,
1933 				     u32 ul_tb_id,
1934 				     const struct mlxsw_sp_ipip_entry *except)
1935 {
1936 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1937 
1938 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1939 				 ipip_list_node) {
1940 		if (ipip_entry != except &&
1941 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1942 						      ul_tb_id, ipip_entry)) {
1943 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1944 			return true;
1945 		}
1946 	}
1947 
1948 	return false;
1949 }
1950 
1951 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1952 						     struct net_device *ul_dev)
1953 {
1954 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1955 
1956 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1957 				 ipip_list_node) {
1958 		struct net_device *ol_dev = ipip_entry->ol_dev;
1959 		struct net_device *ipip_ul_dev;
1960 
1961 		rcu_read_lock();
1962 		ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1963 		rcu_read_unlock();
1964 		if (ipip_ul_dev == ul_dev)
1965 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1966 	}
1967 }
1968 
1969 static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1970 					    struct net_device *ol_dev,
1971 					    unsigned long event,
1972 					    struct netdev_notifier_info *info)
1973 {
1974 	struct netdev_notifier_changeupper_info *chup;
1975 	struct netlink_ext_ack *extack;
1976 	int err = 0;
1977 
1978 	switch (event) {
1979 	case NETDEV_REGISTER:
1980 		err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1981 		break;
1982 	case NETDEV_UNREGISTER:
1983 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1984 		break;
1985 	case NETDEV_UP:
1986 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1987 		break;
1988 	case NETDEV_DOWN:
1989 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1990 		break;
1991 	case NETDEV_CHANGEUPPER:
1992 		chup = container_of(info, typeof(*chup), info);
1993 		extack = info->extack;
1994 		if (netif_is_l3_master(chup->upper_dev))
1995 			err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1996 								   ol_dev,
1997 								   extack);
1998 		break;
1999 	case NETDEV_CHANGE:
2000 		extack = info->extack;
2001 		err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
2002 							      ol_dev, extack);
2003 		break;
2004 	case NETDEV_CHANGEMTU:
2005 		err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
2006 		break;
2007 	}
2008 	return err;
2009 }
2010 
2011 static int
2012 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
2013 				   struct mlxsw_sp_ipip_entry *ipip_entry,
2014 				   struct net_device *ul_dev,
2015 				   bool *demote_this,
2016 				   unsigned long event,
2017 				   struct netdev_notifier_info *info)
2018 {
2019 	struct netdev_notifier_changeupper_info *chup;
2020 	struct netlink_ext_ack *extack;
2021 
2022 	switch (event) {
2023 	case NETDEV_CHANGEUPPER:
2024 		chup = container_of(info, typeof(*chup), info);
2025 		extack = info->extack;
2026 		if (netif_is_l3_master(chup->upper_dev))
2027 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
2028 								    ipip_entry,
2029 								    ul_dev,
2030 								    demote_this,
2031 								    extack);
2032 		break;
2033 
2034 	case NETDEV_UP:
2035 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
2036 							   ul_dev);
2037 	case NETDEV_DOWN:
2038 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
2039 							     ipip_entry,
2040 							     ul_dev);
2041 	}
2042 	return 0;
2043 }
2044 
2045 static int
2046 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
2047 				 struct net_device *ul_dev,
2048 				 unsigned long event,
2049 				 struct netdev_notifier_info *info)
2050 {
2051 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
2052 	int err;
2053 
2054 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
2055 								ul_dev,
2056 								ipip_entry))) {
2057 		struct mlxsw_sp_ipip_entry *prev;
2058 		bool demote_this = false;
2059 
2060 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
2061 							 ul_dev, &demote_this,
2062 							 event, info);
2063 		if (err) {
2064 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
2065 								 ul_dev);
2066 			return err;
2067 		}
2068 
2069 		if (demote_this) {
2070 			if (list_is_first(&ipip_entry->ipip_list_node,
2071 					  &mlxsw_sp->router->ipip_list))
2072 				prev = NULL;
2073 			else
2074 				/* This can't be cached from previous iteration,
2075 				 * because that entry could be gone now.
2076 				 */
2077 				prev = list_prev_entry(ipip_entry,
2078 						       ipip_list_node);
2079 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
2080 			ipip_entry = prev;
2081 		}
2082 	}
2083 
2084 	return 0;
2085 }
2086 
2087 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2088 				      enum mlxsw_sp_l3proto ul_proto,
2089 				      const union mlxsw_sp_l3addr *ul_sip,
2090 				      u32 tunnel_index)
2091 {
2092 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2093 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2094 	struct mlxsw_sp_fib_entry *fib_entry;
2095 	int err = 0;
2096 
2097 	mutex_lock(&mlxsw_sp->router->lock);
2098 
2099 	if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
2100 		err = -EINVAL;
2101 		goto out;
2102 	}
2103 
2104 	router->nve_decap_config.ul_tb_id = ul_tb_id;
2105 	router->nve_decap_config.tunnel_index = tunnel_index;
2106 	router->nve_decap_config.ul_proto = ul_proto;
2107 	router->nve_decap_config.ul_sip = *ul_sip;
2108 	router->nve_decap_config.valid = true;
2109 
2110 	/* It is valid to create a tunnel with a local IP and only later
2111 	 * assign this IP address to a local interface
2112 	 */
2113 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2114 							 ul_proto, ul_sip,
2115 							 type);
2116 	if (!fib_entry)
2117 		goto out;
2118 
2119 	fib_entry->decap.tunnel_index = tunnel_index;
2120 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2121 
2122 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2123 	if (err)
2124 		goto err_fib_entry_update;
2125 
2126 	goto out;
2127 
2128 err_fib_entry_update:
2129 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2130 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2131 out:
2132 	mutex_unlock(&mlxsw_sp->router->lock);
2133 	return err;
2134 }
2135 
2136 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2137 				      enum mlxsw_sp_l3proto ul_proto,
2138 				      const union mlxsw_sp_l3addr *ul_sip)
2139 {
2140 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2141 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2142 	struct mlxsw_sp_fib_entry *fib_entry;
2143 
2144 	mutex_lock(&mlxsw_sp->router->lock);
2145 
2146 	if (WARN_ON_ONCE(!router->nve_decap_config.valid))
2147 		goto out;
2148 
2149 	router->nve_decap_config.valid = false;
2150 
2151 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2152 							 ul_proto, ul_sip,
2153 							 type);
2154 	if (!fib_entry)
2155 		goto out;
2156 
2157 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2158 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2159 out:
2160 	mutex_unlock(&mlxsw_sp->router->lock);
2161 }
2162 
2163 static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
2164 					 u32 ul_tb_id,
2165 					 enum mlxsw_sp_l3proto ul_proto,
2166 					 const union mlxsw_sp_l3addr *ul_sip)
2167 {
2168 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2169 
2170 	return router->nve_decap_config.valid &&
2171 	       router->nve_decap_config.ul_tb_id == ul_tb_id &&
2172 	       router->nve_decap_config.ul_proto == ul_proto &&
2173 	       !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
2174 		       sizeof(*ul_sip));
2175 }
2176 
/* Hash key of a neighbour entry: the kernel neighbour pointer itself. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
2180 
/* Driver-side state tracked for one kernel neighbour. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node; /* member of the RIF's neigh_list */
	struct rhash_head ht_node; /* node in the router's neigh_ht */
	struct mlxsw_sp_neigh_key key;
	u16 rif; /* RIF index recorded when the entry was allocated */
	bool connected; /* last add/delete state requested for the device */
	unsigned char ha[ETH_ALEN]; /* hardware address programmed for it */
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node; /* links the entry into the
						    * router's
						    * nexthop_neighs_list
						    */
	unsigned int counter_index; /* meaningful only if counter_valid */
	bool counter_valid; /* a flow counter is currently allocated */
};
2195 
2196 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
2197 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
2198 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
2199 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
2200 };
2201 
2202 struct mlxsw_sp_neigh_entry *
2203 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
2204 			struct mlxsw_sp_neigh_entry *neigh_entry)
2205 {
2206 	if (!neigh_entry) {
2207 		if (list_empty(&rif->neigh_list))
2208 			return NULL;
2209 		else
2210 			return list_first_entry(&rif->neigh_list,
2211 						typeof(*neigh_entry),
2212 						rif_list_node);
2213 	}
2214 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
2215 		return NULL;
2216 	return list_next_entry(neigh_entry, rif_list_node);
2217 }
2218 
2219 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
2220 {
2221 	return neigh_entry->key.n->tbl->family;
2222 }
2223 
2224 unsigned char *
2225 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
2226 {
2227 	return neigh_entry->ha;
2228 }
2229 
2230 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2231 {
2232 	struct neighbour *n;
2233 
2234 	n = neigh_entry->key.n;
2235 	return ntohl(*((__be32 *) n->primary_key));
2236 }
2237 
2238 struct in6_addr *
2239 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2240 {
2241 	struct neighbour *n;
2242 
2243 	n = neigh_entry->key.n;
2244 	return (struct in6_addr *) &n->primary_key;
2245 }
2246 
2247 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
2248 			       struct mlxsw_sp_neigh_entry *neigh_entry,
2249 			       u64 *p_counter)
2250 {
2251 	if (!neigh_entry->counter_valid)
2252 		return -EINVAL;
2253 
2254 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
2255 					 false, p_counter, NULL);
2256 }
2257 
2258 static struct mlxsw_sp_neigh_entry *
2259 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
2260 			   u16 rif)
2261 {
2262 	struct mlxsw_sp_neigh_entry *neigh_entry;
2263 
2264 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2265 	if (!neigh_entry)
2266 		return NULL;
2267 
2268 	neigh_entry->key.n = n;
2269 	neigh_entry->rif = rif;
2270 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
2271 
2272 	return neigh_entry;
2273 }
2274 
/* Release the memory of a neighbour entry. Only frees; any unlinking from
 * lists or the hash table is left to the caller.
 */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
2279 
2280 static int
2281 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2282 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2283 {
2284 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2285 				      &neigh_entry->ht_node,
2286 				      mlxsw_sp_neigh_ht_params);
2287 }
2288 
2289 static void
2290 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2291 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2292 {
2293 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2294 			       &neigh_entry->ht_node,
2295 			       mlxsw_sp_neigh_ht_params);
2296 }
2297 
2298 static bool
2299 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2300 				    struct mlxsw_sp_neigh_entry *neigh_entry)
2301 {
2302 	struct devlink *devlink;
2303 	const char *table_name;
2304 
2305 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2306 	case AF_INET:
2307 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2308 		break;
2309 	case AF_INET6:
2310 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2311 		break;
2312 	default:
2313 		WARN_ON(1);
2314 		return false;
2315 	}
2316 
2317 	devlink = priv_to_devlink(mlxsw_sp->core);
2318 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2319 }
2320 
2321 static void
2322 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2323 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2324 {
2325 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2326 		return;
2327 
2328 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2329 		return;
2330 
2331 	neigh_entry->counter_valid = true;
2332 }
2333 
2334 static void
2335 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2336 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2337 {
2338 	if (!neigh_entry->counter_valid)
2339 		return;
2340 	mlxsw_sp_flow_counter_free(mlxsw_sp,
2341 				   neigh_entry->counter_index);
2342 	neigh_entry->counter_valid = false;
2343 }
2344 
2345 static struct mlxsw_sp_neigh_entry *
2346 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2347 {
2348 	struct mlxsw_sp_neigh_entry *neigh_entry;
2349 	struct mlxsw_sp_rif *rif;
2350 	int err;
2351 
2352 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2353 	if (!rif)
2354 		return ERR_PTR(-EINVAL);
2355 
2356 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2357 	if (!neigh_entry)
2358 		return ERR_PTR(-ENOMEM);
2359 
2360 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2361 	if (err)
2362 		goto err_neigh_entry_insert;
2363 
2364 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2365 	atomic_inc(&mlxsw_sp->router->neighs_update.neigh_count);
2366 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2367 
2368 	return neigh_entry;
2369 
2370 err_neigh_entry_insert:
2371 	mlxsw_sp_neigh_entry_free(neigh_entry);
2372 	return ERR_PTR(err);
2373 }
2374 
2375 static void
2376 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2377 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2378 {
2379 	list_del(&neigh_entry->rif_list_node);
2380 	atomic_dec(&mlxsw_sp->router->neighs_update.neigh_count);
2381 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2382 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2383 	mlxsw_sp_neigh_entry_free(neigh_entry);
2384 }
2385 
2386 static struct mlxsw_sp_neigh_entry *
2387 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2388 {
2389 	struct mlxsw_sp_neigh_key key;
2390 
2391 	key.n = n;
2392 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2393 				      &key, mlxsw_sp_neigh_ht_params);
2394 }
2395 
2396 static void
2397 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2398 {
2399 	unsigned long interval;
2400 
2401 #if IS_ENABLED(CONFIG_IPV6)
2402 	interval = min_t(unsigned long,
2403 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2404 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2405 #else
2406 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2407 #endif
2408 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2409 }
2410 
2411 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2412 						   char *rauhtd_pl,
2413 						   int ent_index)
2414 {
2415 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2416 	struct net_device *dev;
2417 	struct neighbour *n;
2418 	__be32 dipn;
2419 	u32 dip;
2420 	u16 rif;
2421 
2422 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2423 
2424 	if (WARN_ON_ONCE(rif >= max_rifs))
2425 		return;
2426 	if (!mlxsw_sp->router->rifs[rif]) {
2427 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2428 		return;
2429 	}
2430 
2431 	dipn = htonl(dip);
2432 	dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]);
2433 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2434 	if (!n)
2435 		return;
2436 
2437 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2438 	neigh_event_send(n, NULL);
2439 	neigh_release(n);
2440 }
2441 
2442 #if IS_ENABLED(CONFIG_IPV6)
2443 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2444 						   char *rauhtd_pl,
2445 						   int rec_index)
2446 {
2447 	struct net_device *dev;
2448 	struct neighbour *n;
2449 	struct in6_addr dip;
2450 	u16 rif;
2451 
2452 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2453 					 (char *) &dip);
2454 
2455 	if (!mlxsw_sp->router->rifs[rif]) {
2456 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2457 		return;
2458 	}
2459 
2460 	dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]);
2461 	n = neigh_lookup(&nd_tbl, &dip, dev);
2462 	if (!n)
2463 		return;
2464 
2465 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2466 	neigh_event_send(n, NULL);
2467 	neigh_release(n);
2468 }
2469 #else
2470 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2471 						   char *rauhtd_pl,
2472 						   int rec_index)
2473 {
2474 }
2475 #endif
2476 
2477 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2478 						   char *rauhtd_pl,
2479 						   int rec_index)
2480 {
2481 	u8 num_entries;
2482 	int i;
2483 
2484 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2485 								rec_index);
2486 	/* Hardware starts counting at 0, so add 1. */
2487 	num_entries++;
2488 
2489 	/* Each record consists of several neighbour entries. */
2490 	for (i = 0; i < num_entries; i++) {
2491 		int ent_index;
2492 
2493 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2494 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2495 						       ent_index);
2496 	}
2497 
2498 }
2499 
/* Process IPv6 dump record @rec_index. The record index is passed on
 * unchanged as the entry index.
 */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	/* One record contains one entry. */
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
					       rec_index);
}
2508 
2509 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2510 					      char *rauhtd_pl, int rec_index)
2511 {
2512 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2513 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2514 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2515 						       rec_index);
2516 		break;
2517 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2518 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2519 						       rec_index);
2520 		break;
2521 	}
2522 }
2523 
2524 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2525 {
2526 	u8 num_rec, last_rec_index, num_entries;
2527 
2528 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2529 	last_rec_index = num_rec - 1;
2530 
2531 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2532 		return false;
2533 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2534 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2535 		return true;
2536 
2537 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2538 								last_rec_index);
2539 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2540 		return true;
2541 	return false;
2542 }
2543 
2544 static int
2545 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2546 				       char *rauhtd_pl,
2547 				       enum mlxsw_reg_rauhtd_type type)
2548 {
2549 	int i, num_rec;
2550 	int err;
2551 
2552 	/* Ensure the RIF we read from the device does not change mid-dump. */
2553 	mutex_lock(&mlxsw_sp->router->lock);
2554 	do {
2555 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2556 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2557 				      rauhtd_pl);
2558 		if (err) {
2559 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2560 			break;
2561 		}
2562 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2563 		for (i = 0; i < num_rec; i++)
2564 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2565 							  i);
2566 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2567 	mutex_unlock(&mlxsw_sp->router->lock);
2568 
2569 	return err;
2570 }
2571 
2572 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2573 {
2574 	enum mlxsw_reg_rauhtd_type type;
2575 	char *rauhtd_pl;
2576 	int err;
2577 
2578 	if (!atomic_read(&mlxsw_sp->router->neighs_update.neigh_count))
2579 		return 0;
2580 
2581 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2582 	if (!rauhtd_pl)
2583 		return -ENOMEM;
2584 
2585 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2586 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2587 	if (err)
2588 		goto out;
2589 
2590 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2591 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2592 out:
2593 	kfree(rauhtd_pl);
2594 	return err;
2595 }
2596 
2597 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2598 {
2599 	struct mlxsw_sp_neigh_entry *neigh_entry;
2600 
2601 	mutex_lock(&mlxsw_sp->router->lock);
2602 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2603 			    nexthop_neighs_list_node)
2604 		/* If this neigh have nexthops, make the kernel think this neigh
2605 		 * is active regardless of the traffic.
2606 		 */
2607 		neigh_event_send(neigh_entry->key.n, NULL);
2608 	mutex_unlock(&mlxsw_sp->router->lock);
2609 }
2610 
2611 static void
2612 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2613 {
2614 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2615 
2616 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2617 			       msecs_to_jiffies(interval));
2618 }
2619 
2620 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2621 {
2622 	struct mlxsw_sp_router *router;
2623 	int err;
2624 
2625 	router = container_of(work, struct mlxsw_sp_router,
2626 			      neighs_update.dw.work);
2627 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2628 	if (err)
2629 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2630 
2631 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2632 
2633 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2634 }
2635 
2636 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2637 {
2638 	struct mlxsw_sp_neigh_entry *neigh_entry;
2639 	struct mlxsw_sp_router *router;
2640 
2641 	router = container_of(work, struct mlxsw_sp_router,
2642 			      nexthop_probe_dw.work);
2643 	/* Iterate over nexthop neighbours, find those who are unresolved and
2644 	 * send arp on them. This solves the chicken-egg problem when
2645 	 * the nexthop wouldn't get offloaded until the neighbor is resolved
2646 	 * but it wouldn't get resolved ever in case traffic is flowing in HW
2647 	 * using different nexthop.
2648 	 */
2649 	mutex_lock(&router->lock);
2650 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2651 			    nexthop_neighs_list_node)
2652 		if (!neigh_entry->connected)
2653 			neigh_event_send(neigh_entry->key.n, NULL);
2654 	mutex_unlock(&router->lock);
2655 
2656 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2657 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2658 }
2659 
2660 static void
2661 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2662 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2663 			      bool removing, bool dead);
2664 
2665 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2666 {
2667 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2668 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2669 }
2670 
2671 static int
2672 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2673 				struct mlxsw_sp_neigh_entry *neigh_entry,
2674 				enum mlxsw_reg_rauht_op op)
2675 {
2676 	struct neighbour *n = neigh_entry->key.n;
2677 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2678 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2679 
2680 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2681 			      dip);
2682 	if (neigh_entry->counter_valid)
2683 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2684 					     neigh_entry->counter_index);
2685 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2686 }
2687 
2688 static int
2689 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2690 				struct mlxsw_sp_neigh_entry *neigh_entry,
2691 				enum mlxsw_reg_rauht_op op)
2692 {
2693 	struct neighbour *n = neigh_entry->key.n;
2694 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2695 	const char *dip = n->primary_key;
2696 
2697 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2698 			      dip);
2699 	if (neigh_entry->counter_valid)
2700 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2701 					     neigh_entry->counter_index);
2702 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2703 }
2704 
2705 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2706 {
2707 	struct neighbour *n = neigh_entry->key.n;
2708 
2709 	/* Packets with a link-local destination address are trapped
2710 	 * after LPM lookup and never reach the neighbour table, so
2711 	 * there is no need to program such neighbours to the device.
2712 	 */
2713 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2714 	    IPV6_ADDR_LINKLOCAL)
2715 		return true;
2716 	return false;
2717 }
2718 
2719 static void
2720 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2721 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2722 			    bool adding)
2723 {
2724 	enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2725 	int err;
2726 
2727 	if (!adding && !neigh_entry->connected)
2728 		return;
2729 	neigh_entry->connected = adding;
2730 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2731 		err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2732 						      op);
2733 		if (err)
2734 			return;
2735 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2736 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2737 			return;
2738 		err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2739 						      op);
2740 		if (err)
2741 			return;
2742 	} else {
2743 		WARN_ON_ONCE(1);
2744 		return;
2745 	}
2746 
2747 	if (adding)
2748 		neigh_entry->key.n->flags |= NTF_OFFLOADED;
2749 	else
2750 		neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2751 }
2752 
2753 void
2754 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2755 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2756 				    bool adding)
2757 {
2758 	if (adding)
2759 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2760 	else
2761 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2762 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2763 }
2764 
/* Deferred-work context for netevent handling. Allocated by the scheduler
 * and freed by the work handler.
 */
struct mlxsw_sp_netevent_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n; /* NULL for events that carry no neighbour */
};
2770 
2771 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2772 {
2773 	struct mlxsw_sp_netevent_work *net_work =
2774 		container_of(work, struct mlxsw_sp_netevent_work, work);
2775 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2776 	struct mlxsw_sp_neigh_entry *neigh_entry;
2777 	struct neighbour *n = net_work->n;
2778 	unsigned char ha[ETH_ALEN];
2779 	bool entry_connected;
2780 	u8 nud_state, dead;
2781 
2782 	/* If these parameters are changed after we release the lock,
2783 	 * then we are guaranteed to receive another event letting us
2784 	 * know about it.
2785 	 */
2786 	read_lock_bh(&n->lock);
2787 	memcpy(ha, n->ha, ETH_ALEN);
2788 	nud_state = n->nud_state;
2789 	dead = n->dead;
2790 	read_unlock_bh(&n->lock);
2791 
2792 	mutex_lock(&mlxsw_sp->router->lock);
2793 	mlxsw_sp_span_respin(mlxsw_sp);
2794 
2795 	entry_connected = nud_state & NUD_VALID && !dead;
2796 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2797 	if (!entry_connected && !neigh_entry)
2798 		goto out;
2799 	if (!neigh_entry) {
2800 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2801 		if (IS_ERR(neigh_entry))
2802 			goto out;
2803 	}
2804 
2805 	if (neigh_entry->connected && entry_connected &&
2806 	    !memcmp(neigh_entry->ha, ha, ETH_ALEN))
2807 		goto out;
2808 
2809 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2810 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2811 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2812 				      dead);
2813 
2814 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2815 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2816 
2817 out:
2818 	mutex_unlock(&mlxsw_sp->router->lock);
2819 	neigh_release(n);
2820 	kfree(net_work);
2821 }
2822 
2823 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2824 
2825 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2826 {
2827 	struct mlxsw_sp_netevent_work *net_work =
2828 		container_of(work, struct mlxsw_sp_netevent_work, work);
2829 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2830 
2831 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2832 	kfree(net_work);
2833 }
2834 
2835 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2836 
2837 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2838 {
2839 	struct mlxsw_sp_netevent_work *net_work =
2840 		container_of(work, struct mlxsw_sp_netevent_work, work);
2841 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2842 
2843 	__mlxsw_sp_router_init(mlxsw_sp);
2844 	kfree(net_work);
2845 }
2846 
2847 static int mlxsw_sp_router_schedule_work(struct net *net,
2848 					 struct mlxsw_sp_router *router,
2849 					 struct neighbour *n,
2850 					 void (*cb)(struct work_struct *))
2851 {
2852 	struct mlxsw_sp_netevent_work *net_work;
2853 
2854 	if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2855 		return NOTIFY_DONE;
2856 
2857 	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2858 	if (!net_work)
2859 		return NOTIFY_BAD;
2860 
2861 	INIT_WORK(&net_work->work, cb);
2862 	net_work->mlxsw_sp = router->mlxsw_sp;
2863 	net_work->n = n;
2864 	mlxsw_core_schedule_work(&net_work->work);
2865 	return NOTIFY_DONE;
2866 }
2867 
2868 static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev)
2869 {
2870 	struct mlxsw_sp_port *mlxsw_sp_port;
2871 
2872 	rcu_read_lock();
2873 	mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev);
2874 	rcu_read_unlock();
2875 	return !!mlxsw_sp_port;
2876 }
2877 
2878 static int mlxsw_sp_router_schedule_neigh_work(struct mlxsw_sp_router *router,
2879 					       struct neighbour *n)
2880 {
2881 	struct net *net;
2882 
2883 	net = neigh_parms_net(n->parms);
2884 
2885 	/* Take a reference to ensure the neighbour won't be destructed until we
2886 	 * drop the reference in delayed work.
2887 	 */
2888 	neigh_clone(n);
2889 	return mlxsw_sp_router_schedule_work(net, router, n,
2890 					     mlxsw_sp_router_neigh_event_work);
2891 }
2892 
2893 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2894 					  unsigned long event, void *ptr)
2895 {
2896 	struct mlxsw_sp_router *router;
2897 	unsigned long interval;
2898 	struct neigh_parms *p;
2899 	struct neighbour *n;
2900 
2901 	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2902 
2903 	switch (event) {
2904 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2905 		p = ptr;
2906 
2907 		/* We don't care about changes in the default table. */
2908 		if (!p->dev || (p->tbl->family != AF_INET &&
2909 				p->tbl->family != AF_INET6))
2910 			return NOTIFY_DONE;
2911 
2912 		/* We are in atomic context and can't take RTNL mutex,
2913 		 * so use RCU variant to walk the device chain.
2914 		 */
2915 		if (!mlxsw_sp_dev_lower_is_port(p->dev))
2916 			return NOTIFY_DONE;
2917 
2918 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2919 		router->neighs_update.interval = interval;
2920 		break;
2921 	case NETEVENT_NEIGH_UPDATE:
2922 		n = ptr;
2923 
2924 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2925 			return NOTIFY_DONE;
2926 
2927 		if (!mlxsw_sp_dev_lower_is_port(n->dev))
2928 			return NOTIFY_DONE;
2929 
2930 		return mlxsw_sp_router_schedule_neigh_work(router, n);
2931 
2932 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2933 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2934 		return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2935 				mlxsw_sp_router_mp_hash_event_work);
2936 
2937 	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2938 		return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2939 				mlxsw_sp_router_update_priority_work);
2940 	}
2941 
2942 	return NOTIFY_DONE;
2943 }
2944 
2945 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2946 {
2947 	int err;
2948 
2949 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2950 			      &mlxsw_sp_neigh_ht_params);
2951 	if (err)
2952 		return err;
2953 
2954 	/* Initialize the polling interval according to the default
2955 	 * table.
2956 	 */
2957 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2958 
2959 	/* Create the delayed works for the activity_update */
2960 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2961 			  mlxsw_sp_router_neighs_update_work);
2962 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2963 			  mlxsw_sp_router_probe_unresolved_nexthops);
2964 	atomic_set(&mlxsw_sp->router->neighs_update.neigh_count, 0);
2965 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2966 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2967 	return 0;
2968 }
2969 
2970 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2971 {
2972 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2973 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2974 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2975 }
2976 
2977 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2978 					 struct mlxsw_sp_rif *rif)
2979 {
2980 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2981 
2982 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2983 				 rif_list_node) {
2984 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2985 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2986 	}
2987 }
2988 
/* Context passed to the neigh_for_each() callback when syncing neighbours
 * for a newly created RIF.
 */
struct mlxsw_sp_neigh_rif_made_sync {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err; /* first error hit by the callback; 0 if none */
};
2994 
2995 static void mlxsw_sp_neigh_rif_made_sync_each(struct neighbour *n, void *data)
2996 {
2997 	struct mlxsw_sp_neigh_rif_made_sync *rms = data;
2998 	int rc;
2999 
3000 	if (rms->err)
3001 		return;
3002 	if (n->dev != mlxsw_sp_rif_dev(rms->rif))
3003 		return;
3004 	rc = mlxsw_sp_router_schedule_neigh_work(rms->mlxsw_sp->router, n);
3005 	if (rc != NOTIFY_DONE)
3006 		rms->err = -ENOMEM;
3007 }
3008 
3009 static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
3010 					struct mlxsw_sp_rif *rif)
3011 {
3012 	struct mlxsw_sp_neigh_rif_made_sync rms = {
3013 		.mlxsw_sp = mlxsw_sp,
3014 		.rif = rif,
3015 	};
3016 
3017 	neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
3018 	if (rms.err)
3019 		goto err_arp;
3020 
3021 #if IS_ENABLED(CONFIG_IPV6)
3022 	neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
3023 #endif
3024 	if (rms.err)
3025 		goto err_nd;
3026 
3027 	return 0;
3028 
3029 err_nd:
3030 err_arp:
3031 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
3032 	return rms.err;
3033 }
3034 
/* Kind of nexthop. mlxsw_sp_nexthop_ha() reads the neigh_entry member of
 * struct mlxsw_sp_nexthop only for ETH nexthops, while the IPIP update path
 * reads the ipip_entry member.
 */
3035 enum mlxsw_sp_nexthop_type {
3036 	MLXSW_SP_NEXTHOP_TYPE_ETH,
3037 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
3038 };
3039 
/* What a nexthop does with packets. __mlxsw_sp_nexthop_eth_update()
 * translates the action into RATR register fields, and
 * mlxsw_sp_nexthop_update() programs DISCARD and TRAP nexthops as Ethernet
 * nexthops regardless of their type.
 */
3040 enum mlxsw_sp_nexthop_action {
3041 	/* Nexthop forwards packets to an egress RIF */
3042 	MLXSW_SP_NEXTHOP_ACTION_FORWARD,
3043 	/* Nexthop discards packets */
3044 	MLXSW_SP_NEXTHOP_ACTION_DISCARD,
3045 	/* Nexthop traps packets */
3046 	MLXSW_SP_NEXTHOP_ACTION_TRAP,
3047 };
3048 
/* Key under which a nexthop is stored in the router's nexthop_ht (see
 * mlxsw_sp_nexthop_ht_params): the fib_nh pointer itself.
 */
3049 struct mlxsw_sp_nexthop_key {
3050 	struct fib_nh *fib_nh;
3051 };
3052 
/* Declared ahead of its definition so that struct mlxsw_sp_nexthop can hold a
 * pointer to it.
 */
3053 struct mlxsw_sp_nexthop_counter;
3054 
/* A single nexthop. Nexthops live in the nexthops[] array of their group info
 * (nhgi) and are additionally linked on the router's nexthop_list through
 * router_list_node and hashed in the router's nexthop_ht by @key.
 *
 * nh_weight is the configured weight; norm_nh_weight is that weight divided
 * by the gcd computed in mlxsw_sp_nexthop_group_normalize(), and
 * num_adj_entries is the number of consecutive adjacency entries assigned by
 * mlxsw_sp_nexthop_group_rebalance().
 *
 * counter is NULL while counting is disabled. A non-zero id makes the counter
 * code use the shared, reference-counted counter stored in the group's
 * nexthop_counters xarray instead of a private one.
 *
 * gw_addr is sized for an IPv6 address and is compared as a struct in6_addr
 * by the IPv6 group comparison code.
 */
3055 struct mlxsw_sp_nexthop {
3056 	struct list_head neigh_list_node; /* member of neigh entry list */
3057 	struct list_head crif_list_node;
3058 	struct list_head router_list_node;
3059 	struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
3060 						   * this nexthop belongs to
3061 						   */
3062 	struct rhash_head ht_node;
3063 	struct neigh_table *neigh_tbl;
3064 	struct mlxsw_sp_nexthop_key key;
3065 	unsigned char gw_addr[sizeof(struct in6_addr)];
3066 	int ifindex;
3067 	int nh_weight;
3068 	int norm_nh_weight;
3069 	int num_adj_entries;
3070 	struct mlxsw_sp_crif *crif;
3071 	u8 should_offload:1, /* set indicates this nexthop should be written
3072 			      * to the adjacency table.
3073 			      */
3074 	   offloaded:1, /* set indicates this nexthop was written to the
3075 			 * adjacency table.
3076 			 */
3077 	   update:1; /* set indicates this nexthop should be updated in the
3078 		      * adjacency table (f.e., its MAC changed).
3079 		      */
3080 	enum mlxsw_sp_nexthop_action action;
3081 	enum mlxsw_sp_nexthop_type type;
3082 	union {
3083 		struct mlxsw_sp_neigh_entry *neigh_entry;
3084 		struct mlxsw_sp_ipip_entry *ipip_entry;
3085 	};
3086 	struct mlxsw_sp_nexthop_counter *counter;
3087 	u32 id;		/* NH ID for members of a NH object group. */
3088 };
3089 
3090 static struct net_device *
3091 mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh)
3092 {
3093 	if (!nh->crif)
3094 		return NULL;
3095 	return nh->crif->key.dev;
3096 }
3097 
/* Flavour of a nexthop group. The type selects how a group is hashed and
 * compared in the router's nexthop_group_ht: IPv4 groups by fib_info pointer,
 * IPv6 groups by their set of nexthops, and OBJ groups by nexthop object ID.
 */
3098 enum mlxsw_sp_nexthop_group_type {
3099 	MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
3100 	MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
3101 	MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
3102 };
3103 
/* Per-group nexthop state.
 *
 * adj_index is the base adjacency index of the group and is only meaningful
 * while adj_index_valid is set; ecmp_size is the group size reported next to
 * it by mlxsw_sp_nexthop_indexes(). count is the number of elements in
 * nexthops[]. sum_norm_weight is the sum of the normalized weights of the
 * nexthops that should be offloaded (see mlxsw_sp_nexthop_group_normalize()).
 * hw_stats requests a counter per nexthop, and nexthop_counters holds the
 * shared counters indexed by nexthop ID.
 */
3104 struct mlxsw_sp_nexthop_group_info {
3105 	struct mlxsw_sp_nexthop_group *nh_grp;
3106 	u32 adj_index;
3107 	u16 ecmp_size;
3108 	u16 count;
3109 	int sum_norm_weight;
3110 	u8 adj_index_valid:1,
3111 	   gateway:1, /* routes using the group use a gateway */
3112 	   is_resilient:1,
3113 	   hw_stats:1;
3114 	struct list_head list; /* member in nh_res_grp_list */
3115 	struct xarray nexthop_counters;
3116 	struct mlxsw_sp_nexthop nexthops[] __counted_by(count);
3117 };
3118 
3119 static struct mlxsw_sp_rif *
3120 mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi)
3121 {
3122 	struct mlxsw_sp_crif *crif = nhgi->nexthops[0].crif;
3123 
3124 	if (!crif)
3125 		return NULL;
3126 	return crif->rif;
3127 }
3128 
/* Key of a nexthop group's vr_ht: a virtual router ID plus an L3 protocol.
 * The table hashes sizeof(key) raw bytes (see key_len in
 * mlxsw_sp_nexthop_group_vr_ht_params); the lookup helper zeroes the whole
 * key before filling it, presumably so padding bytes compare equal.
 */
3129 struct mlxsw_sp_nexthop_group_vr_key {
3130 	u16 vr_id;
3131 	enum mlxsw_sp_l3proto proto;
3132 };
3133 
/* Reference-counted record of one (virtual router, protocol) pair linked to a
 * nexthop group. ref_count starts at 1 on creation, is bumped by every further
 * mlxsw_sp_nexthop_group_vr_link() and the entry is destroyed when
 * mlxsw_sp_nexthop_group_vr_unlink() drops it to zero.
 */
3134 struct mlxsw_sp_nexthop_group_vr_entry {
3135 	struct list_head list; /* member in vr_list */
3136 	struct rhash_head ht_node; /* member in vr_ht */
3137 	refcount_t ref_count;
3138 	struct mlxsw_sp_nexthop_group_vr_key key;
3139 };
3140 
/* A nexthop group. ht_node links the group into the router's
 * nexthop_group_ht. The anonymous union carries the identity used when
 * comparing groups: ipv4.fi for IPv4 groups and obj.id for nexthop object
 * groups (IPv6 groups are compared by their nexthops instead). vr_list and
 * vr_ht hold the struct mlxsw_sp_nexthop_group_vr_entry records of the group.
 */
3141 struct mlxsw_sp_nexthop_group {
3142 	struct rhash_head ht_node;
3143 	struct list_head fib_list; /* list of fib entries that use this group */
3144 	union {
3145 		struct {
3146 			struct fib_info *fi;
3147 		} ipv4;
3148 		struct {
3149 			u32 id;
3150 		} obj;
3151 	};
3152 	struct mlxsw_sp_nexthop_group_info *nhgi;
3153 	struct list_head vr_list;
3154 	struct rhashtable vr_ht;
3155 	enum mlxsw_sp_nexthop_group_type type;
3156 	bool can_destroy;
3157 };
3158 
/* A flow counter attached to a nexthop. counter_index is filled in by
 * mlxsw_sp_flow_counter_alloc(); ref_count starts at 1 and is only
 * incremented or decremented by the shared-counter get/put helpers.
 */
3159 struct mlxsw_sp_nexthop_counter {
3160 	unsigned int counter_index;
3161 	refcount_t ref_count;
3162 };
3163 
3164 static struct mlxsw_sp_nexthop_counter *
3165 mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp)
3166 {
3167 	struct mlxsw_sp_nexthop_counter *nhct;
3168 	int err;
3169 
3170 	nhct = kzalloc(sizeof(*nhct), GFP_KERNEL);
3171 	if (!nhct)
3172 		return ERR_PTR(-ENOMEM);
3173 
3174 	err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nhct->counter_index);
3175 	if (err)
3176 		goto err_counter_alloc;
3177 
3178 	refcount_set(&nhct->ref_count, 1);
3179 	return nhct;
3180 
3181 err_counter_alloc:
3182 	kfree(nhct);
3183 	return ERR_PTR(err);
3184 }
3185 
3186 static void
3187 mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
3188 			      struct mlxsw_sp_nexthop_counter *nhct)
3189 {
3190 	mlxsw_sp_flow_counter_free(mlxsw_sp, nhct->counter_index);
3191 	kfree(nhct);
3192 }
3193 
3194 static struct mlxsw_sp_nexthop_counter *
3195 mlxsw_sp_nexthop_sh_counter_get(struct mlxsw_sp *mlxsw_sp,
3196 				struct mlxsw_sp_nexthop *nh)
3197 {
3198 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
3199 	struct mlxsw_sp_nexthop_counter *nhct;
3200 	int err;
3201 
3202 	nhct = xa_load(&nh_grp->nhgi->nexthop_counters, nh->id);
3203 	if (nhct) {
3204 		refcount_inc(&nhct->ref_count);
3205 		return nhct;
3206 	}
3207 
3208 	nhct = mlxsw_sp_nexthop_counter_alloc(mlxsw_sp);
3209 	if (IS_ERR(nhct))
3210 		return nhct;
3211 
3212 	err = xa_err(xa_store(&nh_grp->nhgi->nexthop_counters, nh->id, nhct,
3213 			      GFP_KERNEL));
3214 	if (err)
3215 		goto err_store;
3216 
3217 	return nhct;
3218 
3219 err_store:
3220 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nhct);
3221 	return ERR_PTR(err);
3222 }
3223 
3224 static void mlxsw_sp_nexthop_sh_counter_put(struct mlxsw_sp *mlxsw_sp,
3225 					    struct mlxsw_sp_nexthop *nh)
3226 {
3227 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
3228 	struct mlxsw_sp_nexthop_counter *nhct;
3229 
3230 	nhct = xa_load(&nh_grp->nhgi->nexthop_counters, nh->id);
3231 	if (WARN_ON(!nhct))
3232 		return;
3233 
3234 	if (!refcount_dec_and_test(&nhct->ref_count))
3235 		return;
3236 
3237 	xa_erase(&nh_grp->nhgi->nexthop_counters, nh->id);
3238 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nhct);
3239 }
3240 
3241 int mlxsw_sp_nexthop_counter_enable(struct mlxsw_sp *mlxsw_sp,
3242 				    struct mlxsw_sp_nexthop *nh)
3243 {
3244 	const char *table_adj = MLXSW_SP_DPIPE_TABLE_NAME_ADJ;
3245 	struct mlxsw_sp_nexthop_counter *nhct;
3246 	struct devlink *devlink;
3247 	bool dpipe_stats;
3248 
3249 	if (nh->counter)
3250 		return 0;
3251 
3252 	devlink = priv_to_devlink(mlxsw_sp->core);
3253 	dpipe_stats = devlink_dpipe_table_counter_enabled(devlink, table_adj);
3254 	if (!(nh->nhgi->hw_stats || dpipe_stats))
3255 		return 0;
3256 
3257 	if (nh->id)
3258 		nhct = mlxsw_sp_nexthop_sh_counter_get(mlxsw_sp, nh);
3259 	else
3260 		nhct = mlxsw_sp_nexthop_counter_alloc(mlxsw_sp);
3261 	if (IS_ERR(nhct))
3262 		return PTR_ERR(nhct);
3263 
3264 	nh->counter = nhct;
3265 	return 0;
3266 }
3267 
3268 void mlxsw_sp_nexthop_counter_disable(struct mlxsw_sp *mlxsw_sp,
3269 				      struct mlxsw_sp_nexthop *nh)
3270 {
3271 	if (!nh->counter)
3272 		return;
3273 
3274 	if (nh->id)
3275 		mlxsw_sp_nexthop_sh_counter_put(mlxsw_sp, nh);
3276 	else
3277 		mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh->counter);
3278 	nh->counter = NULL;
3279 }
3280 
3281 static int mlxsw_sp_nexthop_counter_update(struct mlxsw_sp *mlxsw_sp,
3282 					   struct mlxsw_sp_nexthop *nh)
3283 {
3284 	if (nh->nhgi->hw_stats)
3285 		return mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
3286 	mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
3287 	return 0;
3288 }
3289 
3290 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
3291 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
3292 {
3293 	if (!nh->counter)
3294 		return -EINVAL;
3295 
3296 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter->counter_index,
3297 					 true, p_counter, NULL);
3298 }
3299 
3300 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
3301 					       struct mlxsw_sp_nexthop *nh)
3302 {
3303 	if (!nh) {
3304 		if (list_empty(&router->nexthop_list))
3305 			return NULL;
3306 		else
3307 			return list_first_entry(&router->nexthop_list,
3308 						typeof(*nh), router_list_node);
3309 	}
3310 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
3311 		return NULL;
3312 	return list_next_entry(nh, router_list_node);
3313 }
3314 
3315 bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
3316 {
3317 	return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD;
3318 }
3319 
3320 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
3321 {
3322 	if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH ||
3323 	    !mlxsw_sp_nexthop_is_forward(nh))
3324 		return NULL;
3325 	return nh->neigh_entry->ha;
3326 }
3327 
3328 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
3329 			     u32 *p_adj_size, u32 *p_adj_hash_index)
3330 {
3331 	struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3332 	u32 adj_hash_index = 0;
3333 	int i;
3334 
3335 	if (!nh->offloaded || !nhgi->adj_index_valid)
3336 		return -EINVAL;
3337 
3338 	*p_adj_index = nhgi->adj_index;
3339 	*p_adj_size = nhgi->ecmp_size;
3340 
3341 	for (i = 0; i < nhgi->count; i++) {
3342 		struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3343 
3344 		if (nh_iter == nh)
3345 			break;
3346 		if (nh_iter->offloaded)
3347 			adj_hash_index += nh_iter->num_adj_entries;
3348 	}
3349 
3350 	*p_adj_hash_index = adj_hash_index;
3351 	return 0;
3352 }
3353 
3354 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
3355 {
3356 	if (WARN_ON(!nh->crif))
3357 		return NULL;
3358 	return nh->crif->rif;
3359 }
3360 
3361 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
3362 {
3363 	struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3364 	int i;
3365 
3366 	for (i = 0; i < nhgi->count; i++) {
3367 		struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3368 
3369 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
3370 			return true;
3371 	}
3372 	return false;
3373 }
3374 
/* Parameters of a nexthop group's vr_ht: entries are struct
 * mlxsw_sp_nexthop_group_vr_entry, keyed by the whole
 * struct mlxsw_sp_nexthop_group_vr_key, with automatic shrinking enabled.
 */
3375 static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
3376 	.key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
3377 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
3378 	.key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key),
3379 	.automatic_shrinking = true,
3380 };
3381 
3382 static struct mlxsw_sp_nexthop_group_vr_entry *
3383 mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp,
3384 				       const struct mlxsw_sp_fib *fib)
3385 {
3386 	struct mlxsw_sp_nexthop_group_vr_key key;
3387 
3388 	memset(&key, 0, sizeof(key));
3389 	key.vr_id = fib->vr->id;
3390 	key.proto = fib->proto;
3391 	return rhashtable_lookup_fast(&nh_grp->vr_ht, &key,
3392 				      mlxsw_sp_nexthop_group_vr_ht_params);
3393 }
3394 
3395 static int
3396 mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp,
3397 				       const struct mlxsw_sp_fib *fib)
3398 {
3399 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3400 	int err;
3401 
3402 	vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL);
3403 	if (!vr_entry)
3404 		return -ENOMEM;
3405 
3406 	vr_entry->key.vr_id = fib->vr->id;
3407 	vr_entry->key.proto = fib->proto;
3408 	refcount_set(&vr_entry->ref_count, 1);
3409 
3410 	err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3411 				     mlxsw_sp_nexthop_group_vr_ht_params);
3412 	if (err)
3413 		goto err_hashtable_insert;
3414 
3415 	list_add(&vr_entry->list, &nh_grp->vr_list);
3416 
3417 	return 0;
3418 
3419 err_hashtable_insert:
3420 	kfree(vr_entry);
3421 	return err;
3422 }
3423 
3424 static void
3425 mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp,
3426 					struct mlxsw_sp_nexthop_group_vr_entry *vr_entry)
3427 {
3428 	list_del(&vr_entry->list);
3429 	rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3430 			       mlxsw_sp_nexthop_group_vr_ht_params);
3431 	kfree(vr_entry);
3432 }
3433 
3434 static int
3435 mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp,
3436 			       const struct mlxsw_sp_fib *fib)
3437 {
3438 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3439 
3440 	vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3441 	if (vr_entry) {
3442 		refcount_inc(&vr_entry->ref_count);
3443 		return 0;
3444 	}
3445 
3446 	return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib);
3447 }
3448 
3449 static void
3450 mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp,
3451 				 const struct mlxsw_sp_fib *fib)
3452 {
3453 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3454 
3455 	vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3456 	if (WARN_ON_ONCE(!vr_entry))
3457 		return;
3458 
3459 	if (!refcount_dec_and_test(&vr_entry->ref_count))
3460 		return;
3461 
3462 	mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry);
3463 }
3464 
/* Lookup key for the router's nexthop_group_ht. @type selects which union
 * member is used: fi for IPv4 groups, fib6_entry for IPv6 groups and id for
 * nexthop object groups.
 */
3465 struct mlxsw_sp_nexthop_group_cmp_arg {
3466 	enum mlxsw_sp_nexthop_group_type type;
3467 	union {
3468 		struct fib_info *fi;
3469 		struct mlxsw_sp_fib6_entry *fib6_entry;
3470 		u32 id;
3471 	};
3472 };
3473 
3474 static bool
3475 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
3476 				    const struct in6_addr *gw, int ifindex,
3477 				    int weight)
3478 {
3479 	int i;
3480 
3481 	for (i = 0; i < nh_grp->nhgi->count; i++) {
3482 		const struct mlxsw_sp_nexthop *nh;
3483 
3484 		nh = &nh_grp->nhgi->nexthops[i];
3485 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
3486 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
3487 			return true;
3488 	}
3489 
3490 	return false;
3491 }
3492 
3493 static bool
3494 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
3495 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
3496 {
3497 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3498 
3499 	if (nh_grp->nhgi->count != fib6_entry->nrt6)
3500 		return false;
3501 
3502 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3503 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3504 		struct in6_addr *gw;
3505 		int ifindex, weight;
3506 
3507 		ifindex = fib6_nh->fib_nh_dev->ifindex;
3508 		weight = fib6_nh->fib_nh_weight;
3509 		gw = &fib6_nh->fib_nh_gw6;
3510 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
3511 							 weight))
3512 			return false;
3513 	}
3514 
3515 	return true;
3516 }
3517 
3518 static int
3519 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
3520 {
3521 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
3522 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
3523 
3524 	if (nh_grp->type != cmp_arg->type)
3525 		return 1;
3526 
3527 	switch (cmp_arg->type) {
3528 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3529 		return cmp_arg->fi != nh_grp->ipv4.fi;
3530 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3531 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
3532 						    cmp_arg->fib6_entry);
3533 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3534 		return cmp_arg->id != nh_grp->obj.id;
3535 	default:
3536 		WARN_ON(1);
3537 		return 1;
3538 	}
3539 }
3540 
3541 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
3542 {
3543 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
3544 	const struct mlxsw_sp_nexthop *nh;
3545 	struct fib_info *fi;
3546 	unsigned int val;
3547 	int i;
3548 
3549 	switch (nh_grp->type) {
3550 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3551 		fi = nh_grp->ipv4.fi;
3552 		return jhash(&fi, sizeof(fi), seed);
3553 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3554 		val = nh_grp->nhgi->count;
3555 		for (i = 0; i < nh_grp->nhgi->count; i++) {
3556 			nh = &nh_grp->nhgi->nexthops[i];
3557 			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3558 			val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3559 		}
3560 		return jhash(&val, sizeof(val), seed);
3561 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3562 		return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
3563 	default:
3564 		WARN_ON(1);
3565 		return 0;
3566 	}
3567 }
3568 
3569 static u32
3570 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3571 {
3572 	unsigned int val = fib6_entry->nrt6;
3573 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3574 
3575 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3576 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3577 		struct net_device *dev = fib6_nh->fib_nh_dev;
3578 		struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3579 
3580 		val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3581 		val ^= jhash(gw, sizeof(*gw), seed);
3582 	}
3583 
3584 	return jhash(&val, sizeof(val), seed);
3585 }
3586 
3587 static u32
3588 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3589 {
3590 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3591 
3592 	switch (cmp_arg->type) {
3593 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3594 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3595 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3596 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3597 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3598 		return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
3599 	default:
3600 		WARN_ON(1);
3601 		return 0;
3602 	}
3603 }
3604 
/* Parameters of the router's nexthop_group_ht. The table uses custom
 * callbacks instead of a fixed-size key: hashfn hashes a lookup key (struct
 * mlxsw_sp_nexthop_group_cmp_arg), obj_hashfn hashes a stored group, and
 * obj_cmpfn compares a stored group against a lookup key.
 */
3605 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3606 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3607 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
3608 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
3609 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
3610 };
3611 
3612 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3613 					 struct mlxsw_sp_nexthop_group *nh_grp)
3614 {
3615 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3616 	    !nh_grp->nhgi->gateway)
3617 		return 0;
3618 
3619 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3620 				      &nh_grp->ht_node,
3621 				      mlxsw_sp_nexthop_group_ht_params);
3622 }
3623 
3624 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3625 					  struct mlxsw_sp_nexthop_group *nh_grp)
3626 {
3627 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3628 	    !nh_grp->nhgi->gateway)
3629 		return;
3630 
3631 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3632 			       &nh_grp->ht_node,
3633 			       mlxsw_sp_nexthop_group_ht_params);
3634 }
3635 
3636 static struct mlxsw_sp_nexthop_group *
3637 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3638 			       struct fib_info *fi)
3639 {
3640 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3641 
3642 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
3643 	cmp_arg.fi = fi;
3644 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3645 				      &cmp_arg,
3646 				      mlxsw_sp_nexthop_group_ht_params);
3647 }
3648 
3649 static struct mlxsw_sp_nexthop_group *
3650 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3651 			       struct mlxsw_sp_fib6_entry *fib6_entry)
3652 {
3653 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3654 
3655 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
3656 	cmp_arg.fib6_entry = fib6_entry;
3657 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3658 				      &cmp_arg,
3659 				      mlxsw_sp_nexthop_group_ht_params);
3660 }
3661 
/* Parameters of the router's nexthop_ht: entries are struct mlxsw_sp_nexthop,
 * keyed by their embedded struct mlxsw_sp_nexthop_key.
 */
3662 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3663 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3664 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3665 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
3666 };
3667 
3668 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3669 				   struct mlxsw_sp_nexthop *nh)
3670 {
3671 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3672 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3673 }
3674 
3675 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3676 				    struct mlxsw_sp_nexthop *nh)
3677 {
3678 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3679 			       mlxsw_sp_nexthop_ht_params);
3680 }
3681 
3682 static struct mlxsw_sp_nexthop *
3683 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3684 			struct mlxsw_sp_nexthop_key key)
3685 {
3686 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3687 				      mlxsw_sp_nexthop_ht_params);
3688 }
3689 
3690 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3691 					     enum mlxsw_sp_l3proto proto,
3692 					     u16 vr_id,
3693 					     u32 adj_index, u16 ecmp_size,
3694 					     u32 new_adj_index,
3695 					     u16 new_ecmp_size)
3696 {
3697 	char raleu_pl[MLXSW_REG_RALEU_LEN];
3698 
3699 	mlxsw_reg_raleu_pack(raleu_pl,
3700 			     (enum mlxsw_reg_ralxx_protocol) proto, vr_id,
3701 			     adj_index, ecmp_size, new_adj_index,
3702 			     new_ecmp_size);
3703 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3704 }
3705 
3706 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3707 					  struct mlxsw_sp_nexthop_group *nh_grp,
3708 					  u32 old_adj_index, u16 old_ecmp_size)
3709 {
3710 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3711 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3712 	int err;
3713 
3714 	list_for_each_entry(vr_entry, &nh_grp->vr_list, list) {
3715 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp,
3716 							vr_entry->key.proto,
3717 							vr_entry->key.vr_id,
3718 							old_adj_index,
3719 							old_ecmp_size,
3720 							nhgi->adj_index,
3721 							nhgi->ecmp_size);
3722 		if (err)
3723 			goto err_mass_update_vr;
3724 	}
3725 	return 0;
3726 
3727 err_mass_update_vr:
3728 	list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list)
3729 		mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto,
3730 						  vr_entry->key.vr_id,
3731 						  nhgi->adj_index,
3732 						  nhgi->ecmp_size,
3733 						  old_adj_index, old_ecmp_size);
3734 	return err;
3735 }
3736 
3737 static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
3738 					 u32 adj_index,
3739 					 struct mlxsw_sp_nexthop *nh,
3740 					 bool force, char *ratr_pl)
3741 {
3742 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3743 	struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh);
3744 	enum mlxsw_reg_ratr_op op;
3745 	u16 rif_index;
3746 
3747 	rif_index = rif ? rif->rif_index :
3748 			  mlxsw_sp->router->lb_crif->rif->rif_index;
3749 	op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
3750 		     MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
3751 	mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
3752 			    adj_index, rif_index);
3753 	switch (nh->action) {
3754 	case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
3755 		mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3756 		break;
3757 	case MLXSW_SP_NEXTHOP_ACTION_DISCARD:
3758 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
3759 					       MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
3760 		break;
3761 	case MLXSW_SP_NEXTHOP_ACTION_TRAP:
3762 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
3763 					       MLXSW_REG_RATR_TRAP_ACTION_TRAP);
3764 		mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
3765 		break;
3766 	default:
3767 		WARN_ON_ONCE(1);
3768 		return -EINVAL;
3769 	}
3770 	if (nh->counter)
3771 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter->counter_index,
3772 					    true);
3773 	else
3774 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3775 
3776 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3777 }
3778 
3779 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3780 				struct mlxsw_sp_nexthop *nh, bool force,
3781 				char *ratr_pl)
3782 {
3783 	int i;
3784 
3785 	for (i = 0; i < nh->num_adj_entries; i++) {
3786 		int err;
3787 
3788 		err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
3789 						    nh, force, ratr_pl);
3790 		if (err)
3791 			return err;
3792 	}
3793 
3794 	return 0;
3795 }
3796 
3797 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3798 					  u32 adj_index,
3799 					  struct mlxsw_sp_nexthop *nh,
3800 					  bool force, char *ratr_pl)
3801 {
3802 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3803 
3804 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3805 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
3806 					force, ratr_pl);
3807 }
3808 
3809 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3810 					u32 adj_index,
3811 					struct mlxsw_sp_nexthop *nh, bool force,
3812 					char *ratr_pl)
3813 {
3814 	int i;
3815 
3816 	for (i = 0; i < nh->num_adj_entries; i++) {
3817 		int err;
3818 
3819 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3820 						     nh, force, ratr_pl);
3821 		if (err)
3822 			return err;
3823 	}
3824 
3825 	return 0;
3826 }
3827 
3828 static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3829 				   struct mlxsw_sp_nexthop *nh, bool force,
3830 				   char *ratr_pl)
3831 {
3832 	/* When action is discard or trap, the nexthop must be
3833 	 * programmed as an Ethernet nexthop.
3834 	 */
3835 	if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
3836 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
3837 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3838 		return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
3839 						   force, ratr_pl);
3840 	else
3841 		return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
3842 						    force, ratr_pl);
3843 }
3844 
3845 static int
3846 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3847 			      struct mlxsw_sp_nexthop_group_info *nhgi,
3848 			      bool reallocate)
3849 {
3850 	char ratr_pl[MLXSW_REG_RATR_LEN];
3851 	u32 adj_index = nhgi->adj_index; /* base */
3852 	struct mlxsw_sp_nexthop *nh;
3853 	int i;
3854 
3855 	for (i = 0; i < nhgi->count; i++) {
3856 		nh = &nhgi->nexthops[i];
3857 
3858 		if (!nh->should_offload) {
3859 			mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
3860 			nh->offloaded = 0;
3861 			continue;
3862 		}
3863 
3864 		if (nh->update || reallocate) {
3865 			int err = 0;
3866 
3867 			err = mlxsw_sp_nexthop_counter_update(mlxsw_sp, nh);
3868 			if (err)
3869 				return err;
3870 
3871 			err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
3872 						      true, ratr_pl);
3873 			if (err)
3874 				return err;
3875 			nh->update = 0;
3876 			nh->offloaded = 1;
3877 		}
3878 		adj_index += nh->num_adj_entries;
3879 	}
3880 	return 0;
3881 }
3882 
3883 static int
3884 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3885 				    struct mlxsw_sp_nexthop_group *nh_grp)
3886 {
3887 	struct mlxsw_sp_fib_entry *fib_entry;
3888 	int err;
3889 
3890 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3891 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3892 		if (err)
3893 			return err;
3894 	}
3895 	return 0;
3896 }
3897 
/* One contiguous range of adjacency group sizes. The round-up and round-down
 * helpers walk an array of these through router->adj_grp_size_ranges.
 */
3898 struct mlxsw_sp_adj_grp_size_range {
3899 	u16 start; /* Inclusive */
3900 	u16 end; /* Inclusive */
3901 };
3902 
/* Adjacency group size ranges: any size from 1 to 64, plus the exact sizes
 * 512, 1024, 2048 and 4096. Presumably the table for Spectrum-1 devices; the
 * code selecting it is not in this part of the file (TODO confirm).
 */
3903 /* Ordered by range start value */
3904 static const struct mlxsw_sp_adj_grp_size_range
3905 mlxsw_sp1_adj_grp_size_ranges[] = {
3906 	{ .start = 1, .end = 64 },
3907 	{ .start = 512, .end = 512 },
3908 	{ .start = 1024, .end = 1024 },
3909 	{ .start = 2048, .end = 2048 },
3910 	{ .start = 4096, .end = 4096 },
3911 };
3912 
/* Adjacency group size ranges: any size from 1 to 128, plus the exact sizes
 * 256, 512, 1024, 2048 and 4096. Presumably the table for Spectrum-2 and
 * later devices; the code selecting it is not in this part of the file (TODO
 * confirm).
 */
3913 /* Ordered by range start value */
3914 static const struct mlxsw_sp_adj_grp_size_range
3915 mlxsw_sp2_adj_grp_size_ranges[] = {
3916 	{ .start = 1, .end = 128 },
3917 	{ .start = 256, .end = 256 },
3918 	{ .start = 512, .end = 512 },
3919 	{ .start = 1024, .end = 1024 },
3920 	{ .start = 2048, .end = 2048 },
3921 	{ .start = 4096, .end = 4096 },
3922 };
3923 
3924 static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp,
3925 					   u16 *p_adj_grp_size)
3926 {
3927 	int i;
3928 
3929 	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
3930 		const struct mlxsw_sp_adj_grp_size_range *size_range;
3931 
3932 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3933 
3934 		if (*p_adj_grp_size >= size_range->start &&
3935 		    *p_adj_grp_size <= size_range->end)
3936 			return;
3937 
3938 		if (*p_adj_grp_size <= size_range->end) {
3939 			*p_adj_grp_size = size_range->end;
3940 			return;
3941 		}
3942 	}
3943 }
3944 
3945 static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp,
3946 					     u16 *p_adj_grp_size,
3947 					     unsigned int alloc_size)
3948 {
3949 	int i;
3950 
3951 	for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) {
3952 		const struct mlxsw_sp_adj_grp_size_range *size_range;
3953 
3954 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3955 
3956 		if (alloc_size >= size_range->end) {
3957 			*p_adj_grp_size = size_range->end;
3958 			return;
3959 		}
3960 	}
3961 }
3962 
3963 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3964 				     u16 *p_adj_grp_size)
3965 {
3966 	unsigned int alloc_size;
3967 	int err;
3968 
3969 	/* Round up the requested group size to the next size supported
3970 	 * by the device and make sure the request can be satisfied.
3971 	 */
3972 	mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size);
3973 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3974 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3975 					      *p_adj_grp_size, &alloc_size);
3976 	if (err)
3977 		return err;
3978 	/* It is possible the allocation results in more allocated
3979 	 * entries than requested. Try to use as much of them as
3980 	 * possible.
3981 	 */
3982 	mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size);
3983 
3984 	return 0;
3985 }
3986 
3987 static void
3988 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
3989 {
3990 	int i, g = 0, sum_norm_weight = 0;
3991 	struct mlxsw_sp_nexthop *nh;
3992 
3993 	for (i = 0; i < nhgi->count; i++) {
3994 		nh = &nhgi->nexthops[i];
3995 
3996 		if (!nh->should_offload)
3997 			continue;
3998 		if (g > 0)
3999 			g = gcd(nh->nh_weight, g);
4000 		else
4001 			g = nh->nh_weight;
4002 	}
4003 
4004 	for (i = 0; i < nhgi->count; i++) {
4005 		nh = &nhgi->nexthops[i];
4006 
4007 		if (!nh->should_offload)
4008 			continue;
4009 		nh->norm_nh_weight = nh->nh_weight / g;
4010 		sum_norm_weight += nh->norm_nh_weight;
4011 	}
4012 
4013 	nhgi->sum_norm_weight = sum_norm_weight;
4014 }
4015 
4016 static void
4017 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
4018 {
4019 	int i, weight = 0, lower_bound = 0;
4020 	int total = nhgi->sum_norm_weight;
4021 	u16 ecmp_size = nhgi->ecmp_size;
4022 
4023 	for (i = 0; i < nhgi->count; i++) {
4024 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
4025 		int upper_bound;
4026 
4027 		if (!nh->should_offload)
4028 			continue;
4029 		weight += nh->norm_nh_weight;
4030 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
4031 		nh->num_adj_entries = upper_bound - lower_bound;
4032 		lower_bound = upper_bound;
4033 	}
4034 }
4035 
/* Forward declaration: __mlxsw_sp_nexthop6_group_offload_refresh() calls this
 * function before its definition appears.
 */
4036 static struct mlxsw_sp_nexthop *
4037 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
4038 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
4039 
4040 static void
4041 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
4042 					struct mlxsw_sp_nexthop_group *nh_grp)
4043 {
4044 	int i;
4045 
4046 	for (i = 0; i < nh_grp->nhgi->count; i++) {
4047 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
4048 
4049 		if (nh->offloaded)
4050 			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4051 		else
4052 			nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4053 	}
4054 }
4055 
4056 static void
4057 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
4058 					  struct mlxsw_sp_fib6_entry *fib6_entry)
4059 {
4060 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4061 
4062 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4063 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
4064 		struct mlxsw_sp_nexthop *nh;
4065 
4066 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
4067 		if (nh && nh->offloaded)
4068 			fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4069 		else
4070 			fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4071 	}
4072 }
4073 
4074 static void
4075 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
4076 					struct mlxsw_sp_nexthop_group *nh_grp)
4077 {
4078 	struct mlxsw_sp_fib6_entry *fib6_entry;
4079 
4080 	/* Unfortunately, in IPv6 the route and the nexthop are described by
4081 	 * the same struct, so we need to iterate over all the routes using the
4082 	 * nexthop group and set / clear the offload indication for them.
4083 	 */
4084 	list_for_each_entry(fib6_entry, &nh_grp->fib_list,
4085 			    common.nexthop_group_node)
4086 		__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
4087 }
4088 
4089 static void
4090 mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
4091 					const struct mlxsw_sp_nexthop *nh,
4092 					u16 bucket_index)
4093 {
4094 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
4095 	bool offload = false, trap = false;
4096 
4097 	if (nh->offloaded) {
4098 		if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
4099 			trap = true;
4100 		else
4101 			offload = true;
4102 	}
4103 	nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4104 				    bucket_index, offload, trap);
4105 }
4106 
4107 static void
4108 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
4109 					   struct mlxsw_sp_nexthop_group *nh_grp)
4110 {
4111 	int i;
4112 
4113 	/* Do not update the flags if the nexthop group is being destroyed
4114 	 * since:
4115 	 * 1. The nexthop objects is being deleted, in which case the flags are
4116 	 * irrelevant.
4117 	 * 2. The nexthop group was replaced by a newer group, in which case
4118 	 * the flags of the nexthop object were already updated based on the
4119 	 * new group.
4120 	 */
4121 	if (nh_grp->can_destroy)
4122 		return;
4123 
4124 	nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4125 			     nh_grp->nhgi->adj_index_valid, false);
4126 
4127 	/* Update flags of individual nexthop buckets in case of a resilient
4128 	 * nexthop group.
4129 	 */
4130 	if (!nh_grp->nhgi->is_resilient)
4131 		return;
4132 
4133 	for (i = 0; i < nh_grp->nhgi->count; i++) {
4134 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
4135 
4136 		mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
4137 	}
4138 }
4139 
4140 static void
4141 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
4142 				       struct mlxsw_sp_nexthop_group *nh_grp)
4143 {
4144 	switch (nh_grp->type) {
4145 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
4146 		mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
4147 		break;
4148 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
4149 		mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
4150 		break;
4151 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
4152 		mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
4153 		break;
4154 	}
4155 }
4156 
/* Bring the adjacency entries of a nexthop group in line with the current
 * should_offload state of its nexthops.
 *
 * Non-gateway groups only have their FIB entries updated. For gateway
 * groups:
 * - If no nexthop changed its offload state, the existing adjacency
 *   entries are updated in place.
 * - Otherwise the weights are normalized, a new adjacency block is
 *   allocated and populated, and the FIB entries are pointed at it (a plain
 *   FIB entries update if no block was valid before, a mass update from the
 *   old block otherwise).
 * - On any failure, or when no nexthop can be offloaded, the group falls
 *   back to trapping: the adjacency index is invalidated, all nexthops are
 *   marked as not offloaded and the FIB entries are updated accordingly.
 *
 * Returns 0 on success (including the "nothing to offload" trap case) or
 * the error code of the step that failed.
 */
static int
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
	u16 ecmp_size, old_ecmp_size;
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	bool old_adj_index_valid;
	u32 old_adj_index;
	int i, err2, err;

	if (!nhgi->gateway)
		return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);

	/* Detect nexthops whose desired state differs from the programmed
	 * one; those that are about to become offloaded are flagged for
	 * update.
	 */
	for (i = 0; i < nhgi->count; i++) {
		nh = &nhgi->nexthops[i];

		if (nh->should_offload != nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		/* Flags of individual nexthop buckets might need to be
		 * updated.
		 */
		mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
		return 0;
	}
	mlxsw_sp_nexthop_group_normalize(nhgi);
	if (!nhgi->sum_norm_weight) {
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		err = 0;
		goto set_trap;
	}

	/* Start from the sum of normalized weights and let the helper adjust
	 * it to a group size that can actually be allocated.
	 */
	ecmp_size = nhgi->sum_norm_weight;
	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
	if (err)
		/* No valid allocation size available. */
		goto set_trap;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				  ecmp_size, &adj_index);
	if (err) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	/* Remember the previous adjacency block before switching the group
	 * info over to the newly allocated one.
	 */
	old_adj_index_valid = nhgi->adj_index_valid;
	old_adj_index = nhgi->adj_index;
	old_ecmp_size = nhgi->ecmp_size;
	nhgi->adj_index_valid = 1;
	nhgi->adj_index = adj_index;
	nhgi->ecmp_size = ecmp_size;
	mlxsw_sp_nexthop_group_rebalance(nhgi);
	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
	if (err) {
		/* NOTE(review): set_trap frees only the block currently
		 * recorded in nhgi (the new one); a previously valid block
		 * (old_adj_index) does not appear to be freed on this path -
		 * confirm.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return 0;
	}

	/* The old block is released regardless of whether the mass update
	 * succeeded; the error is only checked afterwards.
	 */
	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
			   old_ecmp_size, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}

	return 0;

set_trap:
	/* Fall back to trapping: invalidate the adjacency index, mark every
	 * nexthop as not offloaded, update the FIB entries and flags, and
	 * free the adjacency block that was recorded in nhgi, if any. A
	 * failure of the FIB entries update is only logged; the original
	 * error (possibly 0) is what gets returned.
	 */
	old_adj_index_valid = nhgi->adj_index_valid;
	nhgi->adj_index_valid = 0;
	for (i = 0; i < nhgi->count; i++) {
		nh = &nhgi->nexthops[i];
		nh->offloaded = 0;
	}
	err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err2)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				   nhgi->ecmp_size, nhgi->adj_index);
	return err;
}
4275 
4276 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
4277 					    bool removing)
4278 {
4279 	if (!removing) {
4280 		nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
4281 		nh->should_offload = 1;
4282 	} else if (nh->nhgi->is_resilient) {
4283 		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
4284 		nh->should_offload = 1;
4285 	} else {
4286 		nh->should_offload = 0;
4287 	}
4288 	nh->update = 1;
4289 }
4290 
/* Replace the dead neighbour backing @neigh_entry with a live one.
 *
 * A neighbour for the gateway address of the first nexthop on the entry is
 * looked up (or created and probed), the entry is re-keyed to it, and every
 * nexthop on the entry has its reference moved from the old neighbour to
 * the new one before its group is refreshed.
 *
 * Returns 0 on success, the neigh_create() error if no neighbour could be
 * obtained, or the error of re-inserting the entry, in which case the old
 * key is restored.
 */
static int
mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n, *old_n = neigh_entry->key.n;
	struct mlxsw_sp_nexthop *nh;
	struct net_device *dev;
	bool entry_connected;
	u8 nud_state, dead;
	int err;

	/* The first nexthop on the list supplies the neighbour table,
	 * gateway address and device used for the lookup.
	 */
	nh = list_first_entry(&neigh_entry->nexthop_list,
			      struct mlxsw_sp_nexthop, neigh_list_node);
	dev = mlxsw_sp_nexthop_dev(nh);

	n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev);
	if (!n) {
		n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		neigh_event_send(n, NULL);
	}

	/* Re-key the entry: remove it, swap the neighbour, insert it back. */
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	neigh_entry->key.n = n;
	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	/* Sample the state of the new neighbour under its lock. */
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);
	entry_connected = nud_state & NUD_VALID && !dead;

	/* Each nexthop drops one reference on the old neighbour and takes
	 * one on the new neighbour.
	 */
	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		neigh_release(old_n);
		neigh_clone(n);
		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
	}

	/* Drop the reference obtained by neigh_lookup() / neigh_create(). */
	neigh_release(n);

	return 0;

err_neigh_entry_insert:
	/* Restore the old key; the result of re-inserting is not checked. */
	neigh_entry->key.n = old_n;
	mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	neigh_release(n);
	return err;
}
4344 
4345 static void
4346 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
4347 			      struct mlxsw_sp_neigh_entry *neigh_entry,
4348 			      bool removing, bool dead)
4349 {
4350 	struct mlxsw_sp_nexthop *nh;
4351 
4352 	if (list_empty(&neigh_entry->nexthop_list))
4353 		return;
4354 
4355 	if (dead) {
4356 		int err;
4357 
4358 		err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
4359 							  neigh_entry);
4360 		if (err)
4361 			dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
4362 		return;
4363 	}
4364 
4365 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
4366 			    neigh_list_node) {
4367 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
4368 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4369 	}
4370 }
4371 
4372 static void mlxsw_sp_nexthop_crif_init(struct mlxsw_sp_nexthop *nh,
4373 				       struct mlxsw_sp_crif *crif)
4374 {
4375 	if (nh->crif)
4376 		return;
4377 
4378 	nh->crif = crif;
4379 	list_add(&nh->crif_list_node, &crif->nexthop_list);
4380 }
4381 
4382 static void mlxsw_sp_nexthop_crif_fini(struct mlxsw_sp_nexthop *nh)
4383 {
4384 	if (!nh->crif)
4385 		return;
4386 
4387 	list_del(&nh->crif_list_node);
4388 	nh->crif = NULL;
4389 }
4390 
4391 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
4392 				       struct mlxsw_sp_nexthop *nh)
4393 {
4394 	struct mlxsw_sp_neigh_entry *neigh_entry;
4395 	struct net_device *dev;
4396 	struct neighbour *n;
4397 	u8 nud_state, dead;
4398 	int err;
4399 
4400 	if (WARN_ON(!nh->crif->rif))
4401 		return 0;
4402 
4403 	if (!nh->nhgi->gateway || nh->neigh_entry)
4404 		return 0;
4405 	dev = mlxsw_sp_nexthop_dev(nh);
4406 
4407 	/* Take a reference of neigh here ensuring that neigh would
4408 	 * not be destructed before the nexthop entry is finished.
4409 	 * The reference is taken either in neigh_lookup() or
4410 	 * in neigh_create() in case n is not found.
4411 	 */
4412 	n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev);
4413 	if (!n) {
4414 		n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev);
4415 		if (IS_ERR(n))
4416 			return PTR_ERR(n);
4417 		neigh_event_send(n, NULL);
4418 	}
4419 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
4420 	if (!neigh_entry) {
4421 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
4422 		if (IS_ERR(neigh_entry)) {
4423 			err = -EINVAL;
4424 			goto err_neigh_entry_create;
4425 		}
4426 	}
4427 
4428 	/* If that is the first nexthop connected to that neigh, add to
4429 	 * nexthop_neighs_list
4430 	 */
4431 	if (list_empty(&neigh_entry->nexthop_list))
4432 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
4433 			      &mlxsw_sp->router->nexthop_neighs_list);
4434 
4435 	nh->neigh_entry = neigh_entry;
4436 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
4437 	read_lock_bh(&n->lock);
4438 	nud_state = n->nud_state;
4439 	dead = n->dead;
4440 	read_unlock_bh(&n->lock);
4441 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
4442 
4443 	return 0;
4444 
4445 err_neigh_entry_create:
4446 	neigh_release(n);
4447 	return err;
4448 }
4449 
4450 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
4451 					struct mlxsw_sp_nexthop *nh)
4452 {
4453 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
4454 	struct neighbour *n;
4455 
4456 	if (!neigh_entry)
4457 		return;
4458 	n = neigh_entry->key.n;
4459 
4460 	__mlxsw_sp_nexthop_neigh_update(nh, true);
4461 	list_del(&nh->neigh_list_node);
4462 	nh->neigh_entry = NULL;
4463 
4464 	/* If that is the last nexthop connected to that neigh, remove from
4465 	 * nexthop_neighs_list
4466 	 */
4467 	if (list_empty(&neigh_entry->nexthop_list))
4468 		list_del(&neigh_entry->nexthop_neighs_list_node);
4469 
4470 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
4471 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
4472 
4473 	neigh_release(n);
4474 }
4475 
4476 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
4477 {
4478 	struct net_device *ul_dev;
4479 	bool is_up;
4480 
4481 	rcu_read_lock();
4482 	ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
4483 	is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
4484 	rcu_read_unlock();
4485 
4486 	return is_up;
4487 }
4488 
4489 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
4490 				       struct mlxsw_sp_nexthop *nh,
4491 				       struct mlxsw_sp_ipip_entry *ipip_entry)
4492 {
4493 	struct mlxsw_sp_crif *crif;
4494 	bool removing;
4495 
4496 	if (!nh->nhgi->gateway || nh->ipip_entry)
4497 		return;
4498 
4499 	crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, ipip_entry->ol_dev);
4500 	if (WARN_ON(!crif))
4501 		return;
4502 
4503 	nh->ipip_entry = ipip_entry;
4504 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
4505 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
4506 	mlxsw_sp_nexthop_crif_init(nh, crif);
4507 }
4508 
4509 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
4510 				       struct mlxsw_sp_nexthop *nh)
4511 {
4512 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
4513 
4514 	if (!ipip_entry)
4515 		return;
4516 
4517 	__mlxsw_sp_nexthop_neigh_update(nh, true);
4518 	nh->ipip_entry = NULL;
4519 }
4520 
4521 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4522 					const struct fib_nh *fib_nh,
4523 					enum mlxsw_sp_ipip_type *p_ipipt)
4524 {
4525 	struct net_device *dev = fib_nh->fib_nh_dev;
4526 
4527 	return dev &&
4528 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
4529 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
4530 }
4531 
4532 static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
4533 				      struct mlxsw_sp_nexthop *nh,
4534 				      const struct net_device *dev)
4535 {
4536 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4537 	struct mlxsw_sp_ipip_entry *ipip_entry;
4538 	struct mlxsw_sp_crif *crif;
4539 	int err;
4540 
4541 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4542 	if (ipip_entry) {
4543 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4544 		if (ipip_ops->can_offload(mlxsw_sp, dev)) {
4545 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4546 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4547 			return 0;
4548 		}
4549 	}
4550 
4551 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4552 	crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, dev);
4553 	if (!crif)
4554 		return 0;
4555 
4556 	mlxsw_sp_nexthop_crif_init(nh, crif);
4557 
4558 	if (!crif->rif)
4559 		return 0;
4560 
4561 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4562 	if (err)
4563 		goto err_neigh_init;
4564 
4565 	return 0;
4566 
4567 err_neigh_init:
4568 	mlxsw_sp_nexthop_crif_fini(nh);
4569 	return err;
4570 }
4571 
4572 static int mlxsw_sp_nexthop_type_rif_made(struct mlxsw_sp *mlxsw_sp,
4573 					  struct mlxsw_sp_nexthop *nh)
4574 {
4575 	switch (nh->type) {
4576 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
4577 		return mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4578 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4579 		break;
4580 	}
4581 
4582 	return 0;
4583 }
4584 
4585 static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp,
4586 					   struct mlxsw_sp_nexthop *nh)
4587 {
4588 	switch (nh->type) {
4589 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
4590 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
4591 		break;
4592 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4593 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
4594 		break;
4595 	}
4596 }
4597 
/* Full type-specific teardown of a nexthop: first the neighbour / IPIP
 * binding (same handling as when the RIF goes away), then the CRIF
 * binding.
 */
static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
	mlxsw_sp_nexthop_crif_fini(nh);
}
4604 
4605 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
4606 				  struct mlxsw_sp_nexthop_group *nh_grp,
4607 				  struct mlxsw_sp_nexthop *nh,
4608 				  struct fib_nh *fib_nh)
4609 {
4610 	struct net_device *dev = fib_nh->fib_nh_dev;
4611 	struct in_device *in_dev;
4612 	int err;
4613 
4614 	nh->nhgi = nh_grp->nhgi;
4615 	nh->key.fib_nh = fib_nh;
4616 #ifdef CONFIG_IP_ROUTE_MULTIPATH
4617 	nh->nh_weight = fib_nh->fib_nh_weight;
4618 #else
4619 	nh->nh_weight = 1;
4620 #endif
4621 	memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
4622 	nh->neigh_tbl = &arp_tbl;
4623 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
4624 	if (err)
4625 		return err;
4626 
4627 	err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
4628 	if (err)
4629 		goto err_counter_enable;
4630 
4631 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4632 
4633 	if (!dev)
4634 		return 0;
4635 	nh->ifindex = dev->ifindex;
4636 
4637 	rcu_read_lock();
4638 	in_dev = __in_dev_get_rcu(dev);
4639 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
4640 	    fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4641 		rcu_read_unlock();
4642 		return 0;
4643 	}
4644 	rcu_read_unlock();
4645 
4646 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4647 	if (err)
4648 		goto err_nexthop_neigh_init;
4649 
4650 	return 0;
4651 
4652 err_nexthop_neigh_init:
4653 	list_del(&nh->router_list_node);
4654 	mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
4655 err_counter_enable:
4656 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4657 	return err;
4658 }
4659 
/* Tear down an IPv4 nexthop: type-specific state first, then the router
 * list linkage, the counter and finally the nexthop table entry - the
 * reverse of the order in which mlxsw_sp_nexthop4_init() sets them up.
 */
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
4668 
4669 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
4670 				    unsigned long event, struct fib_nh *fib_nh)
4671 {
4672 	struct mlxsw_sp_nexthop_key key;
4673 	struct mlxsw_sp_nexthop *nh;
4674 
4675 	key.fib_nh = fib_nh;
4676 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
4677 	if (!nh)
4678 		return;
4679 
4680 	switch (event) {
4681 	case FIB_EVENT_NH_ADD:
4682 		mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
4683 		break;
4684 	case FIB_EVENT_NH_DEL:
4685 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4686 		break;
4687 	}
4688 
4689 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4690 }
4691 
4692 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
4693 					struct mlxsw_sp_rif *rif)
4694 {
4695 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
4696 	struct mlxsw_sp_nexthop *nh;
4697 	bool removing;
4698 
4699 	list_for_each_entry(nh, &rif->crif->nexthop_list, crif_list_node) {
4700 		switch (nh->type) {
4701 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
4702 			removing = false;
4703 			break;
4704 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4705 			removing = !mlxsw_sp_ipip_netdev_ul_up(dev);
4706 			break;
4707 		default:
4708 			WARN_ON(1);
4709 			continue;
4710 		}
4711 
4712 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
4713 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4714 	}
4715 }
4716 
4717 static int mlxsw_sp_nexthop_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
4718 					  struct mlxsw_sp_rif *rif)
4719 {
4720 	struct mlxsw_sp_nexthop *nh, *tmp;
4721 	unsigned int n = 0;
4722 	int err;
4723 
4724 	list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
4725 				 crif_list_node) {
4726 		err = mlxsw_sp_nexthop_type_rif_made(mlxsw_sp, nh);
4727 		if (err)
4728 			goto err_nexthop_type_rif;
4729 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4730 		n++;
4731 	}
4732 
4733 	return 0;
4734 
4735 err_nexthop_type_rif:
4736 	list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
4737 				 crif_list_node) {
4738 		if (!n--)
4739 			break;
4740 		mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
4741 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4742 	}
4743 	return err;
4744 }
4745 
4746 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4747 					   struct mlxsw_sp_rif *rif)
4748 {
4749 	struct mlxsw_sp_nexthop *nh, *tmp;
4750 
4751 	list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
4752 				 crif_list_node) {
4753 		mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
4754 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4755 	}
4756 }
4757 
4758 static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp)
4759 {
4760 	enum mlxsw_reg_ratr_trap_action trap_action;
4761 	char ratr_pl[MLXSW_REG_RATR_LEN];
4762 	int err;
4763 
4764 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4765 				  &mlxsw_sp->router->adj_trap_index);
4766 	if (err)
4767 		return err;
4768 
4769 	trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
4770 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
4771 			    MLXSW_REG_RATR_TYPE_ETHERNET,
4772 			    mlxsw_sp->router->adj_trap_index,
4773 			    mlxsw_sp->router->lb_crif->rif->rif_index);
4774 	mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
4775 	mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
4776 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
4777 	if (err)
4778 		goto err_ratr_write;
4779 
4780 	return 0;
4781 
4782 err_ratr_write:
4783 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4784 			   mlxsw_sp->router->adj_trap_index);
4785 	return err;
4786 }
4787 
/* Free the single adjacency entry recorded in router->adj_trap_index,
 * i.e. the one allocated by mlxsw_sp_adj_trap_entry_init().
 */
static void mlxsw_sp_adj_trap_entry_fini(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
			   mlxsw_sp->router->adj_trap_index);
}
4793 
4794 static int mlxsw_sp_nexthop_group_inc(struct mlxsw_sp *mlxsw_sp)
4795 {
4796 	int err;
4797 
4798 	if (refcount_inc_not_zero(&mlxsw_sp->router->num_groups))
4799 		return 0;
4800 
4801 	err = mlxsw_sp_adj_trap_entry_init(mlxsw_sp);
4802 	if (err)
4803 		return err;
4804 
4805 	refcount_set(&mlxsw_sp->router->num_groups, 1);
4806 
4807 	return 0;
4808 }
4809 
4810 static void mlxsw_sp_nexthop_group_dec(struct mlxsw_sp *mlxsw_sp)
4811 {
4812 	if (!refcount_dec_and_test(&mlxsw_sp->router->num_groups))
4813 		return;
4814 
4815 	mlxsw_sp_adj_trap_entry_fini(mlxsw_sp);
4816 }
4817 
4818 static void
4819 mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
4820 			     const struct mlxsw_sp_nexthop_group *nh_grp,
4821 			     unsigned long *activity)
4822 {
4823 	char *ratrad_pl;
4824 	int i, err;
4825 
4826 	ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
4827 	if (!ratrad_pl)
4828 		return;
4829 
4830 	mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
4831 			      nh_grp->nhgi->count);
4832 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
4833 	if (err)
4834 		goto out;
4835 
4836 	for (i = 0; i < nh_grp->nhgi->count; i++) {
4837 		if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
4838 			continue;
4839 		bitmap_set(activity, i, 1);
4840 	}
4841 
4842 out:
4843 	kfree(ratrad_pl);
4844 }
4845 
4846 #define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
4847 
4848 static void
4849 mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
4850 				const struct mlxsw_sp_nexthop_group *nh_grp)
4851 {
4852 	unsigned long *activity;
4853 
4854 	activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
4855 	if (!activity)
4856 		return;
4857 
4858 	mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
4859 	nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4860 					nh_grp->nhgi->count, activity);
4861 
4862 	bitmap_free(activity);
4863 }
4864 
4865 static void
4866 mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
4867 {
4868 	unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
4869 
4870 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
4871 			       msecs_to_jiffies(interval));
4872 }
4873 
4874 static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
4875 {
4876 	struct mlxsw_sp_nexthop_group_info *nhgi;
4877 	struct mlxsw_sp_router *router;
4878 	bool reschedule = false;
4879 
4880 	router = container_of(work, struct mlxsw_sp_router,
4881 			      nh_grp_activity_dw.work);
4882 
4883 	mutex_lock(&router->lock);
4884 
4885 	list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
4886 		mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
4887 		reschedule = true;
4888 	}
4889 
4890 	mutex_unlock(&router->lock);
4891 
4892 	if (!reschedule)
4893 		return;
4894 	mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
4895 }
4896 
4897 static int
4898 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
4899 				     const struct nh_notifier_single_info *nh,
4900 				     struct netlink_ext_ack *extack)
4901 {
4902 	int err = -EINVAL;
4903 
4904 	if (nh->is_fdb)
4905 		NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
4906 	else if (nh->has_encap)
4907 		NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
4908 	else
4909 		err = 0;
4910 
4911 	return err;
4912 }
4913 
4914 static int
4915 mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp,
4916 					  const struct nh_notifier_single_info *nh,
4917 					  struct netlink_ext_ack *extack)
4918 {
4919 	int err;
4920 
4921 	err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack);
4922 	if (err)
4923 		return err;
4924 
4925 	/* Device only nexthops with an IPIP device are programmed as
4926 	 * encapsulating adjacency entries.
4927 	 */
4928 	if (!nh->gw_family && !nh->is_reject &&
4929 	    !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
4930 		NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
4931 		return -EINVAL;
4932 	}
4933 
4934 	return 0;
4935 }
4936 
4937 static int
4938 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
4939 				    const struct nh_notifier_grp_info *nh_grp,
4940 				    struct netlink_ext_ack *extack)
4941 {
4942 	int i;
4943 
4944 	if (nh_grp->is_fdb) {
4945 		NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
4946 		return -EINVAL;
4947 	}
4948 
4949 	for (i = 0; i < nh_grp->num_nh; i++) {
4950 		const struct nh_notifier_single_info *nh;
4951 		int err;
4952 
4953 		nh = &nh_grp->nh_entries[i].nh;
4954 		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4955 								extack);
4956 		if (err)
4957 			return err;
4958 	}
4959 
4960 	return 0;
4961 }
4962 
4963 static int
4964 mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
4965 					     const struct nh_notifier_res_table_info *nh_res_table,
4966 					     struct netlink_ext_ack *extack)
4967 {
4968 	unsigned int alloc_size;
4969 	bool valid_size = false;
4970 	int err, i;
4971 
4972 	if (nh_res_table->num_nh_buckets < 32) {
4973 		NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
4974 		return -EINVAL;
4975 	}
4976 
4977 	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
4978 		const struct mlxsw_sp_adj_grp_size_range *size_range;
4979 
4980 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
4981 
4982 		if (nh_res_table->num_nh_buckets >= size_range->start &&
4983 		    nh_res_table->num_nh_buckets <= size_range->end) {
4984 			valid_size = true;
4985 			break;
4986 		}
4987 	}
4988 
4989 	if (!valid_size) {
4990 		NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
4991 		return -EINVAL;
4992 	}
4993 
4994 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
4995 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4996 					      nh_res_table->num_nh_buckets,
4997 					      &alloc_size);
4998 	if (err || nh_res_table->num_nh_buckets != alloc_size) {
4999 		NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
5000 		return -EINVAL;
5001 	}
5002 
5003 	return 0;
5004 }
5005 
5006 static int
5007 mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
5008 					const struct nh_notifier_res_table_info *nh_res_table,
5009 					struct netlink_ext_ack *extack)
5010 {
5011 	int err;
5012 	u16 i;
5013 
5014 	err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
5015 							   nh_res_table,
5016 							   extack);
5017 	if (err)
5018 		return err;
5019 
5020 	for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
5021 		const struct nh_notifier_single_info *nh;
5022 		int err;
5023 
5024 		nh = &nh_res_table->nhs[i];
5025 		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
5026 								extack);
5027 		if (err)
5028 			return err;
5029 	}
5030 
5031 	return 0;
5032 }
5033 
5034 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
5035 					 unsigned long event,
5036 					 struct nh_notifier_info *info)
5037 {
5038 	struct nh_notifier_single_info *nh;
5039 
5040 	if (event != NEXTHOP_EVENT_REPLACE &&
5041 	    event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
5042 	    event != NEXTHOP_EVENT_BUCKET_REPLACE)
5043 		return 0;
5044 
5045 	switch (info->type) {
5046 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
5047 		return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
5048 							    info->extack);
5049 	case NH_NOTIFIER_INFO_TYPE_GRP:
5050 		return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
5051 							   info->nh_grp,
5052 							   info->extack);
5053 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
5054 		return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
5055 							       info->nh_res_table,
5056 							       info->extack);
5057 	case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
5058 		nh = &info->nh_res_bucket->new_nh;
5059 		return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
5060 								 info->extack);
5061 	default:
5062 		NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
5063 		return -EOPNOTSUPP;
5064 	}
5065 }
5066 
5067 static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
5068 					    const struct nh_notifier_info *info)
5069 {
5070 	const struct net_device *dev;
5071 
5072 	switch (info->type) {
5073 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
5074 		dev = info->nh->dev;
5075 		return info->nh->gw_family || info->nh->is_reject ||
5076 		       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
5077 	case NH_NOTIFIER_INFO_TYPE_GRP:
5078 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
5079 		/* Already validated earlier. */
5080 		return true;
5081 	default:
5082 		return false;
5083 	}
5084 }
5085 
5086 static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
5087 						struct mlxsw_sp_nexthop *nh)
5088 {
5089 	nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD;
5090 	nh->should_offload = 1;
5091 	/* While nexthops that discard packets do not forward packets
5092 	 * via an egress RIF, they still need to be programmed using a
5093 	 * valid RIF, so use the loopback RIF created during init.
5094 	 */
5095 	nh->crif = mlxsw_sp->router->lb_crif;
5096 }
5097 
5098 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
5099 						struct mlxsw_sp_nexthop *nh)
5100 {
5101 	nh->crif = NULL;
5102 	nh->should_offload = 0;
5103 }
5104 
5105 static int
5106 mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
5107 			  struct mlxsw_sp_nexthop_group *nh_grp,
5108 			  struct mlxsw_sp_nexthop *nh,
5109 			  struct nh_notifier_single_info *nh_obj, int weight)
5110 {
5111 	struct net_device *dev = nh_obj->dev;
5112 	int err;
5113 
5114 	nh->nhgi = nh_grp->nhgi;
5115 	nh->nh_weight = weight;
5116 
5117 	switch (nh_obj->gw_family) {
5118 	case AF_INET:
5119 		memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
5120 		nh->neigh_tbl = &arp_tbl;
5121 		break;
5122 	case AF_INET6:
5123 		memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
5124 #if IS_ENABLED(CONFIG_IPV6)
5125 		nh->neigh_tbl = &nd_tbl;
5126 #endif
5127 		break;
5128 	}
5129 
5130 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5131 	nh->ifindex = dev->ifindex;
5132 	nh->id = nh_obj->id;
5133 
5134 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
5135 	if (err)
5136 		goto err_type_init;
5137 
5138 	if (nh_obj->is_reject)
5139 		mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
5140 
5141 	/* In a resilient nexthop group, all the nexthops must be written to
5142 	 * the adjacency table. Even if they do not have a valid neighbour or
5143 	 * RIF.
5144 	 */
5145 	if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
5146 		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
5147 		nh->should_offload = 1;
5148 	}
5149 
5150 	return 0;
5151 
5152 err_type_init:
5153 	list_del(&nh->router_list_node);
5154 	return err;
5155 }
5156 
5157 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
5158 				      struct mlxsw_sp_nexthop *nh)
5159 {
5160 	if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
5161 		mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
5162 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
5163 	list_del(&nh->router_list_node);
5164 	mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
5165 	nh->should_offload = 0;
5166 }
5167 
5168 static int
5169 mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
5170 				     struct mlxsw_sp_nexthop_group *nh_grp,
5171 				     struct nh_notifier_info *info)
5172 {
5173 	struct mlxsw_sp_nexthop_group_info *nhgi;
5174 	struct mlxsw_sp_nexthop *nh;
5175 	bool is_resilient = false;
5176 	bool hw_stats = false;
5177 	unsigned int nhs;
5178 	int err, i;
5179 
5180 	switch (info->type) {
5181 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
5182 		nhs = 1;
5183 		break;
5184 	case NH_NOTIFIER_INFO_TYPE_GRP:
5185 		nhs = info->nh_grp->num_nh;
5186 		hw_stats = info->nh_grp->hw_stats;
5187 		break;
5188 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
5189 		nhs = info->nh_res_table->num_nh_buckets;
5190 		hw_stats = info->nh_res_table->hw_stats;
5191 		is_resilient = true;
5192 		break;
5193 	default:
5194 		return -EINVAL;
5195 	}
5196 
5197 	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
5198 	if (!nhgi)
5199 		return -ENOMEM;
5200 	nh_grp->nhgi = nhgi;
5201 	nhgi->nh_grp = nh_grp;
5202 	nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
5203 	nhgi->is_resilient = is_resilient;
5204 	nhgi->count = nhs;
5205 	nhgi->hw_stats = hw_stats;
5206 
5207 	xa_init_flags(&nhgi->nexthop_counters, XA_FLAGS_ALLOC1);
5208 
5209 	for (i = 0; i < nhgi->count; i++) {
5210 		struct nh_notifier_single_info *nh_obj;
5211 		int weight;
5212 
5213 		nh = &nhgi->nexthops[i];
5214 		switch (info->type) {
5215 		case NH_NOTIFIER_INFO_TYPE_SINGLE:
5216 			nh_obj = info->nh;
5217 			weight = 1;
5218 			break;
5219 		case NH_NOTIFIER_INFO_TYPE_GRP:
5220 			nh_obj = &info->nh_grp->nh_entries[i].nh;
5221 			weight = info->nh_grp->nh_entries[i].weight;
5222 			break;
5223 		case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
5224 			nh_obj = &info->nh_res_table->nhs[i];
5225 			weight = 1;
5226 			break;
5227 		default:
5228 			err = -EINVAL;
5229 			goto err_nexthop_obj_init;
5230 		}
5231 		err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
5232 						weight);
5233 		if (err)
5234 			goto err_nexthop_obj_init;
5235 	}
5236 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
5237 	if (err)
5238 		goto err_group_inc;
5239 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5240 	if (err) {
5241 		NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
5242 		goto err_group_refresh;
5243 	}
5244 
5245 	/* Add resilient nexthop groups to a list so that the activity of their
5246 	 * nexthop buckets will be periodically queried and cleared.
5247 	 */
5248 	if (nhgi->is_resilient) {
5249 		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
5250 			mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
5251 		list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
5252 	}
5253 
5254 	return 0;
5255 
5256 err_group_refresh:
5257 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5258 err_group_inc:
5259 	i = nhgi->count;
5260 err_nexthop_obj_init:
5261 	for (i--; i >= 0; i--) {
5262 		nh = &nhgi->nexthops[i];
5263 		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5264 	}
5265 	kfree(nhgi);
5266 	return err;
5267 }
5268 
5269 static void
5270 mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5271 				     struct mlxsw_sp_nexthop_group *nh_grp)
5272 {
5273 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5274 	struct mlxsw_sp_router *router = mlxsw_sp->router;
5275 	int i;
5276 
5277 	if (nhgi->is_resilient) {
5278 		list_del(&nhgi->list);
5279 		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
5280 			cancel_delayed_work(&router->nh_grp_activity_dw);
5281 	}
5282 
5283 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5284 	for (i = nhgi->count - 1; i >= 0; i--) {
5285 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5286 
5287 		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5288 	}
5289 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5290 	WARN_ON_ONCE(nhgi->adj_index_valid);
5291 	WARN_ON(!xa_empty(&nhgi->nexthop_counters));
5292 	xa_destroy(&nhgi->nexthop_counters);
5293 	kfree(nhgi);
5294 }
5295 
5296 static struct mlxsw_sp_nexthop_group *
5297 mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
5298 				  struct nh_notifier_info *info)
5299 {
5300 	struct mlxsw_sp_nexthop_group *nh_grp;
5301 	int err;
5302 
5303 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5304 	if (!nh_grp)
5305 		return ERR_PTR(-ENOMEM);
5306 	INIT_LIST_HEAD(&nh_grp->vr_list);
5307 	err = rhashtable_init(&nh_grp->vr_ht,
5308 			      &mlxsw_sp_nexthop_group_vr_ht_params);
5309 	if (err)
5310 		goto err_nexthop_group_vr_ht_init;
5311 	INIT_LIST_HEAD(&nh_grp->fib_list);
5312 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
5313 	nh_grp->obj.id = info->id;
5314 
5315 	err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
5316 	if (err)
5317 		goto err_nexthop_group_info_init;
5318 
5319 	nh_grp->can_destroy = false;
5320 
5321 	return nh_grp;
5322 
5323 err_nexthop_group_info_init:
5324 	rhashtable_destroy(&nh_grp->vr_ht);
5325 err_nexthop_group_vr_ht_init:
5326 	kfree(nh_grp);
5327 	return ERR_PTR(err);
5328 }
5329 
5330 static void
5331 mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
5332 				   struct mlxsw_sp_nexthop_group *nh_grp)
5333 {
5334 	if (!nh_grp->can_destroy)
5335 		return;
5336 	mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);
5337 	WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));
5338 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5339 	rhashtable_destroy(&nh_grp->vr_ht);
5340 	kfree(nh_grp);
5341 }
5342 
5343 static struct mlxsw_sp_nexthop_group *
5344 mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
5345 {
5346 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
5347 
5348 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
5349 	cmp_arg.id = id;
5350 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
5351 				      &cmp_arg,
5352 				      mlxsw_sp_nexthop_group_ht_params);
5353 }
5354 
/* Make a newly created nexthop object group visible by inserting it via
 * mlxsw_sp_nexthop_group_insert(). Returns that function's result.
 */
static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp)
{
	int err;

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);

	return err;
}
5360 
5361 static int
5362 mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
5363 				   struct mlxsw_sp_nexthop_group *nh_grp,
5364 				   struct mlxsw_sp_nexthop_group *old_nh_grp,
5365 				   struct netlink_ext_ack *extack)
5366 {
5367 	struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
5368 	struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
5369 	int err;
5370 
5371 	old_nh_grp->nhgi = new_nhgi;
5372 	new_nhgi->nh_grp = old_nh_grp;
5373 	nh_grp->nhgi = old_nhgi;
5374 	old_nhgi->nh_grp = nh_grp;
5375 
5376 	if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5377 		/* Both the old adjacency index and the new one are valid.
5378 		 * Routes are currently using the old one. Tell the device to
5379 		 * replace the old adjacency index with the new one.
5380 		 */
5381 		err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
5382 						     old_nhgi->adj_index,
5383 						     old_nhgi->ecmp_size);
5384 		if (err) {
5385 			NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
5386 			goto err_out;
5387 		}
5388 	} else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
5389 		/* The old adjacency index is valid, while the new one is not.
5390 		 * Iterate over all the routes using the group and change them
5391 		 * to trap packets to the CPU.
5392 		 */
5393 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5394 		if (err) {
5395 			NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
5396 			goto err_out;
5397 		}
5398 	} else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5399 		/* The old adjacency index is invalid, while the new one is.
5400 		 * Iterate over all the routes using the group and change them
5401 		 * to forward packets using the new valid index.
5402 		 */
5403 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5404 		if (err) {
5405 			NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
5406 			goto err_out;
5407 		}
5408 	}
5409 
5410 	/* Make sure the flags are set / cleared based on the new nexthop group
5411 	 * information.
5412 	 */
5413 	mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);
5414 
5415 	/* At this point 'nh_grp' is just a shell that is not used by anyone
5416 	 * and its nexthop group info is the old info that was just replaced
5417 	 * with the new one. Remove it.
5418 	 */
5419 	nh_grp->can_destroy = true;
5420 	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5421 
5422 	return 0;
5423 
5424 err_out:
5425 	old_nhgi->nh_grp = old_nh_grp;
5426 	nh_grp->nhgi = new_nhgi;
5427 	new_nhgi->nh_grp = nh_grp;
5428 	old_nh_grp->nhgi = old_nhgi;
5429 	return err;
5430 }
5431 
5432 static int mlxsw_sp_nexthop_obj_res_group_pre(struct mlxsw_sp *mlxsw_sp,
5433 					      struct nh_notifier_info *info)
5434 {
5435 	struct nh_notifier_grp_info *grp_info = info->nh_grp;
5436 	struct mlxsw_sp_nexthop_group_info *nhgi;
5437 	struct mlxsw_sp_nexthop_group *nh_grp;
5438 	int err;
5439 	int i;
5440 
5441 	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5442 	if (!nh_grp)
5443 		return 0;
5444 	nhgi = nh_grp->nhgi;
5445 
5446 	if (nhgi->hw_stats == grp_info->hw_stats)
5447 		return 0;
5448 
5449 	nhgi->hw_stats = grp_info->hw_stats;
5450 
5451 	for (i = 0; i < nhgi->count; i++) {
5452 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5453 
5454 		if (nh->offloaded)
5455 			nh->update = 1;
5456 	}
5457 
5458 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5459 	if (err)
5460 		goto err_group_refresh;
5461 
5462 	return 0;
5463 
5464 err_group_refresh:
5465 	nhgi->hw_stats = !grp_info->hw_stats;
5466 	return err;
5467 }
5468 
5469 static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
5470 				    struct nh_notifier_info *info)
5471 {
5472 	struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
5473 	struct netlink_ext_ack *extack = info->extack;
5474 	int err;
5475 
5476 	nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
5477 	if (IS_ERR(nh_grp))
5478 		return PTR_ERR(nh_grp);
5479 
5480 	old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5481 	if (!old_nh_grp)
5482 		err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);
5483 	else
5484 		err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
5485 							 old_nh_grp, extack);
5486 
5487 	if (err) {
5488 		nh_grp->can_destroy = true;
5489 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5490 	}
5491 
5492 	return err;
5493 }
5494 
5495 static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
5496 				     struct nh_notifier_info *info)
5497 {
5498 	struct mlxsw_sp_nexthop_group *nh_grp;
5499 
5500 	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5501 	if (!nh_grp)
5502 		return;
5503 
5504 	nh_grp->can_destroy = true;
5505 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5506 
5507 	/* If the group still has routes using it, then defer the delete
5508 	 * operation until the last route using it is deleted.
5509 	 */
5510 	if (!list_empty(&nh_grp->fib_list))
5511 		return;
5512 	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5513 }
5514 
5515 static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
5516 					     u32 adj_index, char *ratr_pl)
5517 {
5518 	MLXSW_REG_ZERO(ratr, ratr_pl);
5519 	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5520 	mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
5521 	mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
5522 
5523 	return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
5524 }
5525 
5526 static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
5527 {
5528 	/* Clear the opcode and activity on both the old and new payload as
5529 	 * they are irrelevant for the comparison.
5530 	 */
5531 	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5532 	mlxsw_reg_ratr_a_set(ratr_pl, 0);
5533 	mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
5534 	mlxsw_reg_ratr_a_set(ratr_pl_new, 0);
5535 
5536 	/* If the contents of the adjacency entry are consistent with the
5537 	 * replacement request, then replacement was successful.
5538 	 */
5539 	if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN))
5540 		return 0;
5541 
5542 	return -EINVAL;
5543 }
5544 
5545 static int
5546 mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
5547 				       struct mlxsw_sp_nexthop *nh,
5548 				       struct nh_notifier_info *info)
5549 {
5550 	u16 bucket_index = info->nh_res_bucket->bucket_index;
5551 	struct netlink_ext_ack *extack = info->extack;
5552 	bool force = info->nh_res_bucket->force;
5553 	char ratr_pl_new[MLXSW_REG_RATR_LEN];
5554 	char ratr_pl[MLXSW_REG_RATR_LEN];
5555 	u32 adj_index;
5556 	int err;
5557 
5558 	/* No point in trying an atomic replacement if the idle timer interval
5559 	 * is smaller than the interval in which we query and clear activity.
5560 	 */
5561 	if (!force && info->nh_res_bucket->idle_timer_ms <
5562 	    MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL)
5563 		force = true;
5564 
5565 	adj_index = nh->nhgi->adj_index + bucket_index;
5566 	err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
5567 	if (err) {
5568 		NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
5569 		return err;
5570 	}
5571 
5572 	if (!force) {
5573 		err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
5574 							ratr_pl_new);
5575 		if (err) {
5576 			NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
5577 			return err;
5578 		}
5579 
5580 		err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
5581 		if (err) {
5582 			NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
5583 			return err;
5584 		}
5585 	}
5586 
5587 	nh->update = 0;
5588 	nh->offloaded = 1;
5589 	mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);
5590 
5591 	return 0;
5592 }
5593 
5594 static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
5595 					       struct nh_notifier_info *info)
5596 {
5597 	u16 bucket_index = info->nh_res_bucket->bucket_index;
5598 	struct netlink_ext_ack *extack = info->extack;
5599 	struct mlxsw_sp_nexthop_group_info *nhgi;
5600 	struct nh_notifier_single_info *nh_obj;
5601 	struct mlxsw_sp_nexthop_group *nh_grp;
5602 	struct mlxsw_sp_nexthop *nh;
5603 	int err;
5604 
5605 	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5606 	if (!nh_grp) {
5607 		NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
5608 		return -EINVAL;
5609 	}
5610 
5611 	nhgi = nh_grp->nhgi;
5612 
5613 	if (bucket_index >= nhgi->count) {
5614 		NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
5615 		return -EINVAL;
5616 	}
5617 
5618 	nh = &nhgi->nexthops[bucket_index];
5619 	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5620 
5621 	nh_obj = &info->nh_res_bucket->new_nh;
5622 	err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5623 	if (err) {
5624 		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
5625 		goto err_nexthop_obj_init;
5626 	}
5627 
5628 	err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
5629 	if (err)
5630 		goto err_nexthop_obj_bucket_adj_update;
5631 
5632 	return 0;
5633 
5634 err_nexthop_obj_bucket_adj_update:
5635 	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5636 err_nexthop_obj_init:
5637 	nh_obj = &info->nh_res_bucket->old_nh;
5638 	mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5639 	/* The old adjacency entry was not overwritten */
5640 	nh->update = 0;
5641 	nh->offloaded = 1;
5642 	return err;
5643 }
5644 
5645 static void
5646 mlxsw_sp_nexthop_obj_mp_hw_stats_get(struct mlxsw_sp *mlxsw_sp,
5647 				     struct mlxsw_sp_nexthop_group_info *nhgi,
5648 				     struct nh_notifier_grp_hw_stats_info *info)
5649 {
5650 	int nhi;
5651 
5652 	for (nhi = 0; nhi < info->num_nh; nhi++) {
5653 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[nhi];
5654 		u64 packets;
5655 		int err;
5656 
5657 		err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &packets);
5658 		if (err)
5659 			continue;
5660 
5661 		nh_grp_hw_stats_report_delta(info, nhi, packets);
5662 	}
5663 }
5664 
5665 static void
5666 mlxsw_sp_nexthop_obj_res_hw_stats_get(struct mlxsw_sp *mlxsw_sp,
5667 				      struct mlxsw_sp_nexthop_group_info *nhgi,
5668 				      struct nh_notifier_grp_hw_stats_info *info)
5669 {
5670 	int nhi = -1;
5671 	int bucket;
5672 
5673 	for (bucket = 0; bucket < nhgi->count; bucket++) {
5674 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[bucket];
5675 		u64 packets;
5676 		int err;
5677 
5678 		if (nhi == -1 || info->stats[nhi].id != nh->id) {
5679 			for (nhi = 0; nhi < info->num_nh; nhi++)
5680 				if (info->stats[nhi].id == nh->id)
5681 					break;
5682 			if (WARN_ON_ONCE(nhi == info->num_nh)) {
5683 				nhi = -1;
5684 				continue;
5685 			}
5686 		}
5687 
5688 		err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &packets);
5689 		if (err)
5690 			continue;
5691 
5692 		nh_grp_hw_stats_report_delta(info, nhi, packets);
5693 	}
5694 }
5695 
5696 static void mlxsw_sp_nexthop_obj_hw_stats_get(struct mlxsw_sp *mlxsw_sp,
5697 					      struct nh_notifier_info *info)
5698 {
5699 	struct mlxsw_sp_nexthop_group_info *nhgi;
5700 	struct mlxsw_sp_nexthop_group *nh_grp;
5701 
5702 	if (info->type != NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS)
5703 		return;
5704 
5705 	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5706 	if (!nh_grp)
5707 		return;
5708 	nhgi = nh_grp->nhgi;
5709 
5710 	if (nhgi->is_resilient)
5711 		mlxsw_sp_nexthop_obj_res_hw_stats_get(mlxsw_sp, nhgi,
5712 						      info->nh_grp_hw_stats);
5713 	else
5714 		mlxsw_sp_nexthop_obj_mp_hw_stats_get(mlxsw_sp, nhgi,
5715 						     info->nh_grp_hw_stats);
5716 }
5717 
5718 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
5719 				      unsigned long event, void *ptr)
5720 {
5721 	struct nh_notifier_info *info = ptr;
5722 	struct mlxsw_sp_router *router;
5723 	int err = 0;
5724 
5725 	router = container_of(nb, struct mlxsw_sp_router, nexthop_nb);
5726 	err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
5727 	if (err)
5728 		goto out;
5729 
5730 	mutex_lock(&router->lock);
5731 
5732 	switch (event) {
5733 	case NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE:
5734 		err = mlxsw_sp_nexthop_obj_res_group_pre(router->mlxsw_sp,
5735 							 info);
5736 		break;
5737 	case NEXTHOP_EVENT_REPLACE:
5738 		err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
5739 		break;
5740 	case NEXTHOP_EVENT_DEL:
5741 		mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
5742 		break;
5743 	case NEXTHOP_EVENT_BUCKET_REPLACE:
5744 		err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
5745 							  info);
5746 		break;
5747 	case NEXTHOP_EVENT_HW_STATS_REPORT_DELTA:
5748 		mlxsw_sp_nexthop_obj_hw_stats_get(router->mlxsw_sp, info);
5749 		break;
5750 	default:
5751 		break;
5752 	}
5753 
5754 	mutex_unlock(&router->lock);
5755 
5756 out:
5757 	return notifier_from_errno(err);
5758 }
5759 
5760 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5761 				   struct fib_info *fi)
5762 {
5763 	const struct fib_nh *nh = fib_info_nh(fi, 0);
5764 
5765 	return nh->fib_nh_gw_family ||
5766 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
5767 }
5768 
5769 static int
5770 mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
5771 				  struct mlxsw_sp_nexthop_group *nh_grp)
5772 {
5773 	unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
5774 	struct mlxsw_sp_nexthop_group_info *nhgi;
5775 	struct mlxsw_sp_nexthop *nh;
5776 	int err, i;
5777 
5778 	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
5779 	if (!nhgi)
5780 		return -ENOMEM;
5781 	nh_grp->nhgi = nhgi;
5782 	nhgi->nh_grp = nh_grp;
5783 	nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
5784 	nhgi->count = nhs;
5785 	for (i = 0; i < nhgi->count; i++) {
5786 		struct fib_nh *fib_nh;
5787 
5788 		nh = &nhgi->nexthops[i];
5789 		fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
5790 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
5791 		if (err)
5792 			goto err_nexthop4_init;
5793 	}
5794 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
5795 	if (err)
5796 		goto err_group_inc;
5797 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5798 	if (err)
5799 		goto err_group_refresh;
5800 
5801 	return 0;
5802 
5803 err_group_refresh:
5804 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5805 err_group_inc:
5806 	i = nhgi->count;
5807 err_nexthop4_init:
5808 	for (i--; i >= 0; i--) {
5809 		nh = &nhgi->nexthops[i];
5810 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5811 	}
5812 	kfree(nhgi);
5813 	return err;
5814 }
5815 
5816 static void
5817 mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5818 				  struct mlxsw_sp_nexthop_group *nh_grp)
5819 {
5820 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5821 	int i;
5822 
5823 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5824 	for (i = nhgi->count - 1; i >= 0; i--) {
5825 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5826 
5827 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5828 	}
5829 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5830 	WARN_ON_ONCE(nhgi->adj_index_valid);
5831 	kfree(nhgi);
5832 }
5833 
5834 static struct mlxsw_sp_nexthop_group *
5835 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
5836 {
5837 	struct mlxsw_sp_nexthop_group *nh_grp;
5838 	int err;
5839 
5840 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5841 	if (!nh_grp)
5842 		return ERR_PTR(-ENOMEM);
5843 	INIT_LIST_HEAD(&nh_grp->vr_list);
5844 	err = rhashtable_init(&nh_grp->vr_ht,
5845 			      &mlxsw_sp_nexthop_group_vr_ht_params);
5846 	if (err)
5847 		goto err_nexthop_group_vr_ht_init;
5848 	INIT_LIST_HEAD(&nh_grp->fib_list);
5849 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
5850 	nh_grp->ipv4.fi = fi;
5851 	fib_info_hold(fi);
5852 
5853 	err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
5854 	if (err)
5855 		goto err_nexthop_group_info_init;
5856 
5857 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5858 	if (err)
5859 		goto err_nexthop_group_insert;
5860 
5861 	nh_grp->can_destroy = true;
5862 
5863 	return nh_grp;
5864 
5865 err_nexthop_group_insert:
5866 	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5867 err_nexthop_group_info_init:
5868 	fib_info_put(fi);
5869 	rhashtable_destroy(&nh_grp->vr_ht);
5870 err_nexthop_group_vr_ht_init:
5871 	kfree(nh_grp);
5872 	return ERR_PTR(err);
5873 }
5874 
5875 static void
5876 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
5877 				struct mlxsw_sp_nexthop_group *nh_grp)
5878 {
5879 	if (!nh_grp->can_destroy)
5880 		return;
5881 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5882 	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5883 	fib_info_put(nh_grp->ipv4.fi);
5884 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5885 	rhashtable_destroy(&nh_grp->vr_ht);
5886 	kfree(nh_grp);
5887 }
5888 
5889 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
5890 				       struct mlxsw_sp_fib_entry *fib_entry,
5891 				       struct fib_info *fi)
5892 {
5893 	struct mlxsw_sp_nexthop_group *nh_grp;
5894 
5895 	if (fi->nh) {
5896 		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
5897 							   fi->nh->id);
5898 		if (WARN_ON_ONCE(!nh_grp))
5899 			return -EINVAL;
5900 		goto out;
5901 	}
5902 
5903 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
5904 	if (!nh_grp) {
5905 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
5906 		if (IS_ERR(nh_grp))
5907 			return PTR_ERR(nh_grp);
5908 	}
5909 out:
5910 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
5911 	fib_entry->nh_group = nh_grp;
5912 	return 0;
5913 }
5914 
5915 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
5916 					struct mlxsw_sp_fib_entry *fib_entry)
5917 {
5918 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5919 
5920 	list_del(&fib_entry->nexthop_group_node);
5921 	if (!list_empty(&nh_grp->fib_list))
5922 		return;
5923 
5924 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
5925 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5926 		return;
5927 	}
5928 
5929 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
5930 }
5931 
5932 static bool
5933 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5934 {
5935 	struct mlxsw_sp_fib4_entry *fib4_entry;
5936 
5937 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5938 				  common);
5939 	return !fib4_entry->dscp;
5940 }
5941 
5942 static bool
5943 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5944 {
5945 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5946 
5947 	switch (fib_entry->fib_node->fib->proto) {
5948 	case MLXSW_SP_L3_PROTO_IPV4:
5949 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
5950 			return false;
5951 		break;
5952 	case MLXSW_SP_L3_PROTO_IPV6:
5953 		break;
5954 	}
5955 
5956 	switch (fib_entry->type) {
5957 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5958 		return !!nh_group->nhgi->adj_index_valid;
5959 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5960 		return !!mlxsw_sp_nhgi_rif(nh_group->nhgi);
5961 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5962 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5963 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5964 		return true;
5965 	default:
5966 		return false;
5967 	}
5968 }
5969 
5970 static struct mlxsw_sp_nexthop *
5971 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
5972 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
5973 {
5974 	int i;
5975 
5976 	for (i = 0; i < nh_grp->nhgi->count; i++) {
5977 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
5978 		struct net_device *dev = mlxsw_sp_nexthop_dev(nh);
5979 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
5980 
5981 		if (dev && dev == rt->fib6_nh->fib_nh_dev &&
5982 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
5983 				    &rt->fib6_nh->fib_nh_gw6))
5984 			return nh;
5985 	}
5986 
5987 	return NULL;
5988 }
5989 
5990 static void
5991 mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5992 				      struct fib_entry_notifier_info *fen_info)
5993 {
5994 	u32 *p_dst = (u32 *) &fen_info->dst;
5995 	struct fib_rt_info fri;
5996 
5997 	fri.fi = fen_info->fi;
5998 	fri.tb_id = fen_info->tb_id;
5999 	fri.dst = cpu_to_be32(*p_dst);
6000 	fri.dst_len = fen_info->dst_len;
6001 	fri.dscp = fen_info->dscp;
6002 	fri.type = fen_info->type;
6003 	fri.offload = false;
6004 	fri.trap = false;
6005 	fri.offload_failed = true;
6006 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
6007 }
6008 
6009 static void
6010 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
6011 				 struct mlxsw_sp_fib_entry *fib_entry)
6012 {
6013 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
6014 	int dst_len = fib_entry->fib_node->key.prefix_len;
6015 	struct mlxsw_sp_fib4_entry *fib4_entry;
6016 	struct fib_rt_info fri;
6017 	bool should_offload;
6018 
6019 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
6020 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
6021 				  common);
6022 	fri.fi = fib4_entry->fi;
6023 	fri.tb_id = fib4_entry->tb_id;
6024 	fri.dst = cpu_to_be32(*p_dst);
6025 	fri.dst_len = dst_len;
6026 	fri.dscp = fib4_entry->dscp;
6027 	fri.type = fib4_entry->type;
6028 	fri.offload = should_offload;
6029 	fri.trap = !should_offload;
6030 	fri.offload_failed = false;
6031 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
6032 }
6033 
6034 static void
6035 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
6036 				   struct mlxsw_sp_fib_entry *fib_entry)
6037 {
6038 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
6039 	int dst_len = fib_entry->fib_node->key.prefix_len;
6040 	struct mlxsw_sp_fib4_entry *fib4_entry;
6041 	struct fib_rt_info fri;
6042 
6043 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
6044 				  common);
6045 	fri.fi = fib4_entry->fi;
6046 	fri.tb_id = fib4_entry->tb_id;
6047 	fri.dst = cpu_to_be32(*p_dst);
6048 	fri.dst_len = dst_len;
6049 	fri.dscp = fib4_entry->dscp;
6050 	fri.type = fib4_entry->type;
6051 	fri.offload = false;
6052 	fri.trap = false;
6053 	fri.offload_failed = false;
6054 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
6055 }
6056 
6057 #if IS_ENABLED(CONFIG_IPV6)
6058 static void
6059 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
6060 				      struct fib6_info **rt_arr,
6061 				      unsigned int nrt6)
6062 {
6063 	int i;
6064 
6065 	/* In IPv6 a multipath route is represented using multiple routes, so
6066 	 * we need to set the flags on all of them.
6067 	 */
6068 	for (i = 0; i < nrt6; i++)
6069 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i],
6070 				       false, false, true);
6071 }
6072 #else
6073 static void
6074 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
6075 				      struct fib6_info **rt_arr,
6076 				      unsigned int nrt6)
6077 {
6078 }
6079 #endif
6080 
6081 #if IS_ENABLED(CONFIG_IPV6)
6082 static void
6083 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
6084 				 struct mlxsw_sp_fib_entry *fib_entry)
6085 {
6086 	struct mlxsw_sp_fib6_entry *fib6_entry;
6087 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6088 	bool should_offload;
6089 
6090 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
6091 
6092 	/* In IPv6 a multipath route is represented using multiple routes, so
6093 	 * we need to set the flags on all of them.
6094 	 */
6095 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
6096 				  common);
6097 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
6098 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
6099 				       should_offload, !should_offload, false);
6100 }
6101 #else
6102 static void
6103 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
6104 				 struct mlxsw_sp_fib_entry *fib_entry)
6105 {
6106 }
6107 #endif
6108 
6109 #if IS_ENABLED(CONFIG_IPV6)
6110 static void
6111 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
6112 				   struct mlxsw_sp_fib_entry *fib_entry)
6113 {
6114 	struct mlxsw_sp_fib6_entry *fib6_entry;
6115 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6116 
6117 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
6118 				  common);
6119 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
6120 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
6121 				       false, false, false);
6122 }
6123 #else
6124 static void
6125 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
6126 				   struct mlxsw_sp_fib_entry *fib_entry)
6127 {
6128 }
6129 #endif
6130 
6131 static void
6132 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
6133 				struct mlxsw_sp_fib_entry *fib_entry)
6134 {
6135 	switch (fib_entry->fib_node->fib->proto) {
6136 	case MLXSW_SP_L3_PROTO_IPV4:
6137 		mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
6138 		break;
6139 	case MLXSW_SP_L3_PROTO_IPV6:
6140 		mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
6141 		break;
6142 	}
6143 }
6144 
6145 static void
6146 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
6147 				  struct mlxsw_sp_fib_entry *fib_entry)
6148 {
6149 	switch (fib_entry->fib_node->fib->proto) {
6150 	case MLXSW_SP_L3_PROTO_IPV4:
6151 		mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
6152 		break;
6153 	case MLXSW_SP_L3_PROTO_IPV6:
6154 		mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
6155 		break;
6156 	}
6157 }
6158 
6159 static void
6160 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
6161 				    struct mlxsw_sp_fib_entry *fib_entry,
6162 				    enum mlxsw_reg_ralue_op op)
6163 {
6164 	switch (op) {
6165 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
6166 		mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
6167 		break;
6168 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
6169 		mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
6170 		break;
6171 	default:
6172 		break;
6173 	}
6174 }
6175 
6176 static void
6177 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
6178 			      const struct mlxsw_sp_fib_entry *fib_entry,
6179 			      enum mlxsw_reg_ralue_op op)
6180 {
6181 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
6182 	enum mlxsw_reg_ralxx_protocol proto;
6183 	u32 *p_dip;
6184 
6185 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
6186 
6187 	switch (fib->proto) {
6188 	case MLXSW_SP_L3_PROTO_IPV4:
6189 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
6190 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
6191 				      fib_entry->fib_node->key.prefix_len,
6192 				      *p_dip);
6193 		break;
6194 	case MLXSW_SP_L3_PROTO_IPV6:
6195 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
6196 				      fib_entry->fib_node->key.prefix_len,
6197 				      fib_entry->fib_node->key.addr);
6198 		break;
6199 	}
6200 }
6201 
6202 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
6203 					struct mlxsw_sp_fib_entry *fib_entry,
6204 					enum mlxsw_reg_ralue_op op)
6205 {
6206 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
6207 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
6208 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6209 	enum mlxsw_reg_ralue_trap_action trap_action;
6210 	u16 trap_id = 0;
6211 	u32 adjacency_index = 0;
6212 	u16 ecmp_size = 0;
6213 
6214 	/* In case the nexthop group adjacency index is valid, use it
6215 	 * with provided ECMP size. Otherwise, setup trap and pass
6216 	 * traffic to kernel.
6217 	 */
6218 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
6219 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
6220 		adjacency_index = nhgi->adj_index;
6221 		ecmp_size = nhgi->ecmp_size;
6222 	} else if (!nhgi->adj_index_valid && nhgi->count &&
6223 		   mlxsw_sp_nhgi_rif(nhgi)) {
6224 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
6225 		adjacency_index = mlxsw_sp->router->adj_trap_index;
6226 		ecmp_size = 1;
6227 	} else {
6228 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
6229 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
6230 	}
6231 
6232 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6233 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
6234 					adjacency_index, ecmp_size);
6235 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6236 }
6237 
6238 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
6239 				       struct mlxsw_sp_fib_entry *fib_entry,
6240 				       enum mlxsw_reg_ralue_op op)
6241 {
6242 	struct mlxsw_sp_rif *rif = mlxsw_sp_nhgi_rif(fib_entry->nh_group->nhgi);
6243 	enum mlxsw_reg_ralue_trap_action trap_action;
6244 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6245 	u16 trap_id = 0;
6246 	u16 rif_index = 0;
6247 
6248 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
6249 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
6250 		rif_index = rif->rif_index;
6251 	} else {
6252 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
6253 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
6254 	}
6255 
6256 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6257 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
6258 				       rif_index);
6259 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6260 }
6261 
6262 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
6263 				      struct mlxsw_sp_fib_entry *fib_entry,
6264 				      enum mlxsw_reg_ralue_op op)
6265 {
6266 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6267 
6268 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6269 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
6270 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6271 }
6272 
6273 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
6274 					   struct mlxsw_sp_fib_entry *fib_entry,
6275 					   enum mlxsw_reg_ralue_op op)
6276 {
6277 	enum mlxsw_reg_ralue_trap_action trap_action;
6278 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6279 
6280 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
6281 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6282 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
6283 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6284 }
6285 
6286 static int
6287 mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
6288 				  struct mlxsw_sp_fib_entry *fib_entry,
6289 				  enum mlxsw_reg_ralue_op op)
6290 {
6291 	enum mlxsw_reg_ralue_trap_action trap_action;
6292 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6293 	u16 trap_id;
6294 
6295 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
6296 	trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
6297 
6298 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6299 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
6300 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6301 }
6302 
6303 static int
6304 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
6305 				 struct mlxsw_sp_fib_entry *fib_entry,
6306 				 enum mlxsw_reg_ralue_op op)
6307 {
6308 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
6309 	const struct mlxsw_sp_ipip_ops *ipip_ops;
6310 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6311 	int err;
6312 
6313 	if (WARN_ON(!ipip_entry))
6314 		return -EINVAL;
6315 
6316 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
6317 	err = ipip_ops->decap_config(mlxsw_sp, ipip_entry,
6318 				     fib_entry->decap.tunnel_index);
6319 	if (err)
6320 		return err;
6321 
6322 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6323 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
6324 					   fib_entry->decap.tunnel_index);
6325 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6326 }
6327 
6328 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
6329 					   struct mlxsw_sp_fib_entry *fib_entry,
6330 					   enum mlxsw_reg_ralue_op op)
6331 {
6332 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6333 
6334 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6335 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
6336 					   fib_entry->decap.tunnel_index);
6337 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6338 }
6339 
6340 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
6341 				   struct mlxsw_sp_fib_entry *fib_entry,
6342 				   enum mlxsw_reg_ralue_op op)
6343 {
6344 	switch (fib_entry->type) {
6345 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
6346 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
6347 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
6348 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
6349 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
6350 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
6351 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
6352 		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
6353 	case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
6354 		return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
6355 							 op);
6356 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
6357 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
6358 							fib_entry, op);
6359 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
6360 		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
6361 	}
6362 	return -EINVAL;
6363 }
6364 
6365 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
6366 				 struct mlxsw_sp_fib_entry *fib_entry,
6367 				 enum mlxsw_reg_ralue_op op)
6368 {
6369 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
6370 
6371 	if (err)
6372 		return err;
6373 
6374 	mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
6375 
6376 	return err;
6377 }
6378 
6379 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
6380 				     struct mlxsw_sp_fib_entry *fib_entry)
6381 {
6382 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
6383 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
6384 }
6385 
6386 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
6387 				  struct mlxsw_sp_fib_entry *fib_entry)
6388 {
6389 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
6390 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
6391 }
6392 
6393 static int
6394 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
6395 			     const struct fib_entry_notifier_info *fen_info,
6396 			     struct mlxsw_sp_fib_entry *fib_entry)
6397 {
6398 	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
6399 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
6400 	struct mlxsw_sp_router *router = mlxsw_sp->router;
6401 	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
6402 	int ifindex = nhgi->nexthops[0].ifindex;
6403 	struct mlxsw_sp_ipip_entry *ipip_entry;
6404 
6405 	switch (fen_info->type) {
6406 	case RTN_LOCAL:
6407 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
6408 							       MLXSW_SP_L3_PROTO_IPV4, dip);
6409 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
6410 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
6411 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
6412 							     fib_entry,
6413 							     ipip_entry);
6414 		}
6415 		if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
6416 						 MLXSW_SP_L3_PROTO_IPV4,
6417 						 &dip)) {
6418 			u32 tunnel_index;
6419 
6420 			tunnel_index = router->nve_decap_config.tunnel_index;
6421 			fib_entry->decap.tunnel_index = tunnel_index;
6422 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
6423 			return 0;
6424 		}
6425 		fallthrough;
6426 	case RTN_BROADCAST:
6427 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6428 		return 0;
6429 	case RTN_BLACKHOLE:
6430 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
6431 		return 0;
6432 	case RTN_UNREACHABLE:
6433 	case RTN_PROHIBIT:
6434 		/* Packets hitting these routes need to be trapped, but
6435 		 * can do so with a lower priority than packets directed
6436 		 * at the host, so use action type local instead of trap.
6437 		 */
6438 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6439 		return 0;
6440 	case RTN_UNICAST:
6441 		if (nhgi->gateway)
6442 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
6443 		else
6444 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
6445 		return 0;
6446 	default:
6447 		return -EINVAL;
6448 	}
6449 }
6450 
6451 static void
6452 mlxsw_sp_fib_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6453 			      struct mlxsw_sp_fib_entry *fib_entry)
6454 {
6455 	switch (fib_entry->type) {
6456 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
6457 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
6458 		break;
6459 	default:
6460 		break;
6461 	}
6462 }
6463 
6464 static void
6465 mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6466 			       struct mlxsw_sp_fib4_entry *fib4_entry)
6467 {
6468 	mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib4_entry->common);
6469 }
6470 
6471 static struct mlxsw_sp_fib4_entry *
6472 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
6473 			   struct mlxsw_sp_fib_node *fib_node,
6474 			   const struct fib_entry_notifier_info *fen_info)
6475 {
6476 	struct mlxsw_sp_fib4_entry *fib4_entry;
6477 	struct mlxsw_sp_fib_entry *fib_entry;
6478 	int err;
6479 
6480 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
6481 	if (!fib4_entry)
6482 		return ERR_PTR(-ENOMEM);
6483 	fib_entry = &fib4_entry->common;
6484 
6485 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
6486 	if (err)
6487 		goto err_nexthop4_group_get;
6488 
6489 	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6490 					     fib_node->fib);
6491 	if (err)
6492 		goto err_nexthop_group_vr_link;
6493 
6494 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
6495 	if (err)
6496 		goto err_fib4_entry_type_set;
6497 
6498 	fib4_entry->fi = fen_info->fi;
6499 	fib_info_hold(fib4_entry->fi);
6500 	fib4_entry->tb_id = fen_info->tb_id;
6501 	fib4_entry->type = fen_info->type;
6502 	fib4_entry->dscp = fen_info->dscp;
6503 
6504 	fib_entry->fib_node = fib_node;
6505 
6506 	return fib4_entry;
6507 
6508 err_fib4_entry_type_set:
6509 	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
6510 err_nexthop_group_vr_link:
6511 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6512 err_nexthop4_group_get:
6513 	kfree(fib4_entry);
6514 	return ERR_PTR(err);
6515 }
6516 
6517 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6518 					struct mlxsw_sp_fib4_entry *fib4_entry)
6519 {
6520 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6521 
6522 	fib_info_put(fib4_entry->fi);
6523 	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib4_entry);
6524 	mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
6525 					 fib_node->fib);
6526 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6527 	kfree(fib4_entry);
6528 }
6529 
6530 static struct mlxsw_sp_fib4_entry *
6531 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
6532 			   const struct fib_entry_notifier_info *fen_info)
6533 {
6534 	struct mlxsw_sp_fib4_entry *fib4_entry;
6535 	struct mlxsw_sp_fib_node *fib_node;
6536 	struct mlxsw_sp_fib *fib;
6537 	struct mlxsw_sp_vr *vr;
6538 
6539 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
6540 	if (!vr)
6541 		return NULL;
6542 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
6543 
6544 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
6545 					    sizeof(fen_info->dst),
6546 					    fen_info->dst_len);
6547 	if (!fib_node)
6548 		return NULL;
6549 
6550 	fib4_entry = container_of(fib_node->fib_entry,
6551 				  struct mlxsw_sp_fib4_entry, common);
6552 	if (fib4_entry->tb_id == fen_info->tb_id &&
6553 	    fib4_entry->dscp == fen_info->dscp &&
6554 	    fib4_entry->type == fen_info->type &&
6555 	    fib4_entry->fi == fen_info->fi)
6556 		return fib4_entry;
6557 
6558 	return NULL;
6559 }
6560 
6561 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
6562 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
6563 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
6564 	.key_len = sizeof(struct mlxsw_sp_fib_key),
6565 	.automatic_shrinking = true,
6566 };
6567 
6568 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
6569 				    struct mlxsw_sp_fib_node *fib_node)
6570 {
6571 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
6572 				      mlxsw_sp_fib_ht_params);
6573 }
6574 
6575 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
6576 				     struct mlxsw_sp_fib_node *fib_node)
6577 {
6578 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
6579 			       mlxsw_sp_fib_ht_params);
6580 }
6581 
6582 static struct mlxsw_sp_fib_node *
6583 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
6584 			 size_t addr_len, unsigned char prefix_len)
6585 {
6586 	struct mlxsw_sp_fib_key key;
6587 
6588 	memset(&key, 0, sizeof(key));
6589 	memcpy(key.addr, addr, addr_len);
6590 	key.prefix_len = prefix_len;
6591 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
6592 }
6593 
6594 static struct mlxsw_sp_fib_node *
6595 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
6596 			 size_t addr_len, unsigned char prefix_len)
6597 {
6598 	struct mlxsw_sp_fib_node *fib_node;
6599 
6600 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
6601 	if (!fib_node)
6602 		return NULL;
6603 
6604 	list_add(&fib_node->list, &fib->node_list);
6605 	memcpy(fib_node->key.addr, addr, addr_len);
6606 	fib_node->key.prefix_len = prefix_len;
6607 
6608 	return fib_node;
6609 }
6610 
6611 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
6612 {
6613 	list_del(&fib_node->list);
6614 	kfree(fib_node);
6615 }
6616 
6617 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
6618 				      struct mlxsw_sp_fib_node *fib_node)
6619 {
6620 	struct mlxsw_sp_prefix_usage req_prefix_usage;
6621 	struct mlxsw_sp_fib *fib = fib_node->fib;
6622 	struct mlxsw_sp_lpm_tree *lpm_tree;
6623 	int err;
6624 
6625 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
6626 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6627 		goto out;
6628 
6629 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6630 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
6631 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6632 					 fib->proto);
6633 	if (IS_ERR(lpm_tree))
6634 		return PTR_ERR(lpm_tree);
6635 
6636 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6637 	if (err)
6638 		goto err_lpm_tree_replace;
6639 
6640 out:
6641 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
6642 	return 0;
6643 
6644 err_lpm_tree_replace:
6645 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6646 	return err;
6647 }
6648 
6649 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
6650 					 struct mlxsw_sp_fib_node *fib_node)
6651 {
6652 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
6653 	struct mlxsw_sp_prefix_usage req_prefix_usage;
6654 	struct mlxsw_sp_fib *fib = fib_node->fib;
6655 	int err;
6656 
6657 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6658 		return;
6659 	/* Try to construct a new LPM tree from the current prefix usage
6660 	 * minus the unused one. If we fail, continue using the old one.
6661 	 */
6662 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6663 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
6664 				    fib_node->key.prefix_len);
6665 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6666 					 fib->proto);
6667 	if (IS_ERR(lpm_tree))
6668 		return;
6669 
6670 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6671 	if (err)
6672 		goto err_lpm_tree_replace;
6673 
6674 	return;
6675 
6676 err_lpm_tree_replace:
6677 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6678 }
6679 
6680 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
6681 				  struct mlxsw_sp_fib_node *fib_node,
6682 				  struct mlxsw_sp_fib *fib)
6683 {
6684 	int err;
6685 
6686 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
6687 	if (err)
6688 		return err;
6689 	fib_node->fib = fib;
6690 
6691 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
6692 	if (err)
6693 		goto err_fib_lpm_tree_link;
6694 
6695 	return 0;
6696 
6697 err_fib_lpm_tree_link:
6698 	fib_node->fib = NULL;
6699 	mlxsw_sp_fib_node_remove(fib, fib_node);
6700 	return err;
6701 }
6702 
6703 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
6704 				   struct mlxsw_sp_fib_node *fib_node)
6705 {
6706 	struct mlxsw_sp_fib *fib = fib_node->fib;
6707 
6708 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
6709 	fib_node->fib = NULL;
6710 	mlxsw_sp_fib_node_remove(fib, fib_node);
6711 }
6712 
6713 static struct mlxsw_sp_fib_node *
6714 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
6715 		      size_t addr_len, unsigned char prefix_len,
6716 		      enum mlxsw_sp_l3proto proto)
6717 {
6718 	struct mlxsw_sp_fib_node *fib_node;
6719 	struct mlxsw_sp_fib *fib;
6720 	struct mlxsw_sp_vr *vr;
6721 	int err;
6722 
6723 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
6724 	if (IS_ERR(vr))
6725 		return ERR_CAST(vr);
6726 	fib = mlxsw_sp_vr_fib(vr, proto);
6727 
6728 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
6729 	if (fib_node)
6730 		return fib_node;
6731 
6732 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
6733 	if (!fib_node) {
6734 		err = -ENOMEM;
6735 		goto err_fib_node_create;
6736 	}
6737 
6738 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
6739 	if (err)
6740 		goto err_fib_node_init;
6741 
6742 	return fib_node;
6743 
6744 err_fib_node_init:
6745 	mlxsw_sp_fib_node_destroy(fib_node);
6746 err_fib_node_create:
6747 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6748 	return ERR_PTR(err);
6749 }
6750 
6751 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
6752 				  struct mlxsw_sp_fib_node *fib_node)
6753 {
6754 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
6755 
6756 	if (fib_node->fib_entry)
6757 		return;
6758 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
6759 	mlxsw_sp_fib_node_destroy(fib_node);
6760 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6761 }
6762 
6763 static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
6764 					struct mlxsw_sp_fib_entry *fib_entry)
6765 {
6766 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6767 	int err;
6768 
6769 	fib_node->fib_entry = fib_entry;
6770 
6771 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
6772 	if (err)
6773 		goto err_fib_entry_update;
6774 
6775 	return 0;
6776 
6777 err_fib_entry_update:
6778 	fib_node->fib_entry = NULL;
6779 	return err;
6780 }
6781 
6782 static void
6783 mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
6784 			       struct mlxsw_sp_fib_entry *fib_entry)
6785 {
6786 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6787 
6788 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
6789 	fib_node->fib_entry = NULL;
6790 }
6791 
6792 static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
6793 {
6794 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6795 	struct mlxsw_sp_fib4_entry *fib4_replaced;
6796 
6797 	if (!fib_node->fib_entry)
6798 		return true;
6799 
6800 	fib4_replaced = container_of(fib_node->fib_entry,
6801 				     struct mlxsw_sp_fib4_entry, common);
6802 	if (fib4_entry->tb_id == RT_TABLE_MAIN &&
6803 	    fib4_replaced->tb_id == RT_TABLE_LOCAL)
6804 		return false;
6805 
6806 	return true;
6807 }
6808 
6809 static int
6810 mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
6811 			     const struct fib_entry_notifier_info *fen_info)
6812 {
6813 	struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
6814 	struct mlxsw_sp_fib_entry *replaced;
6815 	struct mlxsw_sp_fib_node *fib_node;
6816 	int err;
6817 
6818 	if (fen_info->fi->nh &&
6819 	    !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
6820 		return 0;
6821 
6822 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
6823 					 &fen_info->dst, sizeof(fen_info->dst),
6824 					 fen_info->dst_len,
6825 					 MLXSW_SP_L3_PROTO_IPV4);
6826 	if (IS_ERR(fib_node)) {
6827 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
6828 		return PTR_ERR(fib_node);
6829 	}
6830 
6831 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
6832 	if (IS_ERR(fib4_entry)) {
6833 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
6834 		err = PTR_ERR(fib4_entry);
6835 		goto err_fib4_entry_create;
6836 	}
6837 
6838 	if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
6839 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6840 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6841 		return 0;
6842 	}
6843 
6844 	replaced = fib_node->fib_entry;
6845 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
6846 	if (err) {
6847 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
6848 		goto err_fib_node_entry_link;
6849 	}
6850 
6851 	/* Nothing to replace */
6852 	if (!replaced)
6853 		return 0;
6854 
6855 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
6856 	fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
6857 				     common);
6858 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
6859 
6860 	return 0;
6861 
6862 err_fib_node_entry_link:
6863 	fib_node->fib_entry = replaced;
6864 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6865 err_fib4_entry_create:
6866 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6867 	return err;
6868 }
6869 
6870 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
6871 				     struct fib_entry_notifier_info *fen_info)
6872 {
6873 	struct mlxsw_sp_fib4_entry *fib4_entry;
6874 	struct mlxsw_sp_fib_node *fib_node;
6875 
6876 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
6877 	if (!fib4_entry)
6878 		return;
6879 	fib_node = fib4_entry->common.fib_node;
6880 
6881 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
6882 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6883 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6884 }
6885 
6886 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
6887 {
6888 	/* Multicast routes aren't supported, so ignore them. Neighbour
6889 	 * Discovery packets are specifically trapped.
6890 	 */
6891 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
6892 		return true;
6893 
6894 	/* Cloned routes are irrelevant in the forwarding path. */
6895 	if (rt->fib6_flags & RTF_CACHE)
6896 		return true;
6897 
6898 	return false;
6899 }
6900 
6901 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
6902 {
6903 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6904 
6905 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
6906 	if (!mlxsw_sp_rt6)
6907 		return ERR_PTR(-ENOMEM);
6908 
6909 	/* In case of route replace, replaced route is deleted with
6910 	 * no notification. Take reference to prevent accessing freed
6911 	 * memory.
6912 	 */
6913 	mlxsw_sp_rt6->rt = rt;
6914 	fib6_info_hold(rt);
6915 
6916 	return mlxsw_sp_rt6;
6917 }
6918 
6919 #if IS_ENABLED(CONFIG_IPV6)
6920 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6921 {
6922 	fib6_info_release(rt);
6923 }
6924 #else
6925 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6926 {
6927 }
6928 #endif
6929 
6930 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
6931 {
6932 	struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
6933 
6934 	if (!mlxsw_sp_rt6->rt->nh)
6935 		fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
6936 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
6937 	kfree(mlxsw_sp_rt6);
6938 }
6939 
6940 static struct fib6_info *
6941 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
6942 {
6943 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
6944 				list)->rt;
6945 }
6946 
6947 static struct mlxsw_sp_rt6 *
6948 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
6949 			    const struct fib6_info *rt)
6950 {
6951 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6952 
6953 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
6954 		if (mlxsw_sp_rt6->rt == rt)
6955 			return mlxsw_sp_rt6;
6956 	}
6957 
6958 	return NULL;
6959 }
6960 
6961 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
6962 					const struct fib6_info *rt,
6963 					enum mlxsw_sp_ipip_type *ret)
6964 {
6965 	return rt->fib6_nh->fib_nh_dev &&
6966 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
6967 }
6968 
6969 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
6970 				  struct mlxsw_sp_nexthop_group *nh_grp,
6971 				  struct mlxsw_sp_nexthop *nh,
6972 				  const struct fib6_info *rt)
6973 {
6974 	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
6975 	int err;
6976 
6977 	nh->nhgi = nh_grp->nhgi;
6978 	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
6979 	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
6980 #if IS_ENABLED(CONFIG_IPV6)
6981 	nh->neigh_tbl = &nd_tbl;
6982 #endif
6983 
6984 	err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
6985 	if (err)
6986 		return err;
6987 
6988 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
6989 
6990 	if (!dev)
6991 		return 0;
6992 	nh->ifindex = dev->ifindex;
6993 
6994 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
6995 	if (err)
6996 		goto err_nexthop_type_init;
6997 
6998 	return 0;
6999 
7000 err_nexthop_type_init:
7001 	list_del(&nh->router_list_node);
7002 	mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
7003 	return err;
7004 }
7005 
7006 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
7007 				   struct mlxsw_sp_nexthop *nh)
7008 {
7009 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
7010 	list_del(&nh->router_list_node);
7011 	mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
7012 }
7013 
7014 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
7015 				    const struct fib6_info *rt)
7016 {
7017 	return rt->fib6_nh->fib_nh_gw_family ||
7018 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
7019 }
7020 
7021 static int
7022 mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp,
7023 				  struct mlxsw_sp_nexthop_group *nh_grp,
7024 				  struct mlxsw_sp_fib6_entry *fib6_entry)
7025 {
7026 	struct mlxsw_sp_nexthop_group_info *nhgi;
7027 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
7028 	struct mlxsw_sp_nexthop *nh;
7029 	int err, i;
7030 
7031 	nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6),
7032 		       GFP_KERNEL);
7033 	if (!nhgi)
7034 		return -ENOMEM;
7035 	nh_grp->nhgi = nhgi;
7036 	nhgi->nh_grp = nh_grp;
7037 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
7038 					struct mlxsw_sp_rt6, list);
7039 	nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
7040 	nhgi->count = fib6_entry->nrt6;
7041 	for (i = 0; i < nhgi->count; i++) {
7042 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
7043 
7044 		nh = &nhgi->nexthops[i];
7045 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
7046 		if (err)
7047 			goto err_nexthop6_init;
7048 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
7049 	}
7050 	nh_grp->nhgi = nhgi;
7051 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
7052 	if (err)
7053 		goto err_group_inc;
7054 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
7055 	if (err)
7056 		goto err_group_refresh;
7057 
7058 	return 0;
7059 
7060 err_group_refresh:
7061 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
7062 err_group_inc:
7063 	i = nhgi->count;
7064 err_nexthop6_init:
7065 	for (i--; i >= 0; i--) {
7066 		nh = &nhgi->nexthops[i];
7067 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
7068 	}
7069 	kfree(nhgi);
7070 	return err;
7071 }
7072 
7073 static void
7074 mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp,
7075 				  struct mlxsw_sp_nexthop_group *nh_grp)
7076 {
7077 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
7078 	int i;
7079 
7080 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
7081 	for (i = nhgi->count - 1; i >= 0; i--) {
7082 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
7083 
7084 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
7085 	}
7086 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
7087 	WARN_ON_ONCE(nhgi->adj_index_valid);
7088 	kfree(nhgi);
7089 }
7090 
7091 static struct mlxsw_sp_nexthop_group *
7092 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
7093 			       struct mlxsw_sp_fib6_entry *fib6_entry)
7094 {
7095 	struct mlxsw_sp_nexthop_group *nh_grp;
7096 	int err;
7097 
7098 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
7099 	if (!nh_grp)
7100 		return ERR_PTR(-ENOMEM);
7101 	INIT_LIST_HEAD(&nh_grp->vr_list);
7102 	err = rhashtable_init(&nh_grp->vr_ht,
7103 			      &mlxsw_sp_nexthop_group_vr_ht_params);
7104 	if (err)
7105 		goto err_nexthop_group_vr_ht_init;
7106 	INIT_LIST_HEAD(&nh_grp->fib_list);
7107 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
7108 
7109 	err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry);
7110 	if (err)
7111 		goto err_nexthop_group_info_init;
7112 
7113 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
7114 	if (err)
7115 		goto err_nexthop_group_insert;
7116 
7117 	nh_grp->can_destroy = true;
7118 
7119 	return nh_grp;
7120 
7121 err_nexthop_group_insert:
7122 	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
7123 err_nexthop_group_info_init:
7124 	rhashtable_destroy(&nh_grp->vr_ht);
7125 err_nexthop_group_vr_ht_init:
7126 	kfree(nh_grp);
7127 	return ERR_PTR(err);
7128 }
7129 
/* Destroy an IPv6 nexthop group: remove it from the nexthop group table,
 * release its group info, destroy its per-VR hash table and free it.
 *
 * Nothing is done for a group whose can_destroy flag is not set;
 * mlxsw_sp_nexthop6_group_create() sets that flag only after the group was
 * inserted successfully.
 */
static void
mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (!nh_grp->can_destroy)
		return;
	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
	/* No virtual router is expected to still be linked to the group. */
	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
	rhashtable_destroy(&nh_grp->vr_ht);
	kfree(nh_grp);
}
7142 
7143 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
7144 				       struct mlxsw_sp_fib6_entry *fib6_entry)
7145 {
7146 	struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7147 	struct mlxsw_sp_nexthop_group *nh_grp;
7148 
7149 	if (rt->nh) {
7150 		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
7151 							   rt->nh->id);
7152 		if (WARN_ON_ONCE(!nh_grp))
7153 			return -EINVAL;
7154 		goto out;
7155 	}
7156 
7157 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
7158 	if (!nh_grp) {
7159 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
7160 		if (IS_ERR(nh_grp))
7161 			return PTR_ERR(nh_grp);
7162 	}
7163 
7164 	/* The route and the nexthop are described by the same struct, so we
7165 	 * need to the update the nexthop offload indication for the new route.
7166 	 */
7167 	__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
7168 
7169 out:
7170 	list_add_tail(&fib6_entry->common.nexthop_group_node,
7171 		      &nh_grp->fib_list);
7172 	fib6_entry->common.nh_group = nh_grp;
7173 
7174 	return 0;
7175 }
7176 
7177 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
7178 					struct mlxsw_sp_fib_entry *fib_entry)
7179 {
7180 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
7181 
7182 	list_del(&fib_entry->nexthop_group_node);
7183 	if (!list_empty(&nh_grp->fib_list))
7184 		return;
7185 
7186 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
7187 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
7188 		return;
7189 	}
7190 
7191 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
7192 }
7193 
7194 static int
7195 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
7196 			       struct mlxsw_sp_fib6_entry *fib6_entry)
7197 {
7198 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
7199 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7200 	int err;
7201 
7202 	mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib);
7203 	fib6_entry->common.nh_group = NULL;
7204 	list_del(&fib6_entry->common.nexthop_group_node);
7205 
7206 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
7207 	if (err)
7208 		goto err_nexthop6_group_get;
7209 
7210 	err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group,
7211 					     fib_node->fib);
7212 	if (err)
7213 		goto err_nexthop_group_vr_link;
7214 
7215 	/* In case this entry is offloaded, then the adjacency index
7216 	 * currently associated with it in the device's table is that
7217 	 * of the old group. Start using the new one instead.
7218 	 */
7219 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
7220 	if (err)
7221 		goto err_fib_entry_update;
7222 
7223 	if (list_empty(&old_nh_grp->fib_list))
7224 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
7225 
7226 	return 0;
7227 
7228 err_fib_entry_update:
7229 	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
7230 					 fib_node->fib);
7231 err_nexthop_group_vr_link:
7232 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
7233 err_nexthop6_group_get:
7234 	list_add_tail(&fib6_entry->common.nexthop_group_node,
7235 		      &old_nh_grp->fib_list);
7236 	fib6_entry->common.nh_group = old_nh_grp;
7237 	mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib);
7238 	return err;
7239 }
7240 
7241 static int
7242 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
7243 				struct mlxsw_sp_fib6_entry *fib6_entry,
7244 				struct fib6_info **rt_arr, unsigned int nrt6)
7245 {
7246 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
7247 	int err, i;
7248 
7249 	for (i = 0; i < nrt6; i++) {
7250 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
7251 		if (IS_ERR(mlxsw_sp_rt6)) {
7252 			err = PTR_ERR(mlxsw_sp_rt6);
7253 			goto err_rt6_unwind;
7254 		}
7255 
7256 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
7257 		fib6_entry->nrt6++;
7258 	}
7259 
7260 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
7261 	if (err)
7262 		goto err_rt6_unwind;
7263 
7264 	return 0;
7265 
7266 err_rt6_unwind:
7267 	for (; i > 0; i--) {
7268 		fib6_entry->nrt6--;
7269 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
7270 					       struct mlxsw_sp_rt6, list);
7271 		list_del(&mlxsw_sp_rt6->list);
7272 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7273 	}
7274 	return err;
7275 }
7276 
7277 static void
7278 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
7279 				struct mlxsw_sp_fib6_entry *fib6_entry,
7280 				struct fib6_info **rt_arr, unsigned int nrt6)
7281 {
7282 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
7283 	int i;
7284 
7285 	for (i = 0; i < nrt6; i++) {
7286 		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
7287 							   rt_arr[i]);
7288 		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
7289 			continue;
7290 
7291 		fib6_entry->nrt6--;
7292 		list_del(&mlxsw_sp_rt6->list);
7293 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7294 	}
7295 
7296 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
7297 }
7298 
7299 static int
7300 mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp,
7301 				   struct mlxsw_sp_fib_entry *fib_entry,
7302 				   const struct fib6_info *rt)
7303 {
7304 	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
7305 	union mlxsw_sp_l3addr dip = { .addr6 = rt->fib6_dst.addr };
7306 	u32 tb_id = mlxsw_sp_fix_tb_id(rt->fib6_table->tb6_id);
7307 	struct mlxsw_sp_router *router = mlxsw_sp->router;
7308 	int ifindex = nhgi->nexthops[0].ifindex;
7309 	struct mlxsw_sp_ipip_entry *ipip_entry;
7310 
7311 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
7312 	ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
7313 						       MLXSW_SP_L3_PROTO_IPV6,
7314 						       dip);
7315 
7316 	if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
7317 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
7318 		return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry,
7319 						     ipip_entry);
7320 	}
7321 	if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
7322 					 MLXSW_SP_L3_PROTO_IPV6, &dip)) {
7323 		u32 tunnel_index;
7324 
7325 		tunnel_index = router->nve_decap_config.tunnel_index;
7326 		fib_entry->decap.tunnel_index = tunnel_index;
7327 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
7328 	}
7329 
7330 	return 0;
7331 }
7332 
/* Derive the FIB entry type from an IPv6 route.
 *
 * The checks are ordered by precedence: local routes are handled by
 * mlxsw_sp_fib6_entry_type_set_local(), then anycast (trap), blackhole and
 * reject (unreachable) routes. Anything else is a remote entry when the
 * entry's nexthop group is a gateway group and a local entry otherwise.
 *
 * Return: 0, or the result of mlxsw_sp_fib6_entry_type_set_local() for
 * local routes.
 */
static int mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					const struct fib6_info *rt)
{
	if (rt->fib6_flags & RTF_LOCAL)
		return mlxsw_sp_fib6_entry_type_set_local(mlxsw_sp, fib_entry,
							  rt);
	if (rt->fib6_flags & RTF_ANYCAST)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	else if (rt->fib6_type == RTN_BLACKHOLE)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
	else if (rt->fib6_flags & RTF_REJECT)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
	else if (fib_entry->nh_group->nhgi->gateway)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
	else
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;

	return 0;
}
7353 
7354 static void
7355 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
7356 {
7357 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
7358 
7359 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
7360 				 list) {
7361 		fib6_entry->nrt6--;
7362 		list_del(&mlxsw_sp_rt6->list);
7363 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7364 	}
7365 }
7366 
7367 static struct mlxsw_sp_fib6_entry *
7368 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
7369 			   struct mlxsw_sp_fib_node *fib_node,
7370 			   struct fib6_info **rt_arr, unsigned int nrt6)
7371 {
7372 	struct mlxsw_sp_fib6_entry *fib6_entry;
7373 	struct mlxsw_sp_fib_entry *fib_entry;
7374 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
7375 	int err, i;
7376 
7377 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
7378 	if (!fib6_entry)
7379 		return ERR_PTR(-ENOMEM);
7380 	fib_entry = &fib6_entry->common;
7381 
7382 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
7383 
7384 	for (i = 0; i < nrt6; i++) {
7385 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
7386 		if (IS_ERR(mlxsw_sp_rt6)) {
7387 			err = PTR_ERR(mlxsw_sp_rt6);
7388 			goto err_rt6_unwind;
7389 		}
7390 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
7391 		fib6_entry->nrt6++;
7392 	}
7393 
7394 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
7395 	if (err)
7396 		goto err_rt6_unwind;
7397 
7398 	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
7399 					     fib_node->fib);
7400 	if (err)
7401 		goto err_nexthop_group_vr_link;
7402 
7403 	err = mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
7404 	if (err)
7405 		goto err_fib6_entry_type_set;
7406 
7407 	fib_entry->fib_node = fib_node;
7408 
7409 	return fib6_entry;
7410 
7411 err_fib6_entry_type_set:
7412 	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
7413 err_nexthop_group_vr_link:
7414 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
7415 err_rt6_unwind:
7416 	for (; i > 0; i--) {
7417 		fib6_entry->nrt6--;
7418 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
7419 					       struct mlxsw_sp_rt6, list);
7420 		list_del(&mlxsw_sp_rt6->list);
7421 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7422 	}
7423 	kfree(fib6_entry);
7424 	return ERR_PTR(err);
7425 }
7426 
/* IPv6 wrapper around mlxsw_sp_fib_entry_type_unset(): hands the common
 * part of @fib6_entry to the protocol-independent helper.
 */
static void
mlxsw_sp_fib6_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib6_entry->common);
}
7433 
7434 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
7435 					struct mlxsw_sp_fib6_entry *fib6_entry)
7436 {
7437 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7438 
7439 	mlxsw_sp_fib6_entry_type_unset(mlxsw_sp, fib6_entry);
7440 	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
7441 					 fib_node->fib);
7442 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
7443 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
7444 	WARN_ON(fib6_entry->nrt6);
7445 	kfree(fib6_entry);
7446 }
7447 
7448 static struct mlxsw_sp_fib6_entry *
7449 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
7450 			   const struct fib6_info *rt)
7451 {
7452 	struct mlxsw_sp_fib6_entry *fib6_entry;
7453 	struct mlxsw_sp_fib_node *fib_node;
7454 	struct mlxsw_sp_fib *fib;
7455 	struct fib6_info *cmp_rt;
7456 	struct mlxsw_sp_vr *vr;
7457 
7458 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
7459 	if (!vr)
7460 		return NULL;
7461 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
7462 
7463 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
7464 					    sizeof(rt->fib6_dst.addr),
7465 					    rt->fib6_dst.plen);
7466 	if (!fib_node)
7467 		return NULL;
7468 
7469 	fib6_entry = container_of(fib_node->fib_entry,
7470 				  struct mlxsw_sp_fib6_entry, common);
7471 	cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7472 	if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
7473 	    rt->fib6_metric == cmp_rt->fib6_metric &&
7474 	    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
7475 		return fib6_entry;
7476 
7477 	return NULL;
7478 }
7479 
7480 static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
7481 {
7482 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7483 	struct mlxsw_sp_fib6_entry *fib6_replaced;
7484 	struct fib6_info *rt, *rt_replaced;
7485 
7486 	if (!fib_node->fib_entry)
7487 		return true;
7488 
7489 	fib6_replaced = container_of(fib_node->fib_entry,
7490 				     struct mlxsw_sp_fib6_entry,
7491 				     common);
7492 	rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7493 	rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
7494 	if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
7495 	    rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
7496 		return false;
7497 
7498 	return true;
7499 }
7500 
/* Handle a replace notification for an IPv6 route. @rt_arr holds the @nrt6
 * routes that make up the (possibly multipath) route; the first one
 * describes the prefix and table.
 *
 * Return: 0 when the route was programmed or deliberately skipped, -EINVAL
 * for a route with a source prefix, or the error of the failing step.
 */
static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
					struct fib6_info **rt_arr,
					unsigned int nrt6)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
	struct mlxsw_sp_fib_entry *replaced;
	struct mlxsw_sp_fib_node *fib_node;
	struct fib6_info *rt = rt_arr[0];
	int err;

	/* Routes with a source prefix are rejected. */
	if (rt->fib6_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	/* The route uses a nexthop object for which the driver has no
	 * nexthop group; skip it.
	 */
	if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
					 &rt->fib6_dst.addr,
					 sizeof(rt->fib6_dst.addr),
					 rt->fib6_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
						nrt6);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	/* The new entry must not displace the current one; discard it and
	 * keep the node as it is.
	 */
	if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		return 0;
	}

	/* Remember the entry currently linked to the node, so that it can be
	 * restored if linking the new entry fails and destroyed if it
	 * succeeds.
	 */
	replaced = fib_node->fib_entry;
	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_link;

	/* Nothing to replace */
	if (!replaced)
		return 0;

	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
	fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
				     common);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);

	return 0;

err_fib_node_entry_link:
	/* Put the previous entry back before dropping the new one. */
	fib_node->fib_entry = replaced;
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
7564 
7565 static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
7566 				       struct fib6_info **rt_arr,
7567 				       unsigned int nrt6)
7568 {
7569 	struct mlxsw_sp_fib6_entry *fib6_entry;
7570 	struct mlxsw_sp_fib_node *fib_node;
7571 	struct fib6_info *rt = rt_arr[0];
7572 	int err;
7573 
7574 	if (rt->fib6_src.plen)
7575 		return -EINVAL;
7576 
7577 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7578 		return 0;
7579 
7580 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7581 					 &rt->fib6_dst.addr,
7582 					 sizeof(rt->fib6_dst.addr),
7583 					 rt->fib6_dst.plen,
7584 					 MLXSW_SP_L3_PROTO_IPV6);
7585 	if (IS_ERR(fib_node))
7586 		return PTR_ERR(fib_node);
7587 
7588 	if (WARN_ON_ONCE(!fib_node->fib_entry)) {
7589 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7590 		return -EINVAL;
7591 	}
7592 
7593 	fib6_entry = container_of(fib_node->fib_entry,
7594 				  struct mlxsw_sp_fib6_entry, common);
7595 	err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
7596 					      nrt6);
7597 	if (err)
7598 		goto err_fib6_entry_nexthop_add;
7599 
7600 	return 0;
7601 
7602 err_fib6_entry_nexthop_add:
7603 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7604 	return err;
7605 }
7606 
7607 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
7608 				     struct fib6_info **rt_arr,
7609 				     unsigned int nrt6)
7610 {
7611 	struct mlxsw_sp_fib6_entry *fib6_entry;
7612 	struct mlxsw_sp_fib_node *fib_node;
7613 	struct fib6_info *rt = rt_arr[0];
7614 
7615 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7616 		return;
7617 
7618 	/* Multipath routes are first added to the FIB trie and only then
7619 	 * notified. If we vetoed the addition, we will get a delete
7620 	 * notification for a route we do not have. Therefore, do not warn if
7621 	 * route was not found.
7622 	 */
7623 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
7624 	if (!fib6_entry)
7625 		return;
7626 
7627 	/* If not all the nexthops are deleted, then only reduce the nexthop
7628 	 * group.
7629 	 */
7630 	if (nrt6 != fib6_entry->nrt6) {
7631 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
7632 						nrt6);
7633 		return;
7634 	}
7635 
7636 	fib_node = fib6_entry->common.fib_node;
7637 
7638 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
7639 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7640 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7641 }
7642 
7643 static struct mlxsw_sp_mr_table *
7644 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
7645 {
7646 	if (family == RTNL_FAMILY_IPMR)
7647 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
7648 	else
7649 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
7650 }
7651 
7652 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
7653 				     struct mfc_entry_notifier_info *men_info,
7654 				     bool replace)
7655 {
7656 	struct mlxsw_sp_mr_table *mrt;
7657 	struct mlxsw_sp_vr *vr;
7658 
7659 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
7660 	if (IS_ERR(vr))
7661 		return PTR_ERR(vr);
7662 
7663 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7664 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
7665 }
7666 
/* Delete a multicast route from the multicast routing table of the virtual
 * router that serves the notification's table ID. Not finding the virtual
 * router is unexpected and triggers a warning.
 */
static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
				      struct mfc_entry_notifier_info *men_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
	/* NOTE(review): presumably releases the virtual router if this was
	 * its last user - confirm against mlxsw_sp_vr_put().
	 */
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
7681 
7682 static int
7683 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
7684 			      struct vif_entry_notifier_info *ven_info)
7685 {
7686 	struct mlxsw_sp_mr_table *mrt;
7687 	struct mlxsw_sp_rif *rif;
7688 	struct mlxsw_sp_vr *vr;
7689 
7690 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
7691 	if (IS_ERR(vr))
7692 		return PTR_ERR(vr);
7693 
7694 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7695 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
7696 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
7697 				   ven_info->vif_index,
7698 				   ven_info->vif_flags, rif);
7699 }
7700 
/* Delete a multicast VIF from the multicast routing table of the virtual
 * router that serves the notification's table ID. Not finding the virtual
 * router is unexpected and triggers a warning.
 */
static void
mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
	/* NOTE(review): presumably releases the virtual router if this was
	 * its last user - confirm against mlxsw_sp_vr_put().
	 */
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
7716 
7717 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
7718 				     struct mlxsw_sp_fib_node *fib_node)
7719 {
7720 	struct mlxsw_sp_fib4_entry *fib4_entry;
7721 
7722 	fib4_entry = container_of(fib_node->fib_entry,
7723 				  struct mlxsw_sp_fib4_entry, common);
7724 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7725 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
7726 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7727 }
7728 
7729 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
7730 				     struct mlxsw_sp_fib_node *fib_node)
7731 {
7732 	struct mlxsw_sp_fib6_entry *fib6_entry;
7733 
7734 	fib6_entry = container_of(fib_node->fib_entry,
7735 				  struct mlxsw_sp_fib6_entry, common);
7736 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7737 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7738 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7739 }
7740 
7741 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
7742 				    struct mlxsw_sp_fib_node *fib_node)
7743 {
7744 	switch (fib_node->fib->proto) {
7745 	case MLXSW_SP_L3_PROTO_IPV4:
7746 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
7747 		break;
7748 	case MLXSW_SP_L3_PROTO_IPV6:
7749 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
7750 		break;
7751 	}
7752 }
7753 
7754 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
7755 				  struct mlxsw_sp_vr *vr,
7756 				  enum mlxsw_sp_l3proto proto)
7757 {
7758 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
7759 	struct mlxsw_sp_fib_node *fib_node, *tmp;
7760 
7761 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
7762 		bool do_break = &tmp->list == &fib->node_list;
7763 
7764 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
7765 		if (do_break)
7766 			break;
7767 	}
7768 }
7769 
7770 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
7771 {
7772 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
7773 	int i, j;
7774 
7775 	for (i = 0; i < max_vrs; i++) {
7776 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
7777 
7778 		if (!mlxsw_sp_vr_is_used(vr))
7779 			continue;
7780 
7781 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
7782 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
7783 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
7784 
7785 		/* If virtual router was only used for IPv4, then it's no
7786 		 * longer used.
7787 		 */
7788 		if (!mlxsw_sp_vr_is_used(vr))
7789 			continue;
7790 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
7791 	}
7792 }
7793 
/* Routes carried by a deferred IPv6 FIB event. */
struct mlxsw_sp_fib6_event_work {
	/* Array of the notified route followed by its siblings; a reference
	 * is held on each route while the work is pending.
	 */
	struct fib6_info **rt_arr;
	/* Number of entries in rt_arr. */
	unsigned int nrt6;
};
7798 
/* Deferred FIB notification, processed by one of the per-family work
 * handlers.
 */
struct mlxsw_sp_fib_event_work {
	/* Work item; the handlers recover this structure from it. */
	struct work_struct work;
	/* Tracker for the reference held on ven_info.dev for VIF events. */
	netdevice_tracker dev_tracker;
	/* Event payload; which member is valid depends on the family and on
	 * the event.
	 */
	union {
		struct mlxsw_sp_fib6_event_work fib6_work;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
		struct mfc_entry_notifier_info men_info;
		struct vif_entry_notifier_info ven_info;
	};
	/* Driver instance the event is processed for. */
	struct mlxsw_sp *mlxsw_sp;
	/* FIB_EVENT_* code of the notification. */
	unsigned long event;
};
7813 
7814 static int
7815 mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
7816 			       struct fib6_entry_notifier_info *fen6_info)
7817 {
7818 	struct fib6_info *rt = fen6_info->rt;
7819 	struct fib6_info **rt_arr;
7820 	struct fib6_info *iter;
7821 	unsigned int nrt6;
7822 	int i = 0;
7823 
7824 	nrt6 = fen6_info->nsiblings + 1;
7825 
7826 	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
7827 	if (!rt_arr)
7828 		return -ENOMEM;
7829 
7830 	fib6_work->rt_arr = rt_arr;
7831 	fib6_work->nrt6 = nrt6;
7832 
7833 	rt_arr[0] = rt;
7834 	fib6_info_hold(rt);
7835 
7836 	if (!fen6_info->nsiblings)
7837 		return 0;
7838 
7839 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
7840 		if (i == fen6_info->nsiblings)
7841 			break;
7842 
7843 		rt_arr[i + 1] = iter;
7844 		fib6_info_hold(iter);
7845 		i++;
7846 	}
7847 	WARN_ON_ONCE(i != fen6_info->nsiblings);
7848 
7849 	return 0;
7850 }
7851 
7852 static void
7853 mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
7854 {
7855 	int i;
7856 
7857 	for (i = 0; i < fib6_work->nrt6; i++)
7858 		mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
7859 	kfree(fib6_work->rt_arr);
7860 }
7861 
7862 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
7863 {
7864 	struct mlxsw_sp_fib_event_work *fib_work =
7865 		container_of(work, struct mlxsw_sp_fib_event_work, work);
7866 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7867 	int err;
7868 
7869 	mutex_lock(&mlxsw_sp->router->lock);
7870 	mlxsw_sp_span_respin(mlxsw_sp);
7871 
7872 	switch (fib_work->event) {
7873 	case FIB_EVENT_ENTRY_REPLACE:
7874 		err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
7875 						   &fib_work->fen_info);
7876 		if (err) {
7877 			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7878 			mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
7879 							      &fib_work->fen_info);
7880 		}
7881 		fib_info_put(fib_work->fen_info.fi);
7882 		break;
7883 	case FIB_EVENT_ENTRY_DEL:
7884 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
7885 		fib_info_put(fib_work->fen_info.fi);
7886 		break;
7887 	case FIB_EVENT_NH_ADD:
7888 	case FIB_EVENT_NH_DEL:
7889 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
7890 					fib_work->fnh_info.fib_nh);
7891 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
7892 		break;
7893 	}
7894 	mutex_unlock(&mlxsw_sp->router->lock);
7895 	kfree(fib_work);
7896 }
7897 
7898 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
7899 {
7900 	struct mlxsw_sp_fib_event_work *fib_work =
7901 		    container_of(work, struct mlxsw_sp_fib_event_work, work);
7902 	struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work;
7903 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7904 	int err;
7905 
7906 	mutex_lock(&mlxsw_sp->router->lock);
7907 	mlxsw_sp_span_respin(mlxsw_sp);
7908 
7909 	switch (fib_work->event) {
7910 	case FIB_EVENT_ENTRY_REPLACE:
7911 		err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
7912 						   fib6_work->rt_arr,
7913 						   fib6_work->nrt6);
7914 		if (err) {
7915 			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7916 			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7917 							      fib6_work->rt_arr,
7918 							      fib6_work->nrt6);
7919 		}
7920 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7921 		break;
7922 	case FIB_EVENT_ENTRY_APPEND:
7923 		err = mlxsw_sp_router_fib6_append(mlxsw_sp,
7924 						  fib6_work->rt_arr,
7925 						  fib6_work->nrt6);
7926 		if (err) {
7927 			dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n");
7928 			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7929 							      fib6_work->rt_arr,
7930 							      fib6_work->nrt6);
7931 		}
7932 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7933 		break;
7934 	case FIB_EVENT_ENTRY_DEL:
7935 		mlxsw_sp_router_fib6_del(mlxsw_sp,
7936 					 fib6_work->rt_arr,
7937 					 fib6_work->nrt6);
7938 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7939 		break;
7940 	}
7941 	mutex_unlock(&mlxsw_sp->router->lock);
7942 	kfree(fib_work);
7943 }
7944 
7945 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
7946 {
7947 	struct mlxsw_sp_fib_event_work *fib_work =
7948 		container_of(work, struct mlxsw_sp_fib_event_work, work);
7949 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7950 	bool replace;
7951 	int err;
7952 
7953 	rtnl_lock();
7954 	mutex_lock(&mlxsw_sp->router->lock);
7955 	switch (fib_work->event) {
7956 	case FIB_EVENT_ENTRY_REPLACE:
7957 	case FIB_EVENT_ENTRY_ADD:
7958 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
7959 
7960 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
7961 						replace);
7962 		if (err)
7963 			dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n");
7964 		mr_cache_put(fib_work->men_info.mfc);
7965 		break;
7966 	case FIB_EVENT_ENTRY_DEL:
7967 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
7968 		mr_cache_put(fib_work->men_info.mfc);
7969 		break;
7970 	case FIB_EVENT_VIF_ADD:
7971 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
7972 						    &fib_work->ven_info);
7973 		if (err)
7974 			dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n");
7975 		netdev_put(fib_work->ven_info.dev, &fib_work->dev_tracker);
7976 		break;
7977 	case FIB_EVENT_VIF_DEL:
7978 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
7979 					      &fib_work->ven_info);
7980 		netdev_put(fib_work->ven_info.dev, &fib_work->dev_tracker);
7981 		break;
7982 	}
7983 	mutex_unlock(&mlxsw_sp->router->lock);
7984 	rtnl_unlock();
7985 	kfree(fib_work);
7986 }
7987 
7988 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
7989 				       struct fib_notifier_info *info)
7990 {
7991 	struct fib_entry_notifier_info *fen_info;
7992 	struct fib_nh_notifier_info *fnh_info;
7993 
7994 	switch (fib_work->event) {
7995 	case FIB_EVENT_ENTRY_REPLACE:
7996 	case FIB_EVENT_ENTRY_DEL:
7997 		fen_info = container_of(info, struct fib_entry_notifier_info,
7998 					info);
7999 		fib_work->fen_info = *fen_info;
8000 		/* Take reference on fib_info to prevent it from being
8001 		 * freed while work is queued. Release it afterwards.
8002 		 */
8003 		fib_info_hold(fib_work->fen_info.fi);
8004 		break;
8005 	case FIB_EVENT_NH_ADD:
8006 	case FIB_EVENT_NH_DEL:
8007 		fnh_info = container_of(info, struct fib_nh_notifier_info,
8008 					info);
8009 		fib_work->fnh_info = *fnh_info;
8010 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
8011 		break;
8012 	}
8013 }
8014 
8015 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
8016 				      struct fib_notifier_info *info)
8017 {
8018 	struct fib6_entry_notifier_info *fen6_info;
8019 	int err;
8020 
8021 	switch (fib_work->event) {
8022 	case FIB_EVENT_ENTRY_REPLACE:
8023 	case FIB_EVENT_ENTRY_APPEND:
8024 	case FIB_EVENT_ENTRY_DEL:
8025 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
8026 					 info);
8027 		err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
8028 						     fen6_info);
8029 		if (err)
8030 			return err;
8031 		break;
8032 	}
8033 
8034 	return 0;
8035 }
8036 
8037 static void
8038 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
8039 			    struct fib_notifier_info *info)
8040 {
8041 	switch (fib_work->event) {
8042 	case FIB_EVENT_ENTRY_REPLACE:
8043 	case FIB_EVENT_ENTRY_ADD:
8044 	case FIB_EVENT_ENTRY_DEL:
8045 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
8046 		mr_cache_hold(fib_work->men_info.mfc);
8047 		break;
8048 	case FIB_EVENT_VIF_ADD:
8049 	case FIB_EVENT_VIF_DEL:
8050 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
8051 		netdev_hold(fib_work->ven_info.dev, &fib_work->dev_tracker,
8052 			    GFP_ATOMIC);
8053 		break;
8054 	}
8055 }
8056 
8057 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
8058 					  struct fib_notifier_info *info,
8059 					  struct mlxsw_sp *mlxsw_sp)
8060 {
8061 	struct netlink_ext_ack *extack = info->extack;
8062 	struct fib_rule_notifier_info *fr_info;
8063 	struct fib_rule *rule;
8064 	int err = 0;
8065 
8066 	/* nothing to do at the moment */
8067 	if (event == FIB_EVENT_RULE_DEL)
8068 		return 0;
8069 
8070 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
8071 	rule = fr_info->rule;
8072 
8073 	/* Rule only affects locally generated traffic */
8074 	if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
8075 		return 0;
8076 
8077 	switch (info->family) {
8078 	case AF_INET:
8079 		if (!fib4_rule_default(rule) && !rule->l3mdev)
8080 			err = -EOPNOTSUPP;
8081 		break;
8082 	case AF_INET6:
8083 		if (!fib6_rule_default(rule) && !rule->l3mdev)
8084 			err = -EOPNOTSUPP;
8085 		break;
8086 	case RTNL_FAMILY_IPMR:
8087 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
8088 			err = -EOPNOTSUPP;
8089 		break;
8090 	case RTNL_FAMILY_IP6MR:
8091 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
8092 			err = -EOPNOTSUPP;
8093 		break;
8094 	}
8095 
8096 	if (err < 0)
8097 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
8098 
8099 	return err;
8100 }
8101 
8102 /* Called with rcu_read_lock() */
8103 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
8104 				     unsigned long event, void *ptr)
8105 {
8106 	struct mlxsw_sp_fib_event_work *fib_work;
8107 	struct fib_notifier_info *info = ptr;
8108 	struct mlxsw_sp_router *router;
8109 	int err;
8110 
8111 	if ((info->family != AF_INET && info->family != AF_INET6 &&
8112 	     info->family != RTNL_FAMILY_IPMR &&
8113 	     info->family != RTNL_FAMILY_IP6MR))
8114 		return NOTIFY_DONE;
8115 
8116 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
8117 
8118 	switch (event) {
8119 	case FIB_EVENT_RULE_ADD:
8120 	case FIB_EVENT_RULE_DEL:
8121 		err = mlxsw_sp_router_fib_rule_event(event, info,
8122 						     router->mlxsw_sp);
8123 		return notifier_from_errno(err);
8124 	case FIB_EVENT_ENTRY_ADD:
8125 	case FIB_EVENT_ENTRY_REPLACE:
8126 	case FIB_EVENT_ENTRY_APPEND:
8127 		if (info->family == AF_INET) {
8128 			struct fib_entry_notifier_info *fen_info = ptr;
8129 
8130 			if (fen_info->fi->fib_nh_is_v6) {
8131 				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
8132 				return notifier_from_errno(-EINVAL);
8133 			}
8134 		}
8135 		break;
8136 	}
8137 
8138 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
8139 	if (!fib_work)
8140 		return NOTIFY_BAD;
8141 
8142 	fib_work->mlxsw_sp = router->mlxsw_sp;
8143 	fib_work->event = event;
8144 
8145 	switch (info->family) {
8146 	case AF_INET:
8147 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
8148 		mlxsw_sp_router_fib4_event(fib_work, info);
8149 		break;
8150 	case AF_INET6:
8151 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
8152 		err = mlxsw_sp_router_fib6_event(fib_work, info);
8153 		if (err)
8154 			goto err_fib_event;
8155 		break;
8156 	case RTNL_FAMILY_IP6MR:
8157 	case RTNL_FAMILY_IPMR:
8158 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
8159 		mlxsw_sp_router_fibmr_event(fib_work, info);
8160 		break;
8161 	}
8162 
8163 	mlxsw_core_schedule_work(&fib_work->work);
8164 
8165 	return NOTIFY_DONE;
8166 
8167 err_fib_event:
8168 	kfree(fib_work);
8169 	return NOTIFY_BAD;
8170 }
8171 
8172 static struct mlxsw_sp_rif *
8173 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
8174 			 const struct net_device *dev)
8175 {
8176 	int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
8177 	int i;
8178 
8179 	for (i = 0; i < max_rifs; i++)
8180 		if (mlxsw_sp->router->rifs[i] &&
8181 		    mlxsw_sp_rif_dev_is(mlxsw_sp->router->rifs[i], dev))
8182 			return mlxsw_sp->router->rifs[i];
8183 
8184 	return NULL;
8185 }
8186 
8187 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
8188 			 const struct net_device *dev)
8189 {
8190 	struct mlxsw_sp_rif *rif;
8191 
8192 	mutex_lock(&mlxsw_sp->router->lock);
8193 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8194 	mutex_unlock(&mlxsw_sp->router->lock);
8195 
8196 	return rif;
8197 }
8198 
8199 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
8200 {
8201 	struct mlxsw_sp_rif *rif;
8202 	u16 vid = 0;
8203 
8204 	mutex_lock(&mlxsw_sp->router->lock);
8205 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8206 	if (!rif)
8207 		goto out;
8208 
8209 	/* We only return the VID for VLAN RIFs. Otherwise we return an
8210 	 * invalid value (0).
8211 	 */
8212 	if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
8213 		goto out;
8214 
8215 	vid = mlxsw_sp_fid_8021q_vid(rif->fid);
8216 
8217 out:
8218 	mutex_unlock(&mlxsw_sp->router->lock);
8219 	return vid;
8220 }
8221 
8222 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
8223 {
8224 	char ritr_pl[MLXSW_REG_RITR_LEN];
8225 	int err;
8226 
8227 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
8228 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8229 	if (err)
8230 		return err;
8231 
8232 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
8233 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8234 }
8235 
/* Run the "RIF made" synchronization for neighbours and then nexthops.
 * If the nexthop step fails, the neighbour step is rolled back. Returns 0 on
 * success or the error of the failing step.
 */
static int mlxsw_sp_router_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *rif)
{
	int err;

	err = mlxsw_sp_neigh_rif_made_sync(mlxsw_sp, rif);
	if (err)
		return err;

	err = mlxsw_sp_nexthop_rif_made_sync(mlxsw_sp, rif);
	if (err)
		mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);

	return err;
}
8255 
8256 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
8257 					  struct mlxsw_sp_rif *rif)
8258 {
8259 	/* Signal to nexthop cleanup that the RIF is going away. */
8260 	rif->crif->rif = NULL;
8261 
8262 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
8263 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
8264 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
8265 }
8266 
8267 static bool __mlxsw_sp_dev_addr_list_empty(const struct net_device *dev)
8268 {
8269 	struct inet6_dev *inet6_dev;
8270 	struct in_device *idev;
8271 
8272 	idev = __in_dev_get_rcu(dev);
8273 	if (idev && idev->ifa_list)
8274 		return false;
8275 
8276 	inet6_dev = __in6_dev_get(dev);
8277 	if (inet6_dev && !list_empty(&inet6_dev->addr_list))
8278 		return false;
8279 
8280 	return true;
8281 }
8282 
8283 static bool mlxsw_sp_dev_addr_list_empty(const struct net_device *dev)
8284 {
8285 	bool addr_list_empty;
8286 
8287 	rcu_read_lock();
8288 	addr_list_empty = __mlxsw_sp_dev_addr_list_empty(dev);
8289 	rcu_read_unlock();
8290 
8291 	return addr_list_empty;
8292 }
8293 
8294 static bool
8295 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
8296 			   unsigned long event)
8297 {
8298 	bool addr_list_empty;
8299 
8300 	switch (event) {
8301 	case NETDEV_UP:
8302 		return rif == NULL;
8303 	case NETDEV_DOWN:
8304 		addr_list_empty = mlxsw_sp_dev_addr_list_empty(dev);
8305 
8306 		/* macvlans do not have a RIF, but rather piggy back on the
8307 		 * RIF of their lower device.
8308 		 */
8309 		if (netif_is_macvlan(dev) && addr_list_empty)
8310 			return true;
8311 
8312 		if (rif && addr_list_empty &&
8313 		    !netif_is_l3_slave(mlxsw_sp_rif_dev(rif)))
8314 			return true;
8315 		/* It is possible we already removed the RIF ourselves
8316 		 * if it was assigned to a netdev that is now a bridge
8317 		 * or LAG slave.
8318 		 */
8319 		return false;
8320 	}
8321 
8322 	return false;
8323 }
8324 
8325 static enum mlxsw_sp_rif_type
8326 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
8327 		      const struct net_device *dev)
8328 {
8329 	enum mlxsw_sp_fid_type type;
8330 
8331 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
8332 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
8333 
8334 	/* Otherwise RIF type is derived from the type of the underlying FID. */
8335 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
8336 		type = MLXSW_SP_FID_TYPE_8021Q;
8337 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
8338 		type = MLXSW_SP_FID_TYPE_8021Q;
8339 	else if (netif_is_bridge_master(dev))
8340 		type = MLXSW_SP_FID_TYPE_8021D;
8341 	else
8342 		type = MLXSW_SP_FID_TYPE_RFID;
8343 
8344 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
8345 }
8346 
8347 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index,
8348 				    u8 rif_entries)
8349 {
8350 	*p_rif_index = gen_pool_alloc(mlxsw_sp->router->rifs_table,
8351 				      rif_entries);
8352 	if (*p_rif_index == 0)
8353 		return -ENOBUFS;
8354 	*p_rif_index -= MLXSW_SP_ROUTER_GENALLOC_OFFSET;
8355 
8356 	/* RIF indexes must be aligned to the allocation size. */
8357 	WARN_ON_ONCE(*p_rif_index % rif_entries);
8358 
8359 	return 0;
8360 }
8361 
8362 static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8363 				    u8 rif_entries)
8364 {
8365 	gen_pool_free(mlxsw_sp->router->rifs_table,
8366 		      MLXSW_SP_ROUTER_GENALLOC_OFFSET + rif_index, rif_entries);
8367 }
8368 
8369 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
8370 					       u16 vr_id,
8371 					       struct mlxsw_sp_crif *crif)
8372 {
8373 	struct net_device *l3_dev = crif ? crif->key.dev : NULL;
8374 	struct mlxsw_sp_rif *rif;
8375 
8376 	rif = kzalloc(rif_size, GFP_KERNEL);
8377 	if (!rif)
8378 		return NULL;
8379 
8380 	INIT_LIST_HEAD(&rif->neigh_list);
8381 	if (l3_dev) {
8382 		ether_addr_copy(rif->addr, l3_dev->dev_addr);
8383 		rif->mtu = l3_dev->mtu;
8384 	}
8385 	rif->vr_id = vr_id;
8386 	rif->rif_index = rif_index;
8387 	if (crif) {
8388 		rif->crif = crif;
8389 		crif->rif = rif;
8390 	}
8391 
8392 	return rif;
8393 }
8394 
8395 static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif)
8396 {
8397 	WARN_ON(!list_empty(&rif->neigh_list));
8398 
8399 	if (rif->crif)
8400 		rif->crif->rif = NULL;
8401 	kfree(rif);
8402 }
8403 
8404 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
8405 					   u16 rif_index)
8406 {
8407 	return mlxsw_sp->router->rifs[rif_index];
8408 }
8409 
8410 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
8411 {
8412 	return rif->rif_index;
8413 }
8414 
8415 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8416 {
8417 	return lb_rif->common.rif_index;
8418 }
8419 
8420 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8421 {
8422 	struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
8423 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
8424 	struct mlxsw_sp_vr *ul_vr;
8425 
8426 	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
8427 	if (WARN_ON(IS_ERR(ul_vr)))
8428 		return 0;
8429 
8430 	return ul_vr->id;
8431 }
8432 
8433 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8434 {
8435 	return lb_rif->ul_rif_id;
8436 }
8437 
8438 static bool
8439 mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif)
8440 {
8441 	return mlxsw_sp_rif_counter_valid_get(rif,
8442 					      MLXSW_SP_RIF_COUNTER_EGRESS) &&
8443 	       mlxsw_sp_rif_counter_valid_get(rif,
8444 					      MLXSW_SP_RIF_COUNTER_INGRESS);
8445 }
8446 
8447 static int
8448 mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif)
8449 {
8450 	int err;
8451 
8452 	err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8453 	if (err)
8454 		return err;
8455 
8456 	/* Clear stale data. */
8457 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8458 					       MLXSW_SP_RIF_COUNTER_INGRESS,
8459 					       NULL);
8460 	if (err)
8461 		goto err_clear_ingress;
8462 
8463 	err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8464 	if (err)
8465 		goto err_alloc_egress;
8466 
8467 	/* Clear stale data. */
8468 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8469 					       MLXSW_SP_RIF_COUNTER_EGRESS,
8470 					       NULL);
8471 	if (err)
8472 		goto err_clear_egress;
8473 
8474 	return 0;
8475 
8476 err_clear_egress:
8477 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8478 err_alloc_egress:
8479 err_clear_ingress:
8480 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8481 	return err;
8482 }
8483 
8484 static void
8485 mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif)
8486 {
8487 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8488 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8489 }
8490 
8491 static void
8492 mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif,
8493 					  struct netdev_notifier_offload_xstats_info *info)
8494 {
8495 	if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8496 		return;
8497 	netdev_offload_xstats_report_used(info->report_used);
8498 }
8499 
8500 static int
8501 mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif,
8502 				    struct rtnl_hw_stats64 *p_stats)
8503 {
8504 	struct mlxsw_sp_rif_counter_set_basic ingress;
8505 	struct mlxsw_sp_rif_counter_set_basic egress;
8506 	int err;
8507 
8508 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8509 					       MLXSW_SP_RIF_COUNTER_INGRESS,
8510 					       &ingress);
8511 	if (err)
8512 		return err;
8513 
8514 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8515 					       MLXSW_SP_RIF_COUNTER_EGRESS,
8516 					       &egress);
8517 	if (err)
8518 		return err;
8519 
8520 #define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX)		\
8521 		((SET.good_unicast_ ## SFX) +		\
8522 		 (SET.good_multicast_ ## SFX) +		\
8523 		 (SET.good_broadcast_ ## SFX))
8524 
8525 	p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets);
8526 	p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets);
8527 	p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes);
8528 	p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes);
8529 	p_stats->rx_errors = ingress.error_packets;
8530 	p_stats->tx_errors = egress.error_packets;
8531 	p_stats->rx_dropped = ingress.discard_packets;
8532 	p_stats->tx_dropped = egress.discard_packets;
8533 	p_stats->multicast = ingress.good_multicast_packets +
8534 			     ingress.good_broadcast_packets;
8535 
8536 #undef MLXSW_SP_ROUTER_ALL_GOOD
8537 
8538 	return 0;
8539 }
8540 
8541 static int
8542 mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif,
8543 					   struct netdev_notifier_offload_xstats_info *info)
8544 {
8545 	struct rtnl_hw_stats64 stats = {};
8546 	int err;
8547 
8548 	if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8549 		return 0;
8550 
8551 	err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats);
8552 	if (err)
8553 		return err;
8554 
8555 	netdev_offload_xstats_report_delta(info->report_delta, &stats);
8556 	return 0;
8557 }
8558 
/* Deferred "offload xstats changed" notification for one netdevice.
 * Allocated by mlxsw_sp_router_hwstats_notify_schedule() and freed by the
 * work function once the notification has been sent.
 */
8559 struct mlxsw_sp_router_hwstats_notify_work {
	/* Work item queued on the mlxsw core workqueue. */
8560 	struct work_struct work;
	/* Device to notify about; a reference is held while queued. */
8561 	struct net_device *dev;
	/* Tracker for the reference held on @dev. */
8562 	netdevice_tracker dev_tracker;
8563 };
8564 
8565 static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work)
8566 {
8567 	struct mlxsw_sp_router_hwstats_notify_work *hws_work =
8568 		container_of(work, struct mlxsw_sp_router_hwstats_notify_work,
8569 			     work);
8570 
8571 	rtnl_lock();
8572 	rtnl_offload_xstats_notify(hws_work->dev);
8573 	rtnl_unlock();
8574 	netdev_put(hws_work->dev, &hws_work->dev_tracker);
8575 	kfree(hws_work);
8576 }
8577 
8578 static void
8579 mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev)
8580 {
8581 	struct mlxsw_sp_router_hwstats_notify_work *hws_work;
8582 
8583 	/* To collect notification payload, the core ends up sending another
8584 	 * notifier block message, which would deadlock on the attempt to
8585 	 * acquire the router lock again. Just postpone the notification until
8586 	 * later.
8587 	 */
8588 
8589 	hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL);
8590 	if (!hws_work)
8591 		return;
8592 
8593 	INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work);
8594 	netdev_hold(dev, &hws_work->dev_tracker, GFP_KERNEL);
8595 	hws_work->dev = dev;
8596 	mlxsw_core_schedule_work(&hws_work->work);
8597 }
8598 
8599 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
8600 {
8601 	return mlxsw_sp_rif_dev(rif)->ifindex;
8602 }
8603 
8604 bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif)
8605 {
8606 	return !!mlxsw_sp_rif_dev(rif);
8607 }
8608 
8609 bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif,
8610 			 const struct net_device *dev)
8611 {
8612 	return mlxsw_sp_rif_dev(rif) == dev;
8613 }
8614 
8615 static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif)
8616 {
8617 	struct rtnl_hw_stats64 stats = {};
8618 
8619 	if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats))
8620 		netdev_offload_xstats_push_delta(mlxsw_sp_rif_dev(rif),
8621 						 NETDEV_OFFLOAD_XSTATS_TYPE_L3,
8622 						 &stats);
8623 }
8624 
/* Create and fully set up a router interface (RIF) for params->dev.
 *
 * Steps, in order: take the virtual router for the device's L3 master table
 * (RT_TABLE_MAIN when the device has none), allocate one or two RIF indexes
 * (two when params->double_entry is set), look up the device's CRIF, allocate
 * the RIF and publish it in the router's RIF table, run the type-specific
 * setup / fid_get / configure operations, register the RIF with every
 * multicast routing table of the virtual router, run the "RIF made"
 * synchronization and finally either enable L3 stats (when offload xstats are
 * enabled on the device) or allocate the regular RIF counters.
 *
 * Returns the new RIF, or an ERR_PTR() on failure after unwinding everything
 * that was set up so far. @extack receives an error message for some of the
 * failures.
 */
8625 static struct mlxsw_sp_rif *
8626 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
8627 		    const struct mlxsw_sp_rif_params *params,
8628 		    struct netlink_ext_ack *extack)
8629 {
8630 	u8 rif_entries = params->double_entry ? 2 : 1;
8631 	u32 tb_id = l3mdev_fib_table(params->dev);
8632 	const struct mlxsw_sp_rif_ops *ops;
8633 	struct mlxsw_sp_fid *fid = NULL;
8634 	enum mlxsw_sp_rif_type type;
8635 	struct mlxsw_sp_crif *crif;
8636 	struct mlxsw_sp_rif *rif;
8637 	struct mlxsw_sp_vr *vr;
8638 	u16 rif_index;
8639 	int i, err;
8640 
8641 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
8642 	ops = mlxsw_sp->router->rif_ops_arr[type];
8643 
8644 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
8645 	if (IS_ERR(vr))
8646 		return ERR_CAST(vr);
8647 	vr->rif_count++;
8648 
8649 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
8650 	if (err) {
8651 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
8652 		goto err_rif_index_alloc;
8653 	}
8654 
	/* A CRIF for the device is expected to exist already; its absence is
	 * treated as a driver bug (WARN) and reported as -ENOENT.
	 */
8655 	crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, params->dev);
8656 	if (WARN_ON(!crif)) {
8657 		err = -ENOENT;
8658 		goto err_crif_lookup;
8659 	}
8660 
8661 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, crif);
8662 	if (!rif) {
8663 		err = -ENOMEM;
8664 		goto err_rif_alloc;
8665 	}
8666 	netdev_hold(params->dev, &rif->dev_tracker, GFP_KERNEL);
8667 	mlxsw_sp->router->rifs[rif_index] = rif;
8668 	rif->mlxsw_sp = mlxsw_sp;
8669 	rif->ops = ops;
8670 	rif->rif_entries = rif_entries;
8671 
	/* setup and fid_get are optional per-type operations; configure and
	 * deconfigure are called unconditionally.
	 */
8672 	if (ops->setup)
8673 		ops->setup(rif, params);
8674 
8675 	if (ops->fid_get) {
8676 		fid = ops->fid_get(rif, params, extack);
8677 		if (IS_ERR(fid)) {
8678 			err = PTR_ERR(fid);
8679 			goto err_fid_get;
8680 		}
8681 		rif->fid = fid;
8682 	}
8683 
8684 	err = ops->configure(rif, extack);
8685 	if (err)
8686 		goto err_configure;
8687 
8688 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
8689 		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
8690 		if (err)
8691 			goto err_mr_rif_add;
8692 	}
8693 
8694 	err = mlxsw_sp_router_rif_made_sync(mlxsw_sp, rif);
8695 	if (err)
8696 		goto err_rif_made_sync;
8697 
8698 	if (netdev_offload_xstats_enabled(params->dev,
8699 					  NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8700 		err = mlxsw_sp_router_port_l3_stats_enable(rif);
8701 		if (err)
8702 			goto err_stats_enable;
8703 		mlxsw_sp_router_hwstats_notify_schedule(params->dev);
8704 	} else {
8705 		mlxsw_sp_rif_counters_alloc(rif);
8706 	}
8707 
	/* Occupancy is accounted in RIF entries, not in RIF objects. */
8708 	atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
8709 	return rif;
8710 
8711 err_stats_enable:
8712 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
8713 err_rif_made_sync:
8714 err_mr_rif_add:
	/* i is the index of the table whose add failed, or
	 * MLXSW_SP_L3_PROTO_MAX when arriving from a later failure, so only
	 * the tables the RIF was actually added to are unwound.
	 */
8715 	for (i--; i >= 0; i--)
8716 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8717 	ops->deconfigure(rif);
8718 err_configure:
8719 	if (fid)
8720 		mlxsw_sp_fid_put(fid);
8721 err_fid_get:
8722 	mlxsw_sp->router->rifs[rif_index] = NULL;
8723 	netdev_put(params->dev, &rif->dev_tracker);
8724 	mlxsw_sp_rif_free(rif);
8725 err_rif_alloc:
8726 err_crif_lookup:
8727 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8728 err_rif_index_alloc:
8729 	vr->rif_count--;
8730 	mlxsw_sp_vr_put(mlxsw_sp, vr);
8731 	return ERR_PTR(err);
8732 }
8733 
8734 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
8735 {
8736 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
8737 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
8738 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8739 	struct mlxsw_sp_crif *crif = rif->crif;
8740 	struct mlxsw_sp_fid *fid = rif->fid;
8741 	u8 rif_entries = rif->rif_entries;
8742 	u16 rif_index = rif->rif_index;
8743 	struct mlxsw_sp_vr *vr;
8744 	int i;
8745 
8746 	atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
8747 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
8748 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
8749 
8750 	if (netdev_offload_xstats_enabled(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8751 		mlxsw_sp_rif_push_l3_stats(rif);
8752 		mlxsw_sp_router_port_l3_stats_disable(rif);
8753 		mlxsw_sp_router_hwstats_notify_schedule(dev);
8754 	} else {
8755 		mlxsw_sp_rif_counters_free(rif);
8756 	}
8757 
8758 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
8759 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8760 	ops->deconfigure(rif);
8761 	if (fid)
8762 		/* Loopback RIFs are not associated with a FID. */
8763 		mlxsw_sp_fid_put(fid);
8764 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
8765 	netdev_put(dev, &rif->dev_tracker);
8766 	mlxsw_sp_rif_free(rif);
8767 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8768 	vr->rif_count--;
8769 	mlxsw_sp_vr_put(mlxsw_sp, vr);
8770 
8771 	if (crif->can_destroy)
8772 		mlxsw_sp_crif_free(crif);
8773 }
8774 
8775 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
8776 				 struct net_device *dev)
8777 {
8778 	struct mlxsw_sp_rif *rif;
8779 
8780 	mutex_lock(&mlxsw_sp->router->lock);
8781 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8782 	if (!rif)
8783 		goto out;
8784 	mlxsw_sp_rif_destroy(rif);
8785 out:
8786 	mutex_unlock(&mlxsw_sp->router->lock);
8787 }
8788 
8789 static void mlxsw_sp_rif_destroy_vlan_upper(struct mlxsw_sp *mlxsw_sp,
8790 					    struct net_device *br_dev,
8791 					    u16 vid)
8792 {
8793 	struct net_device *upper_dev;
8794 	struct mlxsw_sp_crif *crif;
8795 
8796 	rcu_read_lock();
8797 	upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q), vid);
8798 	rcu_read_unlock();
8799 
8800 	if (!upper_dev)
8801 		return;
8802 
8803 	crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, upper_dev);
8804 	if (!crif || !crif->rif)
8805 		return;
8806 
8807 	mlxsw_sp_rif_destroy(crif->rif);
8808 }
8809 
/* Forward declaration: mlxsw_sp_router_bridge_vlan_add() below calls this
 * handler, whose definition is not part of this section of the file.
 */
8810 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
8811 					  struct net_device *l3_dev,
8812 					  int lower_pvid,
8813 					  unsigned long event,
8814 					  struct netlink_ext_ack *extack);
8815 
/* React to VLAN @new_vid being added to (or updated on) bridge @br_dev.
 *
 * @is_pvid tells whether @new_vid is the bridge's PVID after the change. The
 * function derives the old PVID from the bridge's current VLAN RIF (0 when
 * there is none) and the resulting new PVID, and returns early when the PVID
 * does not change. Otherwise:
 *  - with a non-zero new PVID, the RIF of a VLAN upper using that VID is
 *    destroyed, and, if the bridge has IP addresses, a new RIF is created
 *    for the bridge and the old RIF (if any) is migrated to it and destroyed;
 *  - with a new PVID of 0, the old RIF is destroyed.
 * Finally, if there was an old PVID and the bridge has a VLAN upper with that
 * VID, the bridge inetaddr handler is run for that upper with NETDEV_UP.
 *
 * Runs under the router lock. Returns 0 on success or a negative errno.
 */
8816 int mlxsw_sp_router_bridge_vlan_add(struct mlxsw_sp *mlxsw_sp,
8817 				    struct net_device *br_dev,
8818 				    u16 new_vid, bool is_pvid,
8819 				    struct netlink_ext_ack *extack)
8820 {
8821 	struct mlxsw_sp_rif *old_rif;
8822 	struct mlxsw_sp_rif *new_rif;
8823 	struct net_device *upper_dev;
8824 	u16 old_pvid = 0;
8825 	u16 new_pvid;
8826 	int err = 0;
8827 
8828 	mutex_lock(&mlxsw_sp->router->lock);
8829 	old_rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev);
8830 	if (old_rif) {
8831 		/* If the RIF on the bridge is not a VLAN RIF, we shouldn't have
8832 		 * gotten a PVID notification.
8833 		 */
8834 		if (WARN_ON(old_rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN))
8835 			old_rif = NULL;
8836 		else
8837 			old_pvid = mlxsw_sp_fid_8021q_vid(old_rif->fid);
8838 	}
8839 
	/* Work out the PVID after this change: @new_vid when it is flagged as
	 * PVID; 0 when the VID that used to be the PVID is now added without
	 * the PVID flag; in any other case the PVID is unaffected.
	 */
8840 	if (is_pvid)
8841 		new_pvid = new_vid;
8842 	else if (old_pvid == new_vid)
8843 		new_pvid = 0;
8844 	else
8845 		goto out;
8846 
8847 	if (old_pvid == new_pvid)
8848 		goto out;
8849 
8850 	if (new_pvid) {
8851 		struct mlxsw_sp_rif_params params = {
8852 			.dev = br_dev,
8853 			.vid = new_pvid,
8854 		};
8855 
8856 		/* If there is a VLAN upper with the same VID as the new PVID,
8857 		 * kill its RIF, if there is one.
8858 		 */
8859 		mlxsw_sp_rif_destroy_vlan_upper(mlxsw_sp, br_dev, new_pvid);
8860 
		/* Without any address on the bridge no RIF is created. */
8861 		if (mlxsw_sp_dev_addr_list_empty(br_dev))
8862 			goto out;
8863 		new_rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
8864 		if (IS_ERR(new_rif)) {
8865 			err = PTR_ERR(new_rif);
8866 			goto out;
8867 		}
8868 
8869 		if (old_pvid)
8870 			mlxsw_sp_rif_migrate_destroy(mlxsw_sp, old_rif, new_rif,
8871 						     true);
8872 	} else {
		/* new_pvid is 0 and differs from old_pvid, so old_pvid is
		 * non-zero and was read from a valid old_rif.
		 */
8873 		mlxsw_sp_rif_destroy(old_rif);
8874 	}
8875 
8876 	if (old_pvid) {
8877 		rcu_read_lock();
8878 		upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q),
8879 						     old_pvid);
8880 		rcu_read_unlock();
8881 		if (upper_dev)
8882 			err = mlxsw_sp_inetaddr_bridge_event(mlxsw_sp,
8883 							     upper_dev,
8884 							     new_pvid,
8885 							     NETDEV_UP, extack);
8886 	}
8887 
8888 out:
8889 	mutex_unlock(&mlxsw_sp->router->lock);
8890 	return err;
8891 }
8892 
8893 static void
8894 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
8895 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8896 {
8897 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8898 
8899 	params->vid = mlxsw_sp_port_vlan->vid;
8900 	params->lag = mlxsw_sp_port->lagged;
8901 	if (params->lag)
8902 		params->lag_id = mlxsw_sp_port->lag_id;
8903 	else
8904 		params->system_port = mlxsw_sp_port->local_port;
8905 }
8906 
8907 static struct mlxsw_sp_rif_subport *
8908 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
8909 {
8910 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
8911 }
8912 
8913 int mlxsw_sp_rif_subport_port(const struct mlxsw_sp_rif *rif,
8914 			      u16 *port, bool *is_lag)
8915 {
8916 	struct mlxsw_sp_rif_subport *rif_subport;
8917 
8918 	if (WARN_ON(rif->ops->type != MLXSW_SP_RIF_TYPE_SUBPORT))
8919 		return -EINVAL;
8920 
8921 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
8922 	*is_lag = rif_subport->lag;
8923 	*port = *is_lag ? rif_subport->lag_id : rif_subport->system_port;
8924 	return 0;
8925 }
8926 
8927 static struct mlxsw_sp_rif *
8928 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
8929 			 const struct mlxsw_sp_rif_params *params,
8930 			 struct netlink_ext_ack *extack)
8931 {
8932 	struct mlxsw_sp_rif_subport *rif_subport;
8933 	struct mlxsw_sp_rif *rif;
8934 
8935 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
8936 	if (!rif)
8937 		return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
8938 
8939 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
8940 	refcount_inc(&rif_subport->ref_count);
8941 	return rif;
8942 }
8943 
8944 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
8945 {
8946 	struct mlxsw_sp_rif_subport *rif_subport;
8947 
8948 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
8949 	if (!refcount_dec_and_test(&rif_subport->ref_count))
8950 		return;
8951 
8952 	mlxsw_sp_rif_destroy(rif);
8953 }
8954 
8955 static int mlxsw_sp_rif_mac_profile_index_alloc(struct mlxsw_sp *mlxsw_sp,
8956 						struct mlxsw_sp_rif_mac_profile *profile,
8957 						struct netlink_ext_ack *extack)
8958 {
8959 	u8 max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
8960 	struct mlxsw_sp_router *router = mlxsw_sp->router;
8961 	int id;
8962 
8963 	id = idr_alloc(&router->rif_mac_profiles_idr, profile, 0,
8964 		       max_rif_mac_profiles, GFP_KERNEL);
8965 
8966 	if (id >= 0) {
8967 		profile->id = id;
8968 		return 0;
8969 	}
8970 
8971 	if (id == -ENOSPC)
8972 		NL_SET_ERR_MSG_MOD(extack,
8973 				   "Exceeded number of supported router interface MAC profiles");
8974 
8975 	return id;
8976 }
8977 
8978 static struct mlxsw_sp_rif_mac_profile *
8979 mlxsw_sp_rif_mac_profile_index_free(struct mlxsw_sp *mlxsw_sp, u8 mac_profile)
8980 {
8981 	struct mlxsw_sp_rif_mac_profile *profile;
8982 
8983 	profile = idr_remove(&mlxsw_sp->router->rif_mac_profiles_idr,
8984 			     mac_profile);
8985 	WARN_ON(!profile);
8986 	return profile;
8987 }
8988 
8989 static struct mlxsw_sp_rif_mac_profile *
8990 mlxsw_sp_rif_mac_profile_alloc(const char *mac)
8991 {
8992 	struct mlxsw_sp_rif_mac_profile *profile;
8993 
8994 	profile = kzalloc(sizeof(*profile), GFP_KERNEL);
8995 	if (!profile)
8996 		return NULL;
8997 
8998 	ether_addr_copy(profile->mac_prefix, mac);
8999 	refcount_set(&profile->ref_count, 1);
9000 	return profile;
9001 }
9002 
9003 static struct mlxsw_sp_rif_mac_profile *
9004 mlxsw_sp_rif_mac_profile_find(const struct mlxsw_sp *mlxsw_sp, const char *mac)
9005 {
9006 	struct mlxsw_sp_router *router = mlxsw_sp->router;
9007 	struct mlxsw_sp_rif_mac_profile *profile;
9008 	int id;
9009 
9010 	idr_for_each_entry(&router->rif_mac_profiles_idr, profile, id) {
9011 		if (ether_addr_equal_masked(profile->mac_prefix, mac,
9012 					    mlxsw_sp->mac_mask))
9013 			return profile;
9014 	}
9015 
9016 	return NULL;
9017 }
9018 
9019 static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv)
9020 {
9021 	const struct mlxsw_sp *mlxsw_sp = priv;
9022 
9023 	return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count);
9024 }
9025 
9026 static u64 mlxsw_sp_rifs_occ_get(void *priv)
9027 {
9028 	const struct mlxsw_sp *mlxsw_sp = priv;
9029 
9030 	return atomic_read(&mlxsw_sp->router->rifs_count);
9031 }
9032 
9033 static struct mlxsw_sp_rif_mac_profile *
9034 mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac,
9035 				struct netlink_ext_ack *extack)
9036 {
9037 	struct mlxsw_sp_rif_mac_profile *profile;
9038 	int err;
9039 
9040 	profile = mlxsw_sp_rif_mac_profile_alloc(mac);
9041 	if (!profile)
9042 		return ERR_PTR(-ENOMEM);
9043 
9044 	err = mlxsw_sp_rif_mac_profile_index_alloc(mlxsw_sp, profile, extack);
9045 	if (err)
9046 		goto profile_index_alloc_err;
9047 
9048 	atomic_inc(&mlxsw_sp->router->rif_mac_profiles_count);
9049 	return profile;
9050 
9051 profile_index_alloc_err:
9052 	kfree(profile);
9053 	return ERR_PTR(err);
9054 }
9055 
9056 static void mlxsw_sp_rif_mac_profile_destroy(struct mlxsw_sp *mlxsw_sp,
9057 					     u8 mac_profile)
9058 {
9059 	struct mlxsw_sp_rif_mac_profile *profile;
9060 
9061 	atomic_dec(&mlxsw_sp->router->rif_mac_profiles_count);
9062 	profile = mlxsw_sp_rif_mac_profile_index_free(mlxsw_sp, mac_profile);
9063 	kfree(profile);
9064 }
9065 
9066 static int mlxsw_sp_rif_mac_profile_get(struct mlxsw_sp *mlxsw_sp,
9067 					const char *mac, u8 *p_mac_profile,
9068 					struct netlink_ext_ack *extack)
9069 {
9070 	struct mlxsw_sp_rif_mac_profile *profile;
9071 
9072 	profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, mac);
9073 	if (profile) {
9074 		refcount_inc(&profile->ref_count);
9075 		goto out;
9076 	}
9077 
9078 	profile = mlxsw_sp_rif_mac_profile_create(mlxsw_sp, mac, extack);
9079 	if (IS_ERR(profile))
9080 		return PTR_ERR(profile);
9081 
9082 out:
9083 	*p_mac_profile = profile->id;
9084 	return 0;
9085 }
9086 
9087 static void mlxsw_sp_rif_mac_profile_put(struct mlxsw_sp *mlxsw_sp,
9088 					 u8 mac_profile)
9089 {
9090 	struct mlxsw_sp_rif_mac_profile *profile;
9091 
9092 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
9093 			   mac_profile);
9094 	if (WARN_ON(!profile))
9095 		return;
9096 
9097 	if (!refcount_dec_and_test(&profile->ref_count))
9098 		return;
9099 
9100 	mlxsw_sp_rif_mac_profile_destroy(mlxsw_sp, mac_profile);
9101 }
9102 
9103 static bool mlxsw_sp_rif_mac_profile_is_shared(const struct mlxsw_sp_rif *rif)
9104 {
9105 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9106 	struct mlxsw_sp_rif_mac_profile *profile;
9107 
9108 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
9109 			   rif->mac_profile_id);
9110 	if (WARN_ON(!profile))
9111 		return false;
9112 
9113 	return refcount_read(&profile->ref_count) > 1;
9114 }
9115 
9116 static int mlxsw_sp_rif_mac_profile_edit(struct mlxsw_sp_rif *rif,
9117 					 const char *new_mac)
9118 {
9119 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9120 	struct mlxsw_sp_rif_mac_profile *profile;
9121 
9122 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
9123 			   rif->mac_profile_id);
9124 	if (WARN_ON(!profile))
9125 		return -EINVAL;
9126 
9127 	ether_addr_copy(profile->mac_prefix, new_mac);
9128 	return 0;
9129 }
9130 
9131 static int
9132 mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp,
9133 				 struct mlxsw_sp_rif *rif,
9134 				 const char *new_mac,
9135 				 struct netlink_ext_ack *extack)
9136 {
9137 	u8 mac_profile;
9138 	int err;
9139 
9140 	if (!mlxsw_sp_rif_mac_profile_is_shared(rif) &&
9141 	    !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac))
9142 		return mlxsw_sp_rif_mac_profile_edit(rif, new_mac);
9143 
9144 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac,
9145 					   &mac_profile, extack);
9146 	if (err)
9147 		return err;
9148 
9149 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, rif->mac_profile_id);
9150 	rif->mac_profile_id = mac_profile;
9151 	return 0;
9152 }
9153 
9154 static int
9155 __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
9156 				 struct net_device *l3_dev,
9157 				 struct netlink_ext_ack *extack)
9158 {
9159 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
9160 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
9161 	struct mlxsw_sp_rif_params params;
9162 	u16 vid = mlxsw_sp_port_vlan->vid;
9163 	struct mlxsw_sp_rif *rif;
9164 	struct mlxsw_sp_fid *fid;
9165 	int err;
9166 
9167 	params = (struct mlxsw_sp_rif_params) {
9168 		.dev = l3_dev,
9169 		.vid = vid,
9170 	};
9171 
9172 	mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
9173 	rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
9174 	if (IS_ERR(rif))
9175 		return PTR_ERR(rif);
9176 
9177 	/* FID was already created, just take a reference */
9178 	fid = rif->ops->fid_get(rif, &params, extack);
9179 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
9180 	if (err)
9181 		goto err_fid_port_vid_map;
9182 
9183 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
9184 	if (err)
9185 		goto err_port_vid_learning_set;
9186 
9187 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
9188 					BR_STATE_FORWARDING);
9189 	if (err)
9190 		goto err_port_vid_stp_set;
9191 
9192 	mlxsw_sp_port_vlan->fid = fid;
9193 
9194 	return 0;
9195 
9196 err_port_vid_stp_set:
9197 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
9198 err_port_vid_learning_set:
9199 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
9200 err_fid_port_vid_map:
9201 	mlxsw_sp_fid_put(fid);
9202 	mlxsw_sp_rif_subport_put(rif);
9203 	return err;
9204 }
9205 
9206 static void
9207 __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
9208 {
9209 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
9210 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
9211 	struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
9212 	u16 vid = mlxsw_sp_port_vlan->vid;
9213 
9214 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
9215 		return;
9216 
9217 	mlxsw_sp_port_vlan->fid = NULL;
9218 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
9219 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
9220 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
9221 	mlxsw_sp_fid_put(fid);
9222 	mlxsw_sp_rif_subport_put(rif);
9223 }
9224 
9225 static int
9226 mlxsw_sp_port_vlan_router_join_existing(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
9227 					struct net_device *l3_dev,
9228 					struct netlink_ext_ack *extack)
9229 {
9230 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
9231 
9232 	lockdep_assert_held(&mlxsw_sp->router->lock);
9233 
9234 	if (!mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev))
9235 		return 0;
9236 
9237 	return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev,
9238 						extack);
9239 }
9240 
9241 void
9242 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
9243 {
9244 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
9245 
9246 	mutex_lock(&mlxsw_sp->router->lock);
9247 	__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
9248 	mutex_unlock(&mlxsw_sp->router->lock);
9249 }
9250 
9251 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
9252 					     struct net_device *port_dev,
9253 					     unsigned long event, u16 vid,
9254 					     struct netlink_ext_ack *extack)
9255 {
9256 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
9257 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
9258 
9259 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
9260 	if (WARN_ON(!mlxsw_sp_port_vlan))
9261 		return -EINVAL;
9262 
9263 	switch (event) {
9264 	case NETDEV_UP:
9265 		return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
9266 							l3_dev, extack);
9267 	case NETDEV_DOWN:
9268 		__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
9269 		break;
9270 	}
9271 
9272 	return 0;
9273 }
9274 
9275 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
9276 					unsigned long event, bool nomaster,
9277 					struct netlink_ext_ack *extack)
9278 {
9279 	if (!nomaster && (netif_is_any_bridge_port(port_dev) ||
9280 			  netif_is_lag_port(port_dev)))
9281 		return 0;
9282 
9283 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
9284 						 MLXSW_SP_DEFAULT_VID, extack);
9285 }
9286 
9287 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
9288 					 struct net_device *lag_dev,
9289 					 unsigned long event, u16 vid,
9290 					 struct netlink_ext_ack *extack)
9291 {
9292 	struct net_device *port_dev;
9293 	struct list_head *iter;
9294 	int err;
9295 
9296 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
9297 		if (mlxsw_sp_port_dev_check(port_dev)) {
9298 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
9299 								port_dev,
9300 								event, vid,
9301 								extack);
9302 			if (err)
9303 				return err;
9304 		}
9305 	}
9306 
9307 	return 0;
9308 }
9309 
9310 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
9311 				       unsigned long event, bool nomaster,
9312 				       struct netlink_ext_ack *extack)
9313 {
9314 	if (!nomaster && netif_is_bridge_port(lag_dev))
9315 		return 0;
9316 
9317 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
9318 					     MLXSW_SP_DEFAULT_VID, extack);
9319 }
9320 
9321 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
9322 					  struct net_device *l3_dev,
9323 					  int lower_pvid,
9324 					  unsigned long event,
9325 					  struct netlink_ext_ack *extack)
9326 {
9327 	struct mlxsw_sp_rif_params params = {
9328 		.dev = l3_dev,
9329 	};
9330 	struct mlxsw_sp_rif *rif;
9331 	int err;
9332 
9333 	switch (event) {
9334 	case NETDEV_UP:
9335 		if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) {
9336 			u16 proto;
9337 
9338 			br_vlan_get_proto(l3_dev, &proto);
9339 			if (proto == ETH_P_8021AD) {
9340 				NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
9341 				return -EOPNOTSUPP;
9342 			}
9343 			err = br_vlan_get_pvid(l3_dev, &params.vid);
9344 			if (err)
9345 				return err;
9346 			if (!params.vid)
9347 				return 0;
9348 		} else if (is_vlan_dev(l3_dev)) {
9349 			params.vid = vlan_dev_vlan_id(l3_dev);
9350 
9351 			/* If the VID matches PVID of the bridge below, the
9352 			 * bridge owns the RIF for this VLAN. Don't do anything.
9353 			 */
9354 			if ((int)params.vid == lower_pvid)
9355 				return 0;
9356 		}
9357 
9358 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
9359 		if (IS_ERR(rif))
9360 			return PTR_ERR(rif);
9361 		break;
9362 	case NETDEV_DOWN:
9363 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9364 		mlxsw_sp_rif_destroy(rif);
9365 		break;
9366 	}
9367 
9368 	return 0;
9369 }
9370 
9371 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
9372 					struct net_device *vlan_dev,
9373 					unsigned long event, bool nomaster,
9374 					struct netlink_ext_ack *extack)
9375 {
9376 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
9377 	u16 vid = vlan_dev_vlan_id(vlan_dev);
9378 	u16 lower_pvid;
9379 	int err;
9380 
9381 	if (!nomaster && netif_is_bridge_port(vlan_dev))
9382 		return 0;
9383 
9384 	if (mlxsw_sp_port_dev_check(real_dev)) {
9385 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
9386 							 event, vid, extack);
9387 	} else if (netif_is_lag_master(real_dev)) {
9388 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
9389 						     vid, extack);
9390 	} else if (netif_is_bridge_master(real_dev) &&
9391 		   br_vlan_enabled(real_dev)) {
9392 		err = br_vlan_get_pvid(real_dev, &lower_pvid);
9393 		if (err)
9394 			return err;
9395 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev,
9396 						      lower_pvid, event,
9397 						      extack);
9398 	}
9399 
9400 	return 0;
9401 }
9402 
9403 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
9404 {
9405 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
9406 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
9407 
9408 	return ether_addr_equal_masked(mac, vrrp4, mask);
9409 }
9410 
9411 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
9412 {
9413 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
9414 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
9415 
9416 	return ether_addr_equal_masked(mac, vrrp6, mask);
9417 }
9418 
9419 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
9420 				const u8 *mac, bool adding)
9421 {
9422 	char ritr_pl[MLXSW_REG_RITR_LEN];
9423 	u8 vrrp_id = adding ? mac[5] : 0;
9424 	int err;
9425 
9426 	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
9427 	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
9428 		return 0;
9429 
9430 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
9431 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9432 	if (err)
9433 		return err;
9434 
9435 	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
9436 		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
9437 	else
9438 		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
9439 
9440 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9441 }
9442 
9443 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
9444 				    const struct net_device *macvlan_dev,
9445 				    struct netlink_ext_ack *extack)
9446 {
9447 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
9448 	struct mlxsw_sp_rif *rif;
9449 	int err;
9450 
9451 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
9452 	if (!rif)
9453 		return 0;
9454 
9455 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
9456 				  mlxsw_sp_fid_index(rif->fid), true);
9457 	if (err)
9458 		return err;
9459 
9460 	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
9461 				   macvlan_dev->dev_addr, true);
9462 	if (err)
9463 		goto err_rif_vrrp_add;
9464 
9465 	/* Make sure the bridge driver does not have this MAC pointing at
9466 	 * some other port.
9467 	 */
9468 	if (rif->ops->fdb_del)
9469 		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
9470 
9471 	return 0;
9472 
9473 err_rif_vrrp_add:
9474 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
9475 			    mlxsw_sp_fid_index(rif->fid), false);
9476 	return err;
9477 }
9478 
9479 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
9480 				       const struct net_device *macvlan_dev)
9481 {
9482 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
9483 	struct mlxsw_sp_rif *rif;
9484 
9485 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
9486 	/* If we do not have a RIF, then we already took care of
9487 	 * removing the macvlan's MAC during RIF deletion.
9488 	 */
9489 	if (!rif)
9490 		return;
9491 	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
9492 			     false);
9493 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
9494 			    mlxsw_sp_fid_index(rif->fid), false);
9495 }
9496 
9497 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
9498 			      const struct net_device *macvlan_dev)
9499 {
9500 	mutex_lock(&mlxsw_sp->router->lock);
9501 	__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
9502 	mutex_unlock(&mlxsw_sp->router->lock);
9503 }
9504 
9505 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
9506 					   struct net_device *macvlan_dev,
9507 					   unsigned long event,
9508 					   struct netlink_ext_ack *extack)
9509 {
9510 	switch (event) {
9511 	case NETDEV_UP:
9512 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
9513 	case NETDEV_DOWN:
9514 		__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
9515 		break;
9516 	}
9517 
9518 	return 0;
9519 }
9520 
9521 static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
9522 				     struct net_device *dev,
9523 				     unsigned long event, bool nomaster,
9524 				     struct netlink_ext_ack *extack)
9525 {
9526 	if (mlxsw_sp_port_dev_check(dev))
9527 		return mlxsw_sp_inetaddr_port_event(dev, event, nomaster,
9528 						    extack);
9529 	else if (netif_is_lag_master(dev))
9530 		return mlxsw_sp_inetaddr_lag_event(dev, event, nomaster,
9531 						   extack);
9532 	else if (netif_is_bridge_master(dev))
9533 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, -1, event,
9534 						      extack);
9535 	else if (is_vlan_dev(dev))
9536 		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
9537 						    nomaster, extack);
9538 	else if (netif_is_macvlan(dev))
9539 		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
9540 						       extack);
9541 	else
9542 		return 0;
9543 }
9544 
9545 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
9546 				   unsigned long event, void *ptr)
9547 {
9548 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
9549 	struct net_device *dev = ifa->ifa_dev->dev;
9550 	struct mlxsw_sp_router *router;
9551 	struct mlxsw_sp_rif *rif;
9552 	int err = 0;
9553 
9554 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
9555 	if (event == NETDEV_UP)
9556 		return NOTIFY_DONE;
9557 
9558 	router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
9559 	mutex_lock(&router->lock);
9560 	rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
9561 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
9562 		goto out;
9563 
9564 	err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, false,
9565 					NULL);
9566 out:
9567 	mutex_unlock(&router->lock);
9568 	return notifier_from_errno(err);
9569 }
9570 
9571 static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
9572 					 unsigned long event, void *ptr)
9573 {
9574 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
9575 	struct net_device *dev = ivi->ivi_dev->dev;
9576 	struct mlxsw_sp *mlxsw_sp;
9577 	struct mlxsw_sp_rif *rif;
9578 	int err = 0;
9579 
9580 	mlxsw_sp = mlxsw_sp_lower_get(dev);
9581 	if (!mlxsw_sp)
9582 		return NOTIFY_DONE;
9583 
9584 	mutex_lock(&mlxsw_sp->router->lock);
9585 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9586 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
9587 		goto out;
9588 
9589 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false,
9590 					ivi->extack);
9591 out:
9592 	mutex_unlock(&mlxsw_sp->router->lock);
9593 	return notifier_from_errno(err);
9594 }
9595 
9596 struct mlxsw_sp_inet6addr_event_work {
9597 	struct work_struct work;
9598 	struct mlxsw_sp *mlxsw_sp;
9599 	struct net_device *dev;
9600 	netdevice_tracker dev_tracker;
9601 	unsigned long event;
9602 };
9603 
9604 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
9605 {
9606 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
9607 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
9608 	struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
9609 	struct net_device *dev = inet6addr_work->dev;
9610 	unsigned long event = inet6addr_work->event;
9611 	struct mlxsw_sp_rif *rif;
9612 
9613 	rtnl_lock();
9614 	mutex_lock(&mlxsw_sp->router->lock);
9615 
9616 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9617 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
9618 		goto out;
9619 
9620 	__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false, NULL);
9621 out:
9622 	mutex_unlock(&mlxsw_sp->router->lock);
9623 	rtnl_unlock();
9624 	netdev_put(dev, &inet6addr_work->dev_tracker);
9625 	kfree(inet6addr_work);
9626 }
9627 
9628 /* Called with rcu_read_lock() */
9629 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
9630 				    unsigned long event, void *ptr)
9631 {
9632 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
9633 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
9634 	struct net_device *dev = if6->idev->dev;
9635 	struct mlxsw_sp_router *router;
9636 
9637 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
9638 	if (event == NETDEV_UP)
9639 		return NOTIFY_DONE;
9640 
9641 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
9642 	if (!inet6addr_work)
9643 		return NOTIFY_BAD;
9644 
9645 	router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
9646 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
9647 	inet6addr_work->mlxsw_sp = router->mlxsw_sp;
9648 	inet6addr_work->dev = dev;
9649 	inet6addr_work->event = event;
9650 	netdev_hold(dev, &inet6addr_work->dev_tracker, GFP_ATOMIC);
9651 	mlxsw_core_schedule_work(&inet6addr_work->work);
9652 
9653 	return NOTIFY_DONE;
9654 }
9655 
9656 static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
9657 					  unsigned long event, void *ptr)
9658 {
9659 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
9660 	struct net_device *dev = i6vi->i6vi_dev->dev;
9661 	struct mlxsw_sp *mlxsw_sp;
9662 	struct mlxsw_sp_rif *rif;
9663 	int err = 0;
9664 
9665 	mlxsw_sp = mlxsw_sp_lower_get(dev);
9666 	if (!mlxsw_sp)
9667 		return NOTIFY_DONE;
9668 
9669 	mutex_lock(&mlxsw_sp->router->lock);
9670 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9671 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
9672 		goto out;
9673 
9674 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false,
9675 					i6vi->extack);
9676 out:
9677 	mutex_unlock(&mlxsw_sp->router->lock);
9678 	return notifier_from_errno(err);
9679 }
9680 
9681 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
9682 			     const char *mac, int mtu, u8 mac_profile)
9683 {
9684 	char ritr_pl[MLXSW_REG_RITR_LEN];
9685 	int err;
9686 
9687 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
9688 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9689 	if (err)
9690 		return err;
9691 
9692 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
9693 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
9694 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, mac_profile);
9695 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
9696 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9697 }
9698 
9699 static int
9700 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
9701 				  struct mlxsw_sp_rif *rif,
9702 				  struct netlink_ext_ack *extack)
9703 {
9704 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9705 	u8 old_mac_profile;
9706 	u16 fid_index;
9707 	int err;
9708 
9709 	fid_index = mlxsw_sp_fid_index(rif->fid);
9710 
9711 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
9712 	if (err)
9713 		return err;
9714 
9715 	old_mac_profile = rif->mac_profile_id;
9716 	err = mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, dev->dev_addr,
9717 					       extack);
9718 	if (err)
9719 		goto err_rif_mac_profile_replace;
9720 
9721 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
9722 				dev->mtu, rif->mac_profile_id);
9723 	if (err)
9724 		goto err_rif_edit;
9725 
9726 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
9727 	if (err)
9728 		goto err_rif_fdb_op;
9729 
9730 	if (rif->mtu != dev->mtu) {
9731 		struct mlxsw_sp_vr *vr;
9732 		int i;
9733 
9734 		/* The RIF is relevant only to its mr_table instance, as unlike
9735 		 * unicast routing, in multicast routing a RIF cannot be shared
9736 		 * between several multicast routing tables.
9737 		 */
9738 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
9739 		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
9740 			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
9741 						   rif, dev->mtu);
9742 	}
9743 
9744 	ether_addr_copy(rif->addr, dev->dev_addr);
9745 	rif->mtu = dev->mtu;
9746 
9747 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
9748 
9749 	return 0;
9750 
9751 err_rif_fdb_op:
9752 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu,
9753 			  old_mac_profile);
9754 err_rif_edit:
9755 	mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, rif->addr, extack);
9756 err_rif_mac_profile_replace:
9757 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
9758 	return err;
9759 }
9760 
9761 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
9762 			    struct netdev_notifier_pre_changeaddr_info *info)
9763 {
9764 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9765 	struct mlxsw_sp_rif_mac_profile *profile;
9766 	struct netlink_ext_ack *extack;
9767 	u8 max_rif_mac_profiles;
9768 	u64 occ;
9769 
9770 	extack = netdev_notifier_info_to_extack(&info->info);
9771 
9772 	profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, info->dev_addr);
9773 	if (profile)
9774 		return 0;
9775 
9776 	max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
9777 	occ = mlxsw_sp_rif_mac_profiles_occ_get(mlxsw_sp);
9778 	if (occ < max_rif_mac_profiles)
9779 		return 0;
9780 
9781 	if (!mlxsw_sp_rif_mac_profile_is_shared(rif))
9782 		return 0;
9783 
9784 	NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interface MAC profiles");
9785 	return -ENOBUFS;
9786 }
9787 
9788 static bool mlxsw_sp_router_netdevice_interesting(struct mlxsw_sp *mlxsw_sp,
9789 						  struct net_device *dev)
9790 {
9791 	struct vlan_dev_priv *vlan;
9792 
9793 	if (netif_is_lag_master(dev) ||
9794 	    netif_is_bridge_master(dev) ||
9795 	    mlxsw_sp_port_dev_check(dev) ||
9796 	    mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev) ||
9797 	    netif_is_l3_master(dev))
9798 		return true;
9799 
9800 	if (!is_vlan_dev(dev))
9801 		return false;
9802 
9803 	vlan = vlan_dev_priv(dev);
9804 	return netif_is_lag_master(vlan->real_dev) ||
9805 	       netif_is_bridge_master(vlan->real_dev) ||
9806 	       mlxsw_sp_port_dev_check(vlan->real_dev);
9807 }
9808 
9809 static struct mlxsw_sp_crif *
9810 mlxsw_sp_crif_register(struct mlxsw_sp_router *router, struct net_device *dev)
9811 {
9812 	struct mlxsw_sp_crif *crif;
9813 	int err;
9814 
9815 	if (WARN_ON(mlxsw_sp_crif_lookup(router, dev)))
9816 		return NULL;
9817 
9818 	crif = mlxsw_sp_crif_alloc(dev);
9819 	if (!crif)
9820 		return ERR_PTR(-ENOMEM);
9821 
9822 	err = mlxsw_sp_crif_insert(router, crif);
9823 	if (err)
9824 		goto err_netdev_insert;
9825 
9826 	return crif;
9827 
9828 err_netdev_insert:
9829 	mlxsw_sp_crif_free(crif);
9830 	return ERR_PTR(err);
9831 }
9832 
9833 static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router,
9834 				     struct mlxsw_sp_crif *crif)
9835 {
9836 	struct mlxsw_sp_nexthop *nh, *tmp;
9837 
9838 	mlxsw_sp_crif_remove(router, crif);
9839 
9840 	list_for_each_entry_safe(nh, tmp, &crif->nexthop_list, crif_list_node)
9841 		mlxsw_sp_nexthop_type_fini(router->mlxsw_sp, nh);
9842 
9843 	if (crif->rif)
9844 		crif->can_destroy = true;
9845 	else
9846 		mlxsw_sp_crif_free(crif);
9847 }
9848 
9849 static int mlxsw_sp_netdevice_register(struct mlxsw_sp_router *router,
9850 				       struct net_device *dev)
9851 {
9852 	struct mlxsw_sp_crif *crif;
9853 
9854 	if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev))
9855 		return 0;
9856 
9857 	crif = mlxsw_sp_crif_register(router, dev);
9858 	return PTR_ERR_OR_ZERO(crif);
9859 }
9860 
9861 static void mlxsw_sp_netdevice_unregister(struct mlxsw_sp_router *router,
9862 					  struct net_device *dev)
9863 {
9864 	struct mlxsw_sp_crif *crif;
9865 
9866 	if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev))
9867 		return;
9868 
9869 	/* netdev_run_todo(), by way of netdev_wait_allrefs_any(), rebroadcasts
9870 	 * the NETDEV_UNREGISTER message, so we can get here twice. If that's
9871 	 * what happened, the netdevice state is NETREG_UNREGISTERED. In that
9872 	 * case, we expect to have collected the CRIF already, and warn if it
9873 	 * still exists. Otherwise we expect the CRIF to exist.
9874 	 */
9875 	crif = mlxsw_sp_crif_lookup(router, dev);
9876 	if (dev->reg_state == NETREG_UNREGISTERED) {
9877 		if (!WARN_ON(crif))
9878 			return;
9879 	}
9880 	if (WARN_ON(!crif))
9881 		return;
9882 
9883 	mlxsw_sp_crif_unregister(router, crif);
9884 }
9885 
9886 static bool mlxsw_sp_is_offload_xstats_event(unsigned long event)
9887 {
9888 	switch (event) {
9889 	case NETDEV_OFFLOAD_XSTATS_ENABLE:
9890 	case NETDEV_OFFLOAD_XSTATS_DISABLE:
9891 	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9892 	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9893 		return true;
9894 	}
9895 
9896 	return false;
9897 }
9898 
9899 static int
9900 mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif,
9901 					unsigned long event,
9902 					struct netdev_notifier_offload_xstats_info *info)
9903 {
9904 	switch (info->type) {
9905 	case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
9906 		break;
9907 	default:
9908 		return 0;
9909 	}
9910 
9911 	switch (event) {
9912 	case NETDEV_OFFLOAD_XSTATS_ENABLE:
9913 		return mlxsw_sp_router_port_l3_stats_enable(rif);
9914 	case NETDEV_OFFLOAD_XSTATS_DISABLE:
9915 		mlxsw_sp_router_port_l3_stats_disable(rif);
9916 		return 0;
9917 	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9918 		mlxsw_sp_router_port_l3_stats_report_used(rif, info);
9919 		return 0;
9920 	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9921 		return mlxsw_sp_router_port_l3_stats_report_delta(rif, info);
9922 	}
9923 
9924 	WARN_ON_ONCE(1);
9925 	return 0;
9926 }
9927 
/* Run an offload-xstats command for @dev. Returns 0 without action if @dev
 * has no RIF; otherwise the command is forwarded to the RIF-level handler.
 */
static int
mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp,
				      struct net_device *dev,
				      unsigned long event,
				      struct netdev_notifier_offload_xstats_info *info)
{
	struct mlxsw_sp_rif *rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

	if (rif)
		return mlxsw_sp_router_port_offload_xstats_cmd(rif, event,
							       info);

	return 0;
}
9942 
9943 static bool mlxsw_sp_is_router_event(unsigned long event)
9944 {
9945 	switch (event) {
9946 	case NETDEV_PRE_CHANGEADDR:
9947 	case NETDEV_CHANGEADDR:
9948 	case NETDEV_CHANGEMTU:
9949 		return true;
9950 	default:
9951 		return false;
9952 	}
9953 }
9954 
9955 static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
9956 						unsigned long event, void *ptr)
9957 {
9958 	struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
9959 	struct mlxsw_sp *mlxsw_sp;
9960 	struct mlxsw_sp_rif *rif;
9961 
9962 	mlxsw_sp = mlxsw_sp_lower_get(dev);
9963 	if (!mlxsw_sp)
9964 		return 0;
9965 
9966 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9967 	if (!rif)
9968 		return 0;
9969 
9970 	switch (event) {
9971 	case NETDEV_CHANGEMTU:
9972 	case NETDEV_CHANGEADDR:
9973 		return mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack);
9974 	case NETDEV_PRE_CHANGEADDR:
9975 		return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
9976 	default:
9977 		WARN_ON_ONCE(1);
9978 		break;
9979 	}
9980 
9981 	return 0;
9982 }
9983 
9984 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
9985 				  struct net_device *l3_dev,
9986 				  struct netlink_ext_ack *extack)
9987 {
9988 	struct mlxsw_sp_rif *rif;
9989 
9990 	/* If netdev is already associated with a RIF, then we need to
9991 	 * destroy it and create a new one with the new virtual router ID.
9992 	 */
9993 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9994 	if (rif)
9995 		__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false,
9996 					  extack);
9997 
9998 	return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, false,
9999 					 extack);
10000 }
10001 
10002 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
10003 				    struct net_device *l3_dev)
10004 {
10005 	struct mlxsw_sp_rif *rif;
10006 
10007 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
10008 	if (!rif)
10009 		return;
10010 	__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false, NULL);
10011 }
10012 
10013 static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
10014 {
10015 	struct netdev_notifier_changeupper_info *info = ptr;
10016 
10017 	if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER)
10018 		return false;
10019 	return netif_is_l3_master(info->upper_dev);
10020 }
10021 
10022 static int
10023 mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
10024 			     struct netdev_notifier_changeupper_info *info)
10025 {
10026 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
10027 	int err = 0;
10028 
10029 	/* We do not create a RIF for a macvlan, but only use it to
10030 	 * direct more MAC addresses to the router.
10031 	 */
10032 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
10033 		return 0;
10034 
10035 	switch (event) {
10036 	case NETDEV_PRECHANGEUPPER:
10037 		break;
10038 	case NETDEV_CHANGEUPPER:
10039 		if (info->linking) {
10040 			struct netlink_ext_ack *extack;
10041 
10042 			extack = netdev_notifier_info_to_extack(&info->info);
10043 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
10044 		} else {
10045 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
10046 		}
10047 		break;
10048 	}
10049 
10050 	return err;
10051 }
10052 
10053 struct mlxsw_sp_router_replay_inetaddr_up {
10054 	struct mlxsw_sp *mlxsw_sp;
10055 	struct netlink_ext_ack *extack;
10056 	unsigned int done;
10057 	bool deslavement;
10058 };
10059 
10060 static int mlxsw_sp_router_replay_inetaddr_up(struct net_device *dev,
10061 					      struct netdev_nested_priv *priv)
10062 {
10063 	struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data;
10064 	bool nomaster = ctx->deslavement;
10065 	struct mlxsw_sp_crif *crif;
10066 	int err;
10067 
10068 	if (mlxsw_sp_dev_addr_list_empty(dev))
10069 		return 0;
10070 
10071 	crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev);
10072 	if (!crif || crif->rif)
10073 		return 0;
10074 
10075 	if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP))
10076 		return 0;
10077 
10078 	err = __mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_UP,
10079 					nomaster, ctx->extack);
10080 	if (err)
10081 		return err;
10082 
10083 	ctx->done++;
10084 	return 0;
10085 }
10086 
10087 static int mlxsw_sp_router_unreplay_inetaddr_up(struct net_device *dev,
10088 						struct netdev_nested_priv *priv)
10089 {
10090 	struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data;
10091 	bool nomaster = ctx->deslavement;
10092 	struct mlxsw_sp_crif *crif;
10093 
10094 	if (!ctx->done)
10095 		return 0;
10096 
10097 	if (mlxsw_sp_dev_addr_list_empty(dev))
10098 		return 0;
10099 
10100 	crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev);
10101 	if (!crif || !crif->rif)
10102 		return 0;
10103 
10104 	/* We are rolling back NETDEV_UP, so ask for that. */
10105 	if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP))
10106 		return 0;
10107 
10108 	__mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_DOWN, nomaster,
10109 				  NULL);
10110 
10111 	ctx->done--;
10112 	return 0;
10113 }
10114 
10115 int mlxsw_sp_netdevice_enslavement_replay(struct mlxsw_sp *mlxsw_sp,
10116 					  struct net_device *upper_dev,
10117 					  struct netlink_ext_ack *extack)
10118 {
10119 	struct mlxsw_sp_router_replay_inetaddr_up ctx = {
10120 		.mlxsw_sp = mlxsw_sp,
10121 		.extack = extack,
10122 		.deslavement = false,
10123 	};
10124 	struct netdev_nested_priv priv = {
10125 		.data = &ctx,
10126 	};
10127 	int err;
10128 
10129 	err = mlxsw_sp_router_replay_inetaddr_up(upper_dev, &priv);
10130 	if (err)
10131 		return err;
10132 
10133 	err = netdev_walk_all_upper_dev_rcu(upper_dev,
10134 					    mlxsw_sp_router_replay_inetaddr_up,
10135 					    &priv);
10136 	if (err)
10137 		goto err_replay_up;
10138 
10139 	return 0;
10140 
10141 err_replay_up:
10142 	netdev_walk_all_upper_dev_rcu(upper_dev,
10143 				      mlxsw_sp_router_unreplay_inetaddr_up,
10144 				      &priv);
10145 	mlxsw_sp_router_unreplay_inetaddr_up(upper_dev, &priv);
10146 	return err;
10147 }
10148 
10149 void mlxsw_sp_netdevice_deslavement_replay(struct mlxsw_sp *mlxsw_sp,
10150 					   struct net_device *dev)
10151 {
10152 	struct mlxsw_sp_router_replay_inetaddr_up ctx = {
10153 		.mlxsw_sp = mlxsw_sp,
10154 		.deslavement = true,
10155 	};
10156 	struct netdev_nested_priv priv = {
10157 		.data = &ctx,
10158 	};
10159 
10160 	mlxsw_sp_router_replay_inetaddr_up(dev, &priv);
10161 }
10162 
10163 static int
10164 mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port,
10165 				       u16 vid, struct net_device *dev,
10166 				       struct netlink_ext_ack *extack)
10167 {
10168 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
10169 
10170 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port,
10171 							    vid);
10172 	if (WARN_ON(!mlxsw_sp_port_vlan))
10173 		return -EINVAL;
10174 
10175 	return mlxsw_sp_port_vlan_router_join_existing(mlxsw_sp_port_vlan,
10176 						       dev, extack);
10177 }
10178 
10179 static void
10180 mlxsw_sp_port_vid_router_leave(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
10181 			       struct net_device *dev)
10182 {
10183 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
10184 
10185 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port,
10186 							    vid);
10187 	if (WARN_ON(!mlxsw_sp_port_vlan))
10188 		return;
10189 
10190 	__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
10191 }
10192 
10193 static int __mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
10194 					   struct net_device *lag_dev,
10195 					   struct netlink_ext_ack *extack)
10196 {
10197 	u16 default_vid = MLXSW_SP_DEFAULT_VID;
10198 	struct net_device *upper_dev;
10199 	struct list_head *iter;
10200 	int done = 0;
10201 	u16 vid;
10202 	int err;
10203 
10204 	err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, default_vid,
10205 						     lag_dev, extack);
10206 	if (err)
10207 		return err;
10208 
10209 	netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
10210 		if (!is_vlan_dev(upper_dev))
10211 			continue;
10212 
10213 		vid = vlan_dev_vlan_id(upper_dev);
10214 		err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, vid,
10215 							     upper_dev, extack);
10216 		if (err)
10217 			goto err_router_join_dev;
10218 
10219 		++done;
10220 	}
10221 
10222 	return 0;
10223 
10224 err_router_join_dev:
10225 	netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
10226 		if (!is_vlan_dev(upper_dev))
10227 			continue;
10228 		if (!done--)
10229 			break;
10230 
10231 		vid = vlan_dev_vlan_id(upper_dev);
10232 		mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev);
10233 	}
10234 
10235 	mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev);
10236 	return err;
10237 }
10238 
10239 static void
10240 __mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port,
10241 				 struct net_device *lag_dev)
10242 {
10243 	u16 default_vid = MLXSW_SP_DEFAULT_VID;
10244 	struct net_device *upper_dev;
10245 	struct list_head *iter;
10246 	u16 vid;
10247 
10248 	netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
10249 		if (!is_vlan_dev(upper_dev))
10250 			continue;
10251 
10252 		vid = vlan_dev_vlan_id(upper_dev);
10253 		mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev);
10254 	}
10255 
10256 	mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev);
10257 }
10258 
10259 int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
10260 				  struct net_device *lag_dev,
10261 				  struct netlink_ext_ack *extack)
10262 {
10263 	int err;
10264 
10265 	mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock);
10266 	err = __mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev, extack);
10267 	mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock);
10268 
10269 	return err;
10270 }
10271 
10272 void mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port,
10273 				    struct net_device *lag_dev)
10274 {
10275 	mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock);
10276 	__mlxsw_sp_router_port_leave_lag(mlxsw_sp_port, lag_dev);
10277 	mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock);
10278 }
10279 
10280 static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb,
10281 					   unsigned long event, void *ptr)
10282 {
10283 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
10284 	struct mlxsw_sp_router *router;
10285 	struct mlxsw_sp *mlxsw_sp;
10286 	int err = 0;
10287 
10288 	router = container_of(nb, struct mlxsw_sp_router, netdevice_nb);
10289 	mlxsw_sp = router->mlxsw_sp;
10290 
10291 	mutex_lock(&mlxsw_sp->router->lock);
10292 
10293 	if (event == NETDEV_REGISTER) {
10294 		err = mlxsw_sp_netdevice_register(router, dev);
10295 		if (err)
10296 			/* No need to roll this back, UNREGISTER will collect it
10297 			 * anyhow.
10298 			 */
10299 			goto out;
10300 	}
10301 
10302 	if (mlxsw_sp_is_offload_xstats_event(event))
10303 		err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev,
10304 							    event, ptr);
10305 	else if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
10306 		err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
10307 						       event, ptr);
10308 	else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
10309 		err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
10310 						       event, ptr);
10311 	else if (mlxsw_sp_is_router_event(event))
10312 		err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr);
10313 	else if (mlxsw_sp_is_vrf_event(event, ptr))
10314 		err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
10315 
10316 	if (event == NETDEV_UNREGISTER)
10317 		mlxsw_sp_netdevice_unregister(router, dev);
10318 
10319 out:
10320 	mutex_unlock(&mlxsw_sp->router->lock);
10321 
10322 	return notifier_from_errno(err);
10323 }
10324 
/* Context handed (through netdev_nested_priv::data) to the macvlan replay
 * walker callback.
 */
struct mlxsw_sp_macvlan_replay {
	/* Driver instance the macvlans are added on. */
	struct mlxsw_sp *mlxsw_sp;
	/* Extended ack forwarded to mlxsw_sp_rif_macvlan_add(). */
	struct netlink_ext_ack *extack;
};
10329 
10330 static int mlxsw_sp_macvlan_replay_upper(struct net_device *dev,
10331 					 struct netdev_nested_priv *priv)
10332 {
10333 	const struct mlxsw_sp_macvlan_replay *rms = priv->data;
10334 	struct netlink_ext_ack *extack = rms->extack;
10335 	struct mlxsw_sp *mlxsw_sp = rms->mlxsw_sp;
10336 
10337 	if (!netif_is_macvlan(dev))
10338 		return 0;
10339 
10340 	return mlxsw_sp_rif_macvlan_add(mlxsw_sp, dev, extack);
10341 }
10342 
10343 static int mlxsw_sp_macvlan_replay(struct mlxsw_sp_rif *rif,
10344 				   struct netlink_ext_ack *extack)
10345 {
10346 	struct mlxsw_sp_macvlan_replay rms = {
10347 		.mlxsw_sp = rif->mlxsw_sp,
10348 		.extack = extack,
10349 	};
10350 	struct netdev_nested_priv priv = {
10351 		.data = &rms,
10352 	};
10353 
10354 	return netdev_walk_all_upper_dev_rcu(mlxsw_sp_rif_dev(rif),
10355 					     mlxsw_sp_macvlan_replay_upper,
10356 					     &priv);
10357 }
10358 
10359 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
10360 					struct netdev_nested_priv *priv)
10361 {
10362 	struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
10363 
10364 	if (!netif_is_macvlan(dev))
10365 		return 0;
10366 
10367 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10368 				   mlxsw_sp_fid_index(rif->fid), false);
10369 }
10370 
10371 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
10372 {
10373 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10374 	struct netdev_nested_priv priv = {
10375 		.data = (void *)rif,
10376 	};
10377 
10378 	if (!netif_is_macvlan_port(dev))
10379 		return 0;
10380 
10381 	return netdev_walk_all_upper_dev_rcu(dev,
10382 					     __mlxsw_sp_rif_macvlan_flush, &priv);
10383 }
10384 
10385 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
10386 				       const struct mlxsw_sp_rif_params *params)
10387 {
10388 	struct mlxsw_sp_rif_subport *rif_subport;
10389 
10390 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
10391 	refcount_set(&rif_subport->ref_count, 1);
10392 	rif_subport->vid = params->vid;
10393 	rif_subport->lag = params->lag;
10394 	if (params->lag)
10395 		rif_subport->lag_id = params->lag_id;
10396 	else
10397 		rif_subport->system_port = params->system_port;
10398 }
10399 
10400 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
10401 {
10402 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10403 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10404 	struct mlxsw_sp_rif_subport *rif_subport;
10405 	char ritr_pl[MLXSW_REG_RITR_LEN];
10406 	u16 efid;
10407 
10408 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
10409 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
10410 			    rif->rif_index, rif->vr_id, dev->mtu);
10411 	mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr);
10412 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
10413 	efid = mlxsw_sp_fid_index(rif->fid);
10414 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
10415 				  rif_subport->lag ? rif_subport->lag_id :
10416 						     rif_subport->system_port,
10417 				  efid, 0);
10418 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10419 }
10420 
10421 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif,
10422 					  struct netlink_ext_ack *extack)
10423 {
10424 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10425 	u8 mac_profile;
10426 	int err;
10427 
10428 	err = mlxsw_sp_rif_mac_profile_get(rif->mlxsw_sp, rif->addr,
10429 					   &mac_profile, extack);
10430 	if (err)
10431 		return err;
10432 	rif->mac_profile_id = mac_profile;
10433 
10434 	err = mlxsw_sp_rif_subport_op(rif, true);
10435 	if (err)
10436 		goto err_rif_subport_op;
10437 
10438 	err = mlxsw_sp_macvlan_replay(rif, extack);
10439 	if (err)
10440 		goto err_macvlan_replay;
10441 
10442 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10443 				  mlxsw_sp_fid_index(rif->fid), true);
10444 	if (err)
10445 		goto err_rif_fdb_op;
10446 
10447 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
10448 	if (err)
10449 		goto err_fid_rif_set;
10450 
10451 	return 0;
10452 
10453 err_fid_rif_set:
10454 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10455 			    mlxsw_sp_fid_index(rif->fid), false);
10456 err_rif_fdb_op:
10457 	mlxsw_sp_rif_macvlan_flush(rif);
10458 err_macvlan_replay:
10459 	mlxsw_sp_rif_subport_op(rif, false);
10460 err_rif_subport_op:
10461 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile);
10462 	return err;
10463 }
10464 
10465 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
10466 {
10467 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10468 	struct mlxsw_sp_fid *fid = rif->fid;
10469 
10470 	mlxsw_sp_fid_rif_unset(fid);
10471 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10472 			    mlxsw_sp_fid_index(fid), false);
10473 	mlxsw_sp_rif_macvlan_flush(rif);
10474 	mlxsw_sp_rif_subport_op(rif, false);
10475 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
10476 }
10477 
10478 static struct mlxsw_sp_fid *
10479 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
10480 			     const struct mlxsw_sp_rif_params *params,
10481 			     struct netlink_ext_ack *extack)
10482 {
10483 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
10484 }
10485 
10486 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
10487 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
10488 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
10489 	.setup			= mlxsw_sp_rif_subport_setup,
10490 	.configure		= mlxsw_sp_rif_subport_configure,
10491 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
10492 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
10493 };
10494 
10495 static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable)
10496 {
10497 	enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF;
10498 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10499 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10500 	char ritr_pl[MLXSW_REG_RITR_LEN];
10501 
10502 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
10503 			    dev->mtu);
10504 	mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr);
10505 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
10506 	mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid);
10507 
10508 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10509 }
10510 
10511 u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
10512 {
10513 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
10514 }
10515 
10516 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif,
10517 				      struct netlink_ext_ack *extack)
10518 {
10519 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10520 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10521 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
10522 	u8 mac_profile;
10523 	int err;
10524 
10525 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
10526 					   &mac_profile, extack);
10527 	if (err)
10528 		return err;
10529 	rif->mac_profile_id = mac_profile;
10530 
10531 	err = mlxsw_sp_rif_fid_op(rif, fid_index, true);
10532 	if (err)
10533 		goto err_rif_fid_op;
10534 
10535 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10536 				     mlxsw_sp_router_port(mlxsw_sp), true);
10537 	if (err)
10538 		goto err_fid_mc_flood_set;
10539 
10540 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10541 				     mlxsw_sp_router_port(mlxsw_sp), true);
10542 	if (err)
10543 		goto err_fid_bc_flood_set;
10544 
10545 	err = mlxsw_sp_macvlan_replay(rif, extack);
10546 	if (err)
10547 		goto err_macvlan_replay;
10548 
10549 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10550 				  mlxsw_sp_fid_index(rif->fid), true);
10551 	if (err)
10552 		goto err_rif_fdb_op;
10553 
10554 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
10555 	if (err)
10556 		goto err_fid_rif_set;
10557 
10558 	return 0;
10559 
10560 err_fid_rif_set:
10561 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10562 			    mlxsw_sp_fid_index(rif->fid), false);
10563 err_rif_fdb_op:
10564 	mlxsw_sp_rif_macvlan_flush(rif);
10565 err_macvlan_replay:
10566 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10567 			       mlxsw_sp_router_port(mlxsw_sp), false);
10568 err_fid_bc_flood_set:
10569 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10570 			       mlxsw_sp_router_port(mlxsw_sp), false);
10571 err_fid_mc_flood_set:
10572 	mlxsw_sp_rif_fid_op(rif, fid_index, false);
10573 err_rif_fid_op:
10574 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
10575 	return err;
10576 }
10577 
10578 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
10579 {
10580 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10581 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
10582 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10583 	struct mlxsw_sp_fid *fid = rif->fid;
10584 
10585 	mlxsw_sp_fid_rif_unset(fid);
10586 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10587 			    mlxsw_sp_fid_index(fid), false);
10588 	mlxsw_sp_rif_macvlan_flush(rif);
10589 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10590 			       mlxsw_sp_router_port(mlxsw_sp), false);
10591 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10592 			       mlxsw_sp_router_port(mlxsw_sp), false);
10593 	mlxsw_sp_rif_fid_op(rif, fid_index, false);
10594 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
10595 }
10596 
10597 static struct mlxsw_sp_fid *
10598 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
10599 			 const struct mlxsw_sp_rif_params *params,
10600 			 struct netlink_ext_ack *extack)
10601 {
10602 	int rif_ifindex = mlxsw_sp_rif_dev_ifindex(rif);
10603 
10604 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif_ifindex);
10605 }
10606 
10607 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
10608 {
10609 	struct switchdev_notifier_fdb_info info = {};
10610 	struct net_device *dev;
10611 
10612 	dev = br_fdb_find_port(mlxsw_sp_rif_dev(rif), mac, 0);
10613 	if (!dev)
10614 		return;
10615 
10616 	info.addr = mac;
10617 	info.vid = 0;
10618 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
10619 				 NULL);
10620 }
10621 
10622 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
10623 	.type			= MLXSW_SP_RIF_TYPE_FID,
10624 	.rif_size		= sizeof(struct mlxsw_sp_rif),
10625 	.configure		= mlxsw_sp_rif_fid_configure,
10626 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
10627 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
10628 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
10629 };
10630 
10631 static struct mlxsw_sp_fid *
10632 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
10633 			  const struct mlxsw_sp_rif_params *params,
10634 			  struct netlink_ext_ack *extack)
10635 {
10636 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10637 	struct net_device *br_dev;
10638 
10639 	if (WARN_ON(!params->vid))
10640 		return ERR_PTR(-EINVAL);
10641 
10642 	if (is_vlan_dev(dev)) {
10643 		br_dev = vlan_dev_real_dev(dev);
10644 		if (WARN_ON(!netif_is_bridge_master(br_dev)))
10645 			return ERR_PTR(-EINVAL);
10646 	}
10647 
10648 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, params->vid);
10649 }
10650 
10651 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
10652 {
10653 	struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
10654 	struct switchdev_notifier_fdb_info info = {};
10655 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
10656 	struct net_device *br_dev;
10657 	struct net_device *dev;
10658 
10659 	br_dev = is_vlan_dev(rif_dev) ? vlan_dev_real_dev(rif_dev) : rif_dev;
10660 	dev = br_fdb_find_port(br_dev, mac, vid);
10661 	if (!dev)
10662 		return;
10663 
10664 	info.addr = mac;
10665 	info.vid = vid;
10666 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
10667 				 NULL);
10668 }
10669 
10670 static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid,
10671 				bool enable)
10672 {
10673 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10674 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10675 	char ritr_pl[MLXSW_REG_RITR_LEN];
10676 
10677 	mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id,
10678 				    dev->mtu, dev->dev_addr,
10679 				    rif->mac_profile_id, vid, efid);
10680 
10681 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10682 }
10683 
10684 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid,
10685 				       struct netlink_ext_ack *extack)
10686 {
10687 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10688 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
10689 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10690 	u8 mac_profile;
10691 	int err;
10692 
10693 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
10694 					   &mac_profile, extack);
10695 	if (err)
10696 		return err;
10697 	rif->mac_profile_id = mac_profile;
10698 
10699 	err = mlxsw_sp_rif_vlan_op(rif, vid, efid, true);
10700 	if (err)
10701 		goto err_rif_vlan_fid_op;
10702 
10703 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10704 				     mlxsw_sp_router_port(mlxsw_sp), true);
10705 	if (err)
10706 		goto err_fid_mc_flood_set;
10707 
10708 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10709 				     mlxsw_sp_router_port(mlxsw_sp), true);
10710 	if (err)
10711 		goto err_fid_bc_flood_set;
10712 
10713 	err = mlxsw_sp_macvlan_replay(rif, extack);
10714 	if (err)
10715 		goto err_macvlan_replay;
10716 
10717 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10718 				  mlxsw_sp_fid_index(rif->fid), true);
10719 	if (err)
10720 		goto err_rif_fdb_op;
10721 
10722 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
10723 	if (err)
10724 		goto err_fid_rif_set;
10725 
10726 	return 0;
10727 
10728 err_fid_rif_set:
10729 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10730 			    mlxsw_sp_fid_index(rif->fid), false);
10731 err_rif_fdb_op:
10732 	mlxsw_sp_rif_macvlan_flush(rif);
10733 err_macvlan_replay:
10734 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10735 			       mlxsw_sp_router_port(mlxsw_sp), false);
10736 err_fid_bc_flood_set:
10737 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10738 			       mlxsw_sp_router_port(mlxsw_sp), false);
10739 err_fid_mc_flood_set:
10740 	mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
10741 err_rif_vlan_fid_op:
10742 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
10743 	return err;
10744 }
10745 
10746 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
10747 {
10748 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10749 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
10750 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10751 
10752 	mlxsw_sp_fid_rif_unset(rif->fid);
10753 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10754 			    mlxsw_sp_fid_index(rif->fid), false);
10755 	mlxsw_sp_rif_macvlan_flush(rif);
10756 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10757 			       mlxsw_sp_router_port(mlxsw_sp), false);
10758 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10759 			       mlxsw_sp_router_port(mlxsw_sp), false);
10760 	mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
10761 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
10762 }
10763 
10764 static int mlxsw_sp1_rif_vlan_configure(struct mlxsw_sp_rif *rif,
10765 					struct netlink_ext_ack *extack)
10766 {
10767 	return mlxsw_sp_rif_vlan_configure(rif, 0, extack);
10768 }
10769 
10770 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_vlan_ops = {
10771 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
10772 	.rif_size		= sizeof(struct mlxsw_sp_rif),
10773 	.configure		= mlxsw_sp1_rif_vlan_configure,
10774 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
10775 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
10776 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
10777 };
10778 
10779 static int mlxsw_sp2_rif_vlan_configure(struct mlxsw_sp_rif *rif,
10780 					struct netlink_ext_ack *extack)
10781 {
10782 	u16 efid = mlxsw_sp_fid_index(rif->fid);
10783 
10784 	return mlxsw_sp_rif_vlan_configure(rif, efid, extack);
10785 }
10786 
10787 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_vlan_ops = {
10788 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
10789 	.rif_size		= sizeof(struct mlxsw_sp_rif),
10790 	.configure		= mlxsw_sp2_rif_vlan_configure,
10791 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
10792 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
10793 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
10794 };
10795 
10796 static struct mlxsw_sp_rif_ipip_lb *
10797 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
10798 {
10799 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
10800 }
10801 
10802 static void
10803 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
10804 			   const struct mlxsw_sp_rif_params *params)
10805 {
10806 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
10807 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
10808 
10809 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
10810 				 common);
10811 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
10812 	rif_lb->lb_config = params_lb->lb_config;
10813 }
10814 
10815 static int
10816 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
10817 				struct netlink_ext_ack *extack)
10818 {
10819 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10820 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10821 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
10822 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10823 	struct mlxsw_sp_vr *ul_vr;
10824 	int err;
10825 
10826 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, extack);
10827 	if (IS_ERR(ul_vr))
10828 		return PTR_ERR(ul_vr);
10829 
10830 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
10831 	if (err)
10832 		goto err_loopback_op;
10833 
10834 	lb_rif->ul_vr_id = ul_vr->id;
10835 	lb_rif->ul_rif_id = 0;
10836 	++ul_vr->rif_count;
10837 	return 0;
10838 
10839 err_loopback_op:
10840 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
10841 	return err;
10842 }
10843 
10844 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
10845 {
10846 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10847 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10848 	struct mlxsw_sp_vr *ul_vr;
10849 
10850 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
10851 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
10852 
10853 	--ul_vr->rif_count;
10854 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
10855 }
10856 
10857 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
10858 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
10859 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
10860 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
10861 	.configure		= mlxsw_sp1_rif_ipip_lb_configure,
10862 	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
10863 };
10864 
10865 static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
10866 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
10867 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp1_rif_vlan_ops,
10868 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
10869 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp1_rif_ipip_lb_ops,
10870 };
10871 
10872 static int
10873 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
10874 {
10875 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10876 	char ritr_pl[MLXSW_REG_RITR_LEN];
10877 
10878 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
10879 			    ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
10880 	mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
10881 					     MLXSW_REG_RITR_LOOPBACK_GENERIC);
10882 
10883 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10884 }
10885 
10886 static struct mlxsw_sp_rif *
10887 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
10888 		       struct mlxsw_sp_crif *ul_crif,
10889 		       struct netlink_ext_ack *extack)
10890 {
10891 	struct mlxsw_sp_rif *ul_rif;
10892 	u8 rif_entries = 1;
10893 	u16 rif_index;
10894 	int err;
10895 
10896 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
10897 	if (err) {
10898 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
10899 		return ERR_PTR(err);
10900 	}
10901 
10902 	ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id,
10903 				    ul_crif);
10904 	if (!ul_rif) {
10905 		err = -ENOMEM;
10906 		goto err_rif_alloc;
10907 	}
10908 
10909 	mlxsw_sp->router->rifs[rif_index] = ul_rif;
10910 	ul_rif->mlxsw_sp = mlxsw_sp;
10911 	ul_rif->rif_entries = rif_entries;
10912 	err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
10913 	if (err)
10914 		goto ul_rif_op_err;
10915 
10916 	atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
10917 	return ul_rif;
10918 
10919 ul_rif_op_err:
10920 	mlxsw_sp->router->rifs[rif_index] = NULL;
10921 	mlxsw_sp_rif_free(ul_rif);
10922 err_rif_alloc:
10923 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
10924 	return ERR_PTR(err);
10925 }
10926 
10927 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
10928 {
10929 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10930 	u8 rif_entries = ul_rif->rif_entries;
10931 	u16 rif_index = ul_rif->rif_index;
10932 
10933 	atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
10934 	mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
10935 	mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
10936 	mlxsw_sp_rif_free(ul_rif);
10937 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
10938 }
10939 
10940 static struct mlxsw_sp_rif *
10941 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
10942 		    struct mlxsw_sp_crif *ul_crif,
10943 		    struct netlink_ext_ack *extack)
10944 {
10945 	struct mlxsw_sp_vr *vr;
10946 	int err;
10947 
10948 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
10949 	if (IS_ERR(vr))
10950 		return ERR_CAST(vr);
10951 
10952 	if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
10953 		return vr->ul_rif;
10954 
10955 	vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, ul_crif, extack);
10956 	if (IS_ERR(vr->ul_rif)) {
10957 		err = PTR_ERR(vr->ul_rif);
10958 		goto err_ul_rif_create;
10959 	}
10960 
10961 	vr->rif_count++;
10962 	refcount_set(&vr->ul_rif_refcnt, 1);
10963 
10964 	return vr->ul_rif;
10965 
10966 err_ul_rif_create:
10967 	mlxsw_sp_vr_put(mlxsw_sp, vr);
10968 	return ERR_PTR(err);
10969 }
10970 
10971 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
10972 {
10973 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10974 	struct mlxsw_sp_vr *vr;
10975 
10976 	vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
10977 
10978 	if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
10979 		return;
10980 
10981 	vr->rif_count--;
10982 	mlxsw_sp_ul_rif_destroy(ul_rif);
10983 	mlxsw_sp_vr_put(mlxsw_sp, vr);
10984 }
10985 
10986 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
10987 			       u16 *ul_rif_index)
10988 {
10989 	struct mlxsw_sp_rif *ul_rif;
10990 	int err = 0;
10991 
10992 	mutex_lock(&mlxsw_sp->router->lock);
10993 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, NULL);
10994 	if (IS_ERR(ul_rif)) {
10995 		err = PTR_ERR(ul_rif);
10996 		goto out;
10997 	}
10998 	*ul_rif_index = ul_rif->rif_index;
10999 out:
11000 	mutex_unlock(&mlxsw_sp->router->lock);
11001 	return err;
11002 }
11003 
11004 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
11005 {
11006 	struct mlxsw_sp_rif *ul_rif;
11007 
11008 	mutex_lock(&mlxsw_sp->router->lock);
11009 	ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
11010 	if (WARN_ON(!ul_rif))
11011 		goto out;
11012 
11013 	mlxsw_sp_ul_rif_put(ul_rif);
11014 out:
11015 	mutex_unlock(&mlxsw_sp->router->lock);
11016 }
11017 
11018 static int
11019 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
11020 				struct netlink_ext_ack *extack)
11021 {
11022 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
11023 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
11024 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
11025 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
11026 	struct mlxsw_sp_rif *ul_rif;
11027 	int err;
11028 
11029 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, extack);
11030 	if (IS_ERR(ul_rif))
11031 		return PTR_ERR(ul_rif);
11032 
11033 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
11034 	if (err)
11035 		goto err_loopback_op;
11036 
11037 	lb_rif->ul_vr_id = 0;
11038 	lb_rif->ul_rif_id = ul_rif->rif_index;
11039 
11040 	return 0;
11041 
11042 err_loopback_op:
11043 	mlxsw_sp_ul_rif_put(ul_rif);
11044 	return err;
11045 }
11046 
11047 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
11048 {
11049 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
11050 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
11051 	struct mlxsw_sp_rif *ul_rif;
11052 
11053 	ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
11054 	mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
11055 	mlxsw_sp_ul_rif_put(ul_rif);
11056 }
11057 
11058 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
11059 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
11060 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
11061 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
11062 	.configure		= mlxsw_sp2_rif_ipip_lb_configure,
11063 	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
11064 };
11065 
/* Per-RIF-type operations table for the mlxsw_sp2 variant, indexed by
 * MLXSW_SP_RIF_TYPE_*. Installed as router->rif_ops_arr by
 * mlxsw_sp2_router_init().
 */
static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp2_rif_vlan_ops,
	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp2_rif_ipip_lb_ops,
};
11072 
11073 static int mlxsw_sp_rifs_table_init(struct mlxsw_sp *mlxsw_sp)
11074 {
11075 	struct gen_pool *rifs_table;
11076 	int err;
11077 
11078 	rifs_table = gen_pool_create(0, -1);
11079 	if (!rifs_table)
11080 		return -ENOMEM;
11081 
11082 	gen_pool_set_algo(rifs_table, gen_pool_first_fit_order_align,
11083 			  NULL);
11084 
11085 	err = gen_pool_add(rifs_table, MLXSW_SP_ROUTER_GENALLOC_OFFSET,
11086 			   MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS), -1);
11087 	if (err)
11088 		goto err_gen_pool_add;
11089 
11090 	mlxsw_sp->router->rifs_table = rifs_table;
11091 
11092 	return 0;
11093 
11094 err_gen_pool_add:
11095 	gen_pool_destroy(rifs_table);
11096 	return err;
11097 }
11098 
11099 static void mlxsw_sp_rifs_table_fini(struct mlxsw_sp *mlxsw_sp)
11100 {
11101 	gen_pool_destroy(mlxsw_sp->router->rifs_table);
11102 }
11103 
11104 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
11105 {
11106 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
11107 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
11108 	struct mlxsw_core *core = mlxsw_sp->core;
11109 	int err;
11110 
11111 	if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES))
11112 		return -EIO;
11113 	mlxsw_sp->router->max_rif_mac_profile =
11114 		MLXSW_CORE_RES_GET(core, MAX_RIF_MAC_PROFILES);
11115 
11116 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
11117 					 sizeof(struct mlxsw_sp_rif *),
11118 					 GFP_KERNEL);
11119 	if (!mlxsw_sp->router->rifs)
11120 		return -ENOMEM;
11121 
11122 	err = mlxsw_sp_rifs_table_init(mlxsw_sp);
11123 	if (err)
11124 		goto err_rifs_table_init;
11125 
11126 	idr_init(&mlxsw_sp->router->rif_mac_profiles_idr);
11127 	atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0);
11128 	atomic_set(&mlxsw_sp->router->rifs_count, 0);
11129 	devl_resource_occ_get_register(devlink,
11130 				       MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
11131 				       mlxsw_sp_rif_mac_profiles_occ_get,
11132 				       mlxsw_sp);
11133 	devl_resource_occ_get_register(devlink,
11134 				       MLXSW_SP_RESOURCE_RIFS,
11135 				       mlxsw_sp_rifs_occ_get,
11136 				       mlxsw_sp);
11137 
11138 	return 0;
11139 
11140 err_rifs_table_init:
11141 	kfree(mlxsw_sp->router->rifs);
11142 	return err;
11143 }
11144 
11145 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
11146 {
11147 	int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
11148 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
11149 	int i;
11150 
11151 	WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count));
11152 	for (i = 0; i < max_rifs; i++)
11153 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
11154 
11155 	devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS);
11156 	devl_resource_occ_get_unregister(devlink,
11157 					 MLXSW_SP_RESOURCE_RIF_MAC_PROFILES);
11158 	WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr));
11159 	idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr);
11160 	mlxsw_sp_rifs_table_fini(mlxsw_sp);
11161 	kfree(mlxsw_sp->router->rifs);
11162 }
11163 
11164 static int
11165 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
11166 {
11167 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
11168 
11169 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
11170 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
11171 }
11172 
11173 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
11174 {
11175 	int err;
11176 
11177 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
11178 
11179 	err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
11180 	if (err)
11181 		return err;
11182 	err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
11183 	if (err)
11184 		return err;
11185 
11186 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
11187 }
11188 
11189 static int mlxsw_sp1_ipips_init(struct mlxsw_sp *mlxsw_sp)
11190 {
11191 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp1_ipip_ops_arr;
11192 	return mlxsw_sp_ipips_init(mlxsw_sp);
11193 }
11194 
11195 static int mlxsw_sp2_ipips_init(struct mlxsw_sp *mlxsw_sp)
11196 {
11197 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp2_ipip_ops_arr;
11198 	return mlxsw_sp_ipips_init(mlxsw_sp);
11199 }
11200 
11201 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
11202 {
11203 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
11204 }
11205 
11206 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
11207 {
11208 	struct mlxsw_sp_router *router;
11209 
11210 	/* Flush pending FIB notifications and then flush the device's
11211 	 * table before requesting another dump. The FIB notification
11212 	 * block is unregistered, so no need to take RTNL.
11213 	 */
11214 	mlxsw_core_flush_owq();
11215 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
11216 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
11217 }
11218 
11219 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Multipath hash configuration accumulated by the mp4/mp6 hash init helpers
 * and written to the RECR2 register by mlxsw_sp_mp_hash_init().
 */
struct mlxsw_sp_mp_hash_config {
	/* Outer header enables and outer header fields to hash on. */
	DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT);
	DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
	/* Inner header enables and inner header fields to hash on. */
	DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
	DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
	/* Set when this configuration requires an increased parsing depth. */
	bool inc_parsing_depth;
};
11227 
/* Set the single bit MLXSW_REG_RECR2_<_header> in the bitmap @_headers. */
#define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
	bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1)

/* Set the single bit MLXSW_REG_RECR2_<_field> in the bitmap @_fields. */
#define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \
	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1)

/* Set @_nr consecutive bits starting at MLXSW_REG_RECR2_<_field> in the
 * bitmap @_fields.
 */
#define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
11236 
11237 static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config)
11238 {
11239 	unsigned long *inner_headers = config->inner_headers;
11240 	unsigned long *inner_fields = config->inner_fields;
11241 
11242 	/* IPv4 inner */
11243 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
11244 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
11245 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
11246 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
11247 	/* IPv6 inner */
11248 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
11249 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
11250 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
11251 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
11252 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
11253 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
11254 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
11255 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
11256 }
11257 
11258 static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
11259 {
11260 	unsigned long *headers = config->headers;
11261 	unsigned long *fields = config->fields;
11262 
11263 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
11264 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
11265 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
11266 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
11267 }
11268 
11269 static void
11270 mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config,
11271 			      u32 hash_fields)
11272 {
11273 	unsigned long *inner_headers = config->inner_headers;
11274 	unsigned long *inner_fields = config->inner_fields;
11275 
11276 	/* IPv4 Inner */
11277 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
11278 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
11279 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
11280 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
11281 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
11282 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
11283 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
11284 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL);
11285 	/* IPv6 inner */
11286 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
11287 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
11288 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) {
11289 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
11290 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
11291 	}
11292 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) {
11293 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
11294 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
11295 	}
11296 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
11297 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
11298 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
11299 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
11300 	/* L4 inner */
11301 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4);
11302 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6);
11303 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
11304 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT);
11305 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
11306 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT);
11307 }
11308 
11309 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
11310 				   struct mlxsw_sp_mp_hash_config *config)
11311 {
11312 	struct net *net = mlxsw_sp_net(mlxsw_sp);
11313 	unsigned long *headers = config->headers;
11314 	unsigned long *fields = config->fields;
11315 	u32 hash_fields;
11316 
11317 	switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
11318 	case 0:
11319 		mlxsw_sp_mp4_hash_outer_addr(config);
11320 		break;
11321 	case 1:
11322 		mlxsw_sp_mp4_hash_outer_addr(config);
11323 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
11324 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
11325 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11326 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11327 		break;
11328 	case 2:
11329 		/* Outer */
11330 		mlxsw_sp_mp4_hash_outer_addr(config);
11331 		/* Inner */
11332 		mlxsw_sp_mp_hash_inner_l3(config);
11333 		break;
11334 	case 3:
11335 		hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
11336 		/* Outer */
11337 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
11338 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
11339 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
11340 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
11341 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
11342 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
11343 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
11344 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
11345 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
11346 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
11347 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11348 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
11349 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11350 		/* Inner */
11351 		mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
11352 		break;
11353 	}
11354 }
11355 
11356 static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
11357 {
11358 	unsigned long *headers = config->headers;
11359 	unsigned long *fields = config->fields;
11360 
11361 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
11362 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
11363 	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
11364 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
11365 	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
11366 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
11367 }
11368 
11369 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
11370 				   struct mlxsw_sp_mp_hash_config *config)
11371 {
11372 	u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp));
11373 	unsigned long *headers = config->headers;
11374 	unsigned long *fields = config->fields;
11375 
11376 	switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
11377 	case 0:
11378 		mlxsw_sp_mp6_hash_outer_addr(config);
11379 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11380 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
11381 		break;
11382 	case 1:
11383 		mlxsw_sp_mp6_hash_outer_addr(config);
11384 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
11385 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11386 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11387 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11388 		break;
11389 	case 2:
11390 		/* Outer */
11391 		mlxsw_sp_mp6_hash_outer_addr(config);
11392 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11393 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
11394 		/* Inner */
11395 		mlxsw_sp_mp_hash_inner_l3(config);
11396 		config->inc_parsing_depth = true;
11397 		break;
11398 	case 3:
11399 		/* Outer */
11400 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
11401 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
11402 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
11403 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) {
11404 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
11405 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
11406 		}
11407 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) {
11408 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
11409 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
11410 		}
11411 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
11412 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11413 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
11414 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
11415 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
11416 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11417 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
11418 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11419 		/* Inner */
11420 		mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
11421 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
11422 			config->inc_parsing_depth = true;
11423 		break;
11424 	}
11425 }
11426 
11427 static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp,
11428 						 bool old_inc_parsing_depth,
11429 						 bool new_inc_parsing_depth)
11430 {
11431 	int err;
11432 
11433 	if (!old_inc_parsing_depth && new_inc_parsing_depth) {
11434 		err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
11435 		if (err)
11436 			return err;
11437 		mlxsw_sp->router->inc_parsing_depth = true;
11438 	} else if (old_inc_parsing_depth && !new_inc_parsing_depth) {
11439 		mlxsw_sp_parsing_depth_dec(mlxsw_sp);
11440 		mlxsw_sp->router->inc_parsing_depth = false;
11441 	}
11442 
11443 	return 0;
11444 }
11445 
11446 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
11447 {
11448 	bool old_inc_parsing_depth, new_inc_parsing_depth;
11449 	struct mlxsw_sp_mp_hash_config config = {};
11450 	struct net *net = mlxsw_sp_net(mlxsw_sp);
11451 	char recr2_pl[MLXSW_REG_RECR2_LEN];
11452 	unsigned long bit;
11453 	u32 seed;
11454 	int err;
11455 
11456 	seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).user_seed;
11457 	if (!seed)
11458 		seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
11459 
11460 	mlxsw_reg_recr2_pack(recr2_pl, seed);
11461 	mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
11462 	mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
11463 
11464 	old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
11465 	new_inc_parsing_depth = config.inc_parsing_depth;
11466 	err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp,
11467 						    old_inc_parsing_depth,
11468 						    new_inc_parsing_depth);
11469 	if (err)
11470 		return err;
11471 
11472 	for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
11473 		mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
11474 	for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
11475 		mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1);
11476 	for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT)
11477 		mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1);
11478 	for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
11479 		mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);
11480 
11481 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
11482 	if (err)
11483 		goto err_reg_write;
11484 
11485 	return 0;
11486 
11487 err_reg_write:
11488 	mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth,
11489 					      old_inc_parsing_depth);
11490 	return err;
11491 }
11492 
11493 static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
11494 {
11495 	bool old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
11496 
11497 	mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, old_inc_parsing_depth,
11498 					      false);
11499 }
11500 #else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to
 * configure, always succeeds.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
11505 
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to undo. */
static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
{
}
11509 #endif
11510 
11511 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
11512 {
11513 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
11514 	unsigned int i;
11515 
11516 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
11517 
11518 	/* HW is determining switch priority based on DSCP-bits, but the
11519 	 * kernel is still doing that based on the ToS. Since there's a
11520 	 * mismatch in bits we need to make sure to translate the right
11521 	 * value ToS would observe, skipping the 2 least-significant ECN bits.
11522 	 */
11523 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
11524 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
11525 
11526 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
11527 }
11528 
11529 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
11530 {
11531 	struct net *net = mlxsw_sp_net(mlxsw_sp);
11532 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
11533 	u64 max_rifs;
11534 	bool usp;
11535 
11536 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
11537 		return -EIO;
11538 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
11539 	usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
11540 
11541 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
11542 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
11543 	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
11544 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
11545 }
11546 
11547 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
11548 {
11549 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
11550 
11551 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
11552 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
11553 }
11554 
11555 static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp,
11556 				struct netlink_ext_ack *extack)
11557 {
11558 	struct mlxsw_sp_router *router = mlxsw_sp->router;
11559 	struct mlxsw_sp_rif *lb_rif;
11560 	int err;
11561 
11562 	router->lb_crif = mlxsw_sp_crif_alloc(NULL);
11563 	if (!router->lb_crif)
11564 		return -ENOMEM;
11565 
11566 	/* Create a generic loopback RIF associated with the main table
11567 	 * (default VRF). Any table can be used, but the main table exists
11568 	 * anyway, so we do not waste resources. Loopback RIFs are usually
11569 	 * created with a NULL CRIF, but this RIF is used as a fallback RIF
11570 	 * for blackhole nexthops, and nexthops expect to have a valid CRIF.
11571 	 */
11572 	lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, router->lb_crif,
11573 				     extack);
11574 	if (IS_ERR(lb_rif)) {
11575 		err = PTR_ERR(lb_rif);
11576 		goto err_ul_rif_get;
11577 	}
11578 
11579 	return 0;
11580 
11581 err_ul_rif_get:
11582 	mlxsw_sp_crif_free(router->lb_crif);
11583 	return err;
11584 }
11585 
11586 static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
11587 {
11588 	mlxsw_sp_ul_rif_put(mlxsw_sp->router->lb_crif->rif);
11589 	mlxsw_sp_crif_free(mlxsw_sp->router->lb_crif);
11590 }
11591 
11592 static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
11593 {
11594 	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges);
11595 
11596 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
11597 	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges;
11598 	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
11599 
11600 	return 0;
11601 }
11602 
/* Router operations for the mlxsw_sp1 variant. mlxsw_sp_router_init()
 * invokes ->init first and ->ipips_init later in its sequence.
 */
const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
	.init = mlxsw_sp1_router_init,
	.ipips_init = mlxsw_sp1_ipips_init,
};
11607 
11608 static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
11609 {
11610 	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges);
11611 
11612 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
11613 	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges;
11614 	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
11615 
11616 	return 0;
11617 }
11618 
/* Router operations for the mlxsw_sp2 variant. mlxsw_sp_router_init()
 * invokes ->init first and ->ipips_init later in its sequence.
 */
const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = {
	.init = mlxsw_sp2_router_init,
	.ipips_init = mlxsw_sp2_ipips_init,
};
11623 
11624 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
11625 			 struct netlink_ext_ack *extack)
11626 {
11627 	struct mlxsw_sp_router *router;
11628 	struct notifier_block *nb;
11629 	int err;
11630 
11631 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
11632 	if (!router)
11633 		return -ENOMEM;
11634 	mutex_init(&router->lock);
11635 	mlxsw_sp->router = router;
11636 	router->mlxsw_sp = mlxsw_sp;
11637 
11638 	err = mlxsw_sp->router_ops->init(mlxsw_sp);
11639 	if (err)
11640 		goto err_router_ops_init;
11641 
11642 	INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
11643 	INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
11644 			  mlxsw_sp_nh_grp_activity_work);
11645 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
11646 	err = __mlxsw_sp_router_init(mlxsw_sp);
11647 	if (err)
11648 		goto err_router_init;
11649 
11650 	err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp);
11651 	if (err)
11652 		goto err_ipips_init;
11653 
11654 	err = rhashtable_init(&mlxsw_sp->router->crif_ht,
11655 			      &mlxsw_sp_crif_ht_params);
11656 	if (err)
11657 		goto err_crif_ht_init;
11658 
11659 	err = mlxsw_sp_rifs_init(mlxsw_sp);
11660 	if (err)
11661 		goto err_rifs_init;
11662 
11663 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
11664 			      &mlxsw_sp_nexthop_ht_params);
11665 	if (err)
11666 		goto err_nexthop_ht_init;
11667 
11668 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
11669 			      &mlxsw_sp_nexthop_group_ht_params);
11670 	if (err)
11671 		goto err_nexthop_group_ht_init;
11672 
11673 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
11674 	err = mlxsw_sp_lpm_init(mlxsw_sp);
11675 	if (err)
11676 		goto err_lpm_init;
11677 
11678 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
11679 	if (err)
11680 		goto err_mr_init;
11681 
11682 	err = mlxsw_sp_vrs_init(mlxsw_sp);
11683 	if (err)
11684 		goto err_vrs_init;
11685 
11686 	err = mlxsw_sp_lb_rif_init(mlxsw_sp, extack);
11687 	if (err)
11688 		goto err_lb_rif_init;
11689 
11690 	err = mlxsw_sp_neigh_init(mlxsw_sp);
11691 	if (err)
11692 		goto err_neigh_init;
11693 
11694 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
11695 	if (err)
11696 		goto err_mp_hash_init;
11697 
11698 	err = mlxsw_sp_dscp_init(mlxsw_sp);
11699 	if (err)
11700 		goto err_dscp_init;
11701 
11702 	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
11703 	err = register_inetaddr_notifier(&router->inetaddr_nb);
11704 	if (err)
11705 		goto err_register_inetaddr_notifier;
11706 
11707 	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
11708 	err = register_inet6addr_notifier(&router->inet6addr_nb);
11709 	if (err)
11710 		goto err_register_inet6addr_notifier;
11711 
11712 	router->inetaddr_valid_nb.notifier_call = mlxsw_sp_inetaddr_valid_event;
11713 	err = register_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
11714 	if (err)
11715 		goto err_register_inetaddr_valid_notifier;
11716 
11717 	nb = &router->inet6addr_valid_nb;
11718 	nb->notifier_call = mlxsw_sp_inet6addr_valid_event;
11719 	err = register_inet6addr_validator_notifier(nb);
11720 	if (err)
11721 		goto err_register_inet6addr_valid_notifier;
11722 
11723 	mlxsw_sp->router->netevent_nb.notifier_call =
11724 		mlxsw_sp_router_netevent_event;
11725 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
11726 	if (err)
11727 		goto err_register_netevent_notifier;
11728 
11729 	mlxsw_sp->router->netdevice_nb.notifier_call =
11730 		mlxsw_sp_router_netdevice_event;
11731 	err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
11732 					      &mlxsw_sp->router->netdevice_nb);
11733 	if (err)
11734 		goto err_register_netdev_notifier;
11735 
11736 	mlxsw_sp->router->nexthop_nb.notifier_call =
11737 		mlxsw_sp_nexthop_obj_event;
11738 	err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
11739 					&mlxsw_sp->router->nexthop_nb,
11740 					extack);
11741 	if (err)
11742 		goto err_register_nexthop_notifier;
11743 
11744 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
11745 	err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
11746 				    &mlxsw_sp->router->fib_nb,
11747 				    mlxsw_sp_router_fib_dump_flush, extack);
11748 	if (err)
11749 		goto err_register_fib_notifier;
11750 
11751 	return 0;
11752 
11753 err_register_fib_notifier:
11754 	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
11755 				    &mlxsw_sp->router->nexthop_nb);
11756 err_register_nexthop_notifier:
11757 	unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
11758 					  &router->netdevice_nb);
11759 err_register_netdev_notifier:
11760 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
11761 err_register_netevent_notifier:
11762 	unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
11763 err_register_inet6addr_valid_notifier:
11764 	unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
11765 err_register_inetaddr_valid_notifier:
11766 	unregister_inet6addr_notifier(&router->inet6addr_nb);
11767 err_register_inet6addr_notifier:
11768 	unregister_inetaddr_notifier(&router->inetaddr_nb);
11769 err_register_inetaddr_notifier:
11770 	mlxsw_core_flush_owq();
11771 err_dscp_init:
11772 	mlxsw_sp_mp_hash_fini(mlxsw_sp);
11773 err_mp_hash_init:
11774 	mlxsw_sp_neigh_fini(mlxsw_sp);
11775 err_neigh_init:
11776 	mlxsw_sp_lb_rif_fini(mlxsw_sp);
11777 err_lb_rif_init:
11778 	mlxsw_sp_vrs_fini(mlxsw_sp);
11779 err_vrs_init:
11780 	mlxsw_sp_mr_fini(mlxsw_sp);
11781 err_mr_init:
11782 	mlxsw_sp_lpm_fini(mlxsw_sp);
11783 err_lpm_init:
11784 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
11785 err_nexthop_group_ht_init:
11786 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
11787 err_nexthop_ht_init:
11788 	mlxsw_sp_rifs_fini(mlxsw_sp);
11789 err_rifs_init:
11790 	rhashtable_destroy(&mlxsw_sp->router->crif_ht);
11791 err_crif_ht_init:
11792 	mlxsw_sp_ipips_fini(mlxsw_sp);
11793 err_ipips_init:
11794 	__mlxsw_sp_router_fini(mlxsw_sp);
11795 err_router_init:
11796 	cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
11797 err_router_ops_init:
11798 	mutex_destroy(&mlxsw_sp->router->lock);
11799 	kfree(mlxsw_sp->router);
11800 	return err;
11801 }
11802 
11803 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
11804 {
11805 	struct mlxsw_sp_router *router = mlxsw_sp->router;
11806 
11807 	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb);
11808 	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
11809 				    &router->nexthop_nb);
11810 	unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
11811 					  &router->netdevice_nb);
11812 	unregister_netevent_notifier(&router->netevent_nb);
11813 	unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
11814 	unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
11815 	unregister_inet6addr_notifier(&router->inet6addr_nb);
11816 	unregister_inetaddr_notifier(&router->inetaddr_nb);
11817 	mlxsw_core_flush_owq();
11818 	mlxsw_sp_mp_hash_fini(mlxsw_sp);
11819 	mlxsw_sp_neigh_fini(mlxsw_sp);
11820 	mlxsw_sp_lb_rif_fini(mlxsw_sp);
11821 	mlxsw_sp_vrs_fini(mlxsw_sp);
11822 	mlxsw_sp_mr_fini(mlxsw_sp);
11823 	mlxsw_sp_lpm_fini(mlxsw_sp);
11824 	rhashtable_destroy(&router->nexthop_group_ht);
11825 	rhashtable_destroy(&router->nexthop_ht);
11826 	mlxsw_sp_rifs_fini(mlxsw_sp);
11827 	rhashtable_destroy(&mlxsw_sp->router->crif_ht);
11828 	mlxsw_sp_ipips_fini(mlxsw_sp);
11829 	__mlxsw_sp_router_fini(mlxsw_sp);
11830 	cancel_delayed_work_sync(&router->nh_grp_activity_dw);
11831 	mutex_destroy(&router->lock);
11832 	kfree(router);
11833 }
11834