xref: /linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c (revision e9f0878c4b2004ac19581274c1ae4c61ae3ca70e)
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3 
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/random.h>
17 #include <linux/if_macvlan.h>
18 #include <net/netevent.h>
19 #include <net/neighbour.h>
20 #include <net/arp.h>
21 #include <net/ip_fib.h>
22 #include <net/ip6_fib.h>
23 #include <net/fib_rules.h>
24 #include <net/ip_tunnels.h>
25 #include <net/l3mdev.h>
26 #include <net/addrconf.h>
27 #include <net/ndisc.h>
28 #include <net/ipv6.h>
29 #include <net/fib_notifier.h>
30 #include <net/switchdev.h>
31 
32 #include "spectrum.h"
33 #include "core.h"
34 #include "reg.h"
35 #include "spectrum_cnt.h"
36 #include "spectrum_dpipe.h"
37 #include "spectrum_ipip.h"
38 #include "spectrum_mr.h"
39 #include "spectrum_mr_tcam.h"
40 #include "spectrum_router.h"
41 #include "spectrum_span.h"
42 
/* Forward declarations: these types are referenced by pointer before their
 * full definitions appear further down in this file.
 */
43 struct mlxsw_sp_fib;
44 struct mlxsw_sp_vr;
45 struct mlxsw_sp_lpm_tree;
46 struct mlxsw_sp_rif_ops;
47 
/* Router-wide state. The code in this file reaches it as mlxsw_sp->router. */
48 struct mlxsw_sp_router {
49 	struct mlxsw_sp *mlxsw_sp;
50 	struct mlxsw_sp_rif **rifs;
51 	struct mlxsw_sp_vr *vrs; /* MAX_VRS entries, allocated in mlxsw_sp_vrs_init() */
52 	struct rhashtable neigh_ht;
53 	struct rhashtable nexthop_group_ht;
54 	struct rhashtable nexthop_ht;
55 	struct list_head nexthop_list;
56 	struct {
57 		/* One tree for each protocol: IPv4 and IPv6 */
58 		struct mlxsw_sp_lpm_tree *proto_trees[2];
59 		struct mlxsw_sp_lpm_tree *trees; /* tree_count entries, allocated in mlxsw_sp_lpm_init() */
60 		unsigned int tree_count; /* MAX_LPM_TREES minus MLXSW_SP_LPM_TREE_MIN */
61 	} lpm;
62 	struct {
63 		struct delayed_work dw;
64 		unsigned long interval;	/* ms */
65 	} neighs_update;
66 	struct delayed_work nexthop_probe_dw;
67 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
68 	struct list_head nexthop_neighs_list;
69 	struct list_head ipip_list;
70 	bool aborted;
71 	struct notifier_block fib_nb; /* NOTE(review): presumably the FIB notifier; registration not visible here */
72 	struct notifier_block netevent_nb; /* NOTE(review): presumably the netevent notifier; registration not visible here */
73 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
74 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr; /* indexed by enum mlxsw_sp_ipip_type */
75 };
76 
/* Common part of a router interface (RIF). Type-specific RIF structures in
 * this file embed it as their first member named 'common'.
 */
77 struct mlxsw_sp_rif {
78 	struct list_head nexthop_list;
79 	struct list_head neigh_list;
80 	struct net_device *dev;
81 	struct mlxsw_sp_fid *fid;
82 	unsigned char addr[ETH_ALEN];
83 	int mtu;
84 	u16 rif_index; /* index used when packing the RITR register */
85 	u16 vr_id;
86 	const struct mlxsw_sp_rif_ops *ops;
87 	struct mlxsw_sp *mlxsw_sp;
88 
89 	unsigned int counter_ingress; /* counter index; meaningful only while counter_ingress_valid */
90 	bool counter_ingress_valid;
91 	unsigned int counter_egress; /* counter index; meaningful only while counter_egress_valid */
92 	bool counter_egress_valid;
93 };
94 
/* Parameters handed to mlxsw_sp_rif_create() when creating a RIF. */
95 struct mlxsw_sp_rif_params {
96 	struct net_device *dev;
97 	union {
98 		u16 system_port;
99 		u16 lag_id;
100 	};
101 	u16 vid;
102 	bool lag; /* NOTE(review): presumably selects lag_id over system_port in the union above; confirm */
103 };
104 
/* Sub-port RIF: the common RIF followed by the same port/LAG, VID and lag
 * fields that struct mlxsw_sp_rif_params carries.
 */
105 struct mlxsw_sp_rif_subport {
106 	struct mlxsw_sp_rif common;
107 	union {
108 		u16 system_port;
109 		u16 lag_id;
110 	};
111 	u16 vid;
112 	bool lag;
113 };
114 
/* IP-in-IP loopback RIF. mlxsw_sp_ipip_ol_ipip_lb_create() recovers this
 * structure from the embedded 'common' member via container_of().
 */
115 struct mlxsw_sp_rif_ipip_lb {
116 	struct mlxsw_sp_rif common;
117 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
118 	u16 ul_vr_id; /* Reserved for Spectrum-2. */
119 };
120 
/* RIF creation parameters for a loopback RIF: the common parameters plus the
 * loopback configuration. Its 'common' member is what gets passed to
 * mlxsw_sp_rif_create().
 */
121 struct mlxsw_sp_rif_params_ipip_lb {
122 	struct mlxsw_sp_rif_params common;
123 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
124 };
125 
/* Operations table for one RIF type. struct mlxsw_sp_router keeps an array of
 * pointers to these (rif_ops_arr) and every RIF points at its own (rif->ops).
 */
126 struct mlxsw_sp_rif_ops {
127 	enum mlxsw_sp_rif_type type;
128 	size_t rif_size; /* NOTE(review): presumably the size of the type-specific RIF structure; confirm at the allocation site */
129 
130 	void (*setup)(struct mlxsw_sp_rif *rif,
131 		      const struct mlxsw_sp_rif_params *params);
132 	int (*configure)(struct mlxsw_sp_rif *rif);
133 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
134 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
135 					 struct netlink_ext_ack *extack);
136 	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
137 };
138 
/* Prototypes for helpers defined further down in this file; they are needed
 * here because mlxsw_sp_fib_create() and mlxsw_sp_fib_destroy() call them
 * before their definitions.
 */
139 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
140 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
141 				  struct mlxsw_sp_lpm_tree *lpm_tree);
142 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
143 				     const struct mlxsw_sp_fib *fib,
144 				     u8 tree_id);
145 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
146 				       const struct mlxsw_sp_fib *fib);
147 
148 static unsigned int *
149 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
150 			   enum mlxsw_sp_rif_counter_dir dir)
151 {
152 	switch (dir) {
153 	case MLXSW_SP_RIF_COUNTER_EGRESS:
154 		return &rif->counter_egress;
155 	case MLXSW_SP_RIF_COUNTER_INGRESS:
156 		return &rif->counter_ingress;
157 	}
158 	return NULL;
159 }
160 
161 static bool
162 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
163 			       enum mlxsw_sp_rif_counter_dir dir)
164 {
165 	switch (dir) {
166 	case MLXSW_SP_RIF_COUNTER_EGRESS:
167 		return rif->counter_egress_valid;
168 	case MLXSW_SP_RIF_COUNTER_INGRESS:
169 		return rif->counter_ingress_valid;
170 	}
171 	return false;
172 }
173 
174 static void
175 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
176 			       enum mlxsw_sp_rif_counter_dir dir,
177 			       bool valid)
178 {
179 	switch (dir) {
180 	case MLXSW_SP_RIF_COUNTER_EGRESS:
181 		rif->counter_egress_valid = valid;
182 		break;
183 	case MLXSW_SP_RIF_COUNTER_INGRESS:
184 		rif->counter_ingress_valid = valid;
185 		break;
186 	}
187 }
188 
189 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
190 				     unsigned int counter_index, bool enable,
191 				     enum mlxsw_sp_rif_counter_dir dir)
192 {
193 	char ritr_pl[MLXSW_REG_RITR_LEN];
194 	bool is_egress = false;
195 	int err;
196 
197 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
198 		is_egress = true;
199 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
200 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
201 	if (err)
202 		return err;
203 
204 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
205 				    is_egress);
206 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
207 }
208 
209 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
210 				   struct mlxsw_sp_rif *rif,
211 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
212 {
213 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
214 	unsigned int *p_counter_index;
215 	bool valid;
216 	int err;
217 
218 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
219 	if (!valid)
220 		return -EINVAL;
221 
222 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
223 	if (!p_counter_index)
224 		return -EINVAL;
225 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
226 			     MLXSW_REG_RICNT_OPCODE_NOP);
227 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
228 	if (err)
229 		return err;
230 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
231 	return 0;
232 }
233 
234 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
235 				      unsigned int counter_index)
236 {
237 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
238 
239 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
240 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
241 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
242 }
243 
244 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
245 			       struct mlxsw_sp_rif *rif,
246 			       enum mlxsw_sp_rif_counter_dir dir)
247 {
248 	unsigned int *p_counter_index;
249 	int err;
250 
251 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
252 	if (!p_counter_index)
253 		return -EINVAL;
254 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
255 				     p_counter_index);
256 	if (err)
257 		return err;
258 
259 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
260 	if (err)
261 		goto err_counter_clear;
262 
263 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
264 					*p_counter_index, true, dir);
265 	if (err)
266 		goto err_counter_edit;
267 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
268 	return 0;
269 
270 err_counter_edit:
271 err_counter_clear:
272 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
273 			      *p_counter_index);
274 	return err;
275 }
276 
277 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
278 			       struct mlxsw_sp_rif *rif,
279 			       enum mlxsw_sp_rif_counter_dir dir)
280 {
281 	unsigned int *p_counter_index;
282 
283 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
284 		return;
285 
286 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
287 	if (WARN_ON(!p_counter_index))
288 		return;
289 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
290 				  *p_counter_index, false, dir);
291 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
292 			      *p_counter_index);
293 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
294 }
295 
296 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
297 {
298 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
299 	struct devlink *devlink;
300 
301 	devlink = priv_to_devlink(mlxsw_sp->core);
302 	if (!devlink_dpipe_table_counter_enabled(devlink,
303 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
304 		return;
305 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
306 }
307 
308 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
309 {
310 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
311 
312 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
313 }
314 
/* Number of distinct prefix lengths: one for every bit of an IPv6 address
 * plus one for prefix length zero.
 */
315 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
316 
/* Bitmap with one bit per prefix length; a set bit marks that length as used. */
317 struct mlxsw_sp_prefix_usage {
318 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
319 };
320 
/* Iterate 'prefix' over every prefix length whose bit is set in prefix_usage. */
321 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
322 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
323 
324 static bool
325 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
326 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
327 {
328 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
329 }
330 
331 static void
332 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
333 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
334 {
335 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
336 }
337 
338 static void
339 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
340 			  unsigned char prefix_len)
341 {
342 	set_bit(prefix_len, prefix_usage->b);
343 }
344 
345 static void
346 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
347 			    unsigned char prefix_len)
348 {
349 	clear_bit(prefix_len, prefix_usage->b);
350 }
351 
/* Key identifying a FIB node: an address plus its prefix length. */
352 struct mlxsw_sp_fib_key {
353 	unsigned char addr[sizeof(struct in6_addr)]; /* sized for an IPv6 address */
354 	unsigned char prefix_len;
355 };
356 
/* Kind of a FIB entry; stored in struct mlxsw_sp_fib_entry::type. */
357 enum mlxsw_sp_fib_entry_type {
358 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
359 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
360 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
361 
362 	/* This is a special case of local delivery, where a packet should be
363 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
364 	 * because that's a type of next hop, not of FIB entry. (There can be
365 	 * several next hops in a REMOTE entry, and some of them may be
366 	 * encapsulating entries.)
367 	 */
368 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
369 };
370 
/* Forward declaration; struct mlxsw_sp_fib_entry below points at it. */
371 struct mlxsw_sp_nexthop_group;
372 
/* One node of a FIB, identified by 'key' and owned by 'fib'. */
373 struct mlxsw_sp_fib_node {
374 	struct list_head entry_list; /* NOTE(review): presumably the FIB entries sharing this key; confirm */
375 	struct list_head list; /* NOTE(review): presumably the link on fib->node_list; confirm */
376 	struct rhash_head ht_node; /* NOTE(review): presumably the link in fib->ht; confirm */
377 	struct mlxsw_sp_fib *fib;
378 	struct mlxsw_sp_fib_key key;
379 };
380 
/* Decap state of a FIB entry; filled in by mlxsw_sp_fib_entry_decap_init()
 * and torn down by mlxsw_sp_fib_entry_decap_fini().
 */
381 struct mlxsw_sp_fib_entry_decap {
382 	struct mlxsw_sp_ipip_entry *ipip_entry; /* IPIP entry this FIB entry decapsulates for */
383 	u32 tunnel_index; /* KVDL index allocated with MLXSW_SP_KVDL_ENTRY_TYPE_ADJ */
384 };
385 
/* Protocol-independent part of a FIB entry; embedded as 'common' in the
 * IPv4 and IPv6 entry structures below.
 */
386 struct mlxsw_sp_fib_entry {
387 	struct list_head list;
388 	struct mlxsw_sp_fib_node *fib_node;
389 	enum mlxsw_sp_fib_entry_type type;
390 	struct list_head nexthop_group_node;
391 	struct mlxsw_sp_nexthop_group *nh_group;
392 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
393 };
394 
/* IPv4 FIB entry: the common entry plus IPv4 route attributes. */
395 struct mlxsw_sp_fib4_entry {
396 	struct mlxsw_sp_fib_entry common;
397 	u32 tb_id;
398 	u32 prio;
399 	u8 tos;
400 	u8 type;
401 };
402 
/* IPv6 FIB entry: the common entry plus a list of routes. */
403 struct mlxsw_sp_fib6_entry {
404 	struct mlxsw_sp_fib_entry common;
405 	struct list_head rt6_list; /* NOTE(review): presumably a list of struct mlxsw_sp_rt6; confirm */
406 	unsigned int nrt6; /* NOTE(review): presumably the length of rt6_list; confirm */
407 };
408 
/* List element wrapping one kernel IPv6 route (struct fib6_info). */
409 struct mlxsw_sp_rt6 {
410 	struct list_head list;
411 	struct fib6_info *rt;
412 };
413 
/* One LPM tree slot in router->lpm.trees. A slot whose ref_count is zero is
 * treated as unused by mlxsw_sp_lpm_tree_find_unused().
 */
414 struct mlxsw_sp_lpm_tree {
415 	u8 id; /* tree ID */
416 	unsigned int ref_count; /* changed by mlxsw_sp_lpm_tree_hold()/_put() */
417 	enum mlxsw_sp_l3proto proto;
418 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; /* indexed by prefix length */
419 	struct mlxsw_sp_prefix_usage prefix_usage; /* prefix lengths the tree was created for */
420 };
421 
/* Per-protocol FIB of a virtual router; created by mlxsw_sp_fib_create(). */
422 struct mlxsw_sp_fib {
423 	struct rhashtable ht; /* initialised with mlxsw_sp_fib_ht_params */
424 	struct list_head node_list; /* expected to be empty when the FIB is destroyed */
425 	struct mlxsw_sp_vr *vr; /* owning virtual router */
426 	struct mlxsw_sp_lpm_tree *lpm_tree; /* tree this FIB holds a reference on and is bound to */
427 	enum mlxsw_sp_l3proto proto;
428 };
429 
/* One virtual router slot in router->vrs. The slot counts as used when any of
 * fib4, fib6 or the mr_table entries is non-NULL (see mlxsw_sp_vr_is_used()).
 */
430 struct mlxsw_sp_vr {
431 	u16 id; /* virtual router ID */
432 	u32 tb_id; /* kernel fib table id */
433 	unsigned int rif_count;
434 	struct mlxsw_sp_fib *fib4;
435 	struct mlxsw_sp_fib *fib6;
436 	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
437 };
438 
/* Hash table parameters used by mlxsw_sp_fib_create(). This is a tentative
 * definition; NOTE(review): the initialised definition is presumably further
 * down the file, outside this chunk.
 */
439 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
440 
441 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
442 						struct mlxsw_sp_vr *vr,
443 						enum mlxsw_sp_l3proto proto)
444 {
445 	struct mlxsw_sp_lpm_tree *lpm_tree;
446 	struct mlxsw_sp_fib *fib;
447 	int err;
448 
449 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
450 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
451 	if (!fib)
452 		return ERR_PTR(-ENOMEM);
453 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
454 	if (err)
455 		goto err_rhashtable_init;
456 	INIT_LIST_HEAD(&fib->node_list);
457 	fib->proto = proto;
458 	fib->vr = vr;
459 	fib->lpm_tree = lpm_tree;
460 	mlxsw_sp_lpm_tree_hold(lpm_tree);
461 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
462 	if (err)
463 		goto err_lpm_tree_bind;
464 	return fib;
465 
466 err_lpm_tree_bind:
467 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
468 err_rhashtable_init:
469 	kfree(fib);
470 	return ERR_PTR(err);
471 }
472 
473 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
474 				 struct mlxsw_sp_fib *fib)
475 {
476 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
477 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
478 	WARN_ON(!list_empty(&fib->node_list));
479 	rhashtable_destroy(&fib->ht);
480 	kfree(fib);
481 }
482 
483 static struct mlxsw_sp_lpm_tree *
484 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
485 {
486 	static struct mlxsw_sp_lpm_tree *lpm_tree;
487 	int i;
488 
489 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
490 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
491 		if (lpm_tree->ref_count == 0)
492 			return lpm_tree;
493 	}
494 	return NULL;
495 }
496 
497 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
498 				   struct mlxsw_sp_lpm_tree *lpm_tree)
499 {
500 	char ralta_pl[MLXSW_REG_RALTA_LEN];
501 
502 	mlxsw_reg_ralta_pack(ralta_pl, true,
503 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
504 			     lpm_tree->id);
505 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
506 }
507 
508 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
509 				   struct mlxsw_sp_lpm_tree *lpm_tree)
510 {
511 	char ralta_pl[MLXSW_REG_RALTA_LEN];
512 
513 	mlxsw_reg_ralta_pack(ralta_pl, false,
514 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
515 			     lpm_tree->id);
516 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
517 }
518 
519 static int
520 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
521 				  struct mlxsw_sp_prefix_usage *prefix_usage,
522 				  struct mlxsw_sp_lpm_tree *lpm_tree)
523 {
524 	char ralst_pl[MLXSW_REG_RALST_LEN];
525 	u8 root_bin = 0;
526 	u8 prefix;
527 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
528 
529 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
530 		root_bin = prefix;
531 
532 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
533 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
534 		if (prefix == 0)
535 			continue;
536 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
537 					 MLXSW_REG_RALST_BIN_NO_CHILD);
538 		last_prefix = prefix;
539 	}
540 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
541 }
542 
543 static struct mlxsw_sp_lpm_tree *
544 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
545 			 struct mlxsw_sp_prefix_usage *prefix_usage,
546 			 enum mlxsw_sp_l3proto proto)
547 {
548 	struct mlxsw_sp_lpm_tree *lpm_tree;
549 	int err;
550 
551 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
552 	if (!lpm_tree)
553 		return ERR_PTR(-EBUSY);
554 	lpm_tree->proto = proto;
555 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
556 	if (err)
557 		return ERR_PTR(err);
558 
559 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
560 						lpm_tree);
561 	if (err)
562 		goto err_left_struct_set;
563 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
564 	       sizeof(lpm_tree->prefix_usage));
565 	memset(&lpm_tree->prefix_ref_count, 0,
566 	       sizeof(lpm_tree->prefix_ref_count));
567 	lpm_tree->ref_count = 1;
568 	return lpm_tree;
569 
570 err_left_struct_set:
571 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
572 	return ERR_PTR(err);
573 }
574 
/* Destroy an LPM tree; this amounts to releasing it in the device through
 * mlxsw_sp_lpm_tree_free().
 */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
580 
581 static struct mlxsw_sp_lpm_tree *
582 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
583 		      struct mlxsw_sp_prefix_usage *prefix_usage,
584 		      enum mlxsw_sp_l3proto proto)
585 {
586 	struct mlxsw_sp_lpm_tree *lpm_tree;
587 	int i;
588 
589 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
590 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
591 		if (lpm_tree->ref_count != 0 &&
592 		    lpm_tree->proto == proto &&
593 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
594 					     prefix_usage)) {
595 			mlxsw_sp_lpm_tree_hold(lpm_tree);
596 			return lpm_tree;
597 		}
598 	}
599 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
600 }
601 
602 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
603 {
604 	lpm_tree->ref_count++;
605 }
606 
607 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
608 				  struct mlxsw_sp_lpm_tree *lpm_tree)
609 {
610 	if (--lpm_tree->ref_count == 0)
611 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
612 }
613 
614 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
615 
616 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
617 {
618 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
619 	struct mlxsw_sp_lpm_tree *lpm_tree;
620 	u64 max_trees;
621 	int err, i;
622 
623 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
624 		return -EIO;
625 
626 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
627 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
628 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
629 					     sizeof(struct mlxsw_sp_lpm_tree),
630 					     GFP_KERNEL);
631 	if (!mlxsw_sp->router->lpm.trees)
632 		return -ENOMEM;
633 
634 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
635 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
636 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
637 	}
638 
639 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
640 					 MLXSW_SP_L3_PROTO_IPV4);
641 	if (IS_ERR(lpm_tree)) {
642 		err = PTR_ERR(lpm_tree);
643 		goto err_ipv4_tree_get;
644 	}
645 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
646 
647 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
648 					 MLXSW_SP_L3_PROTO_IPV6);
649 	if (IS_ERR(lpm_tree)) {
650 		err = PTR_ERR(lpm_tree);
651 		goto err_ipv6_tree_get;
652 	}
653 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
654 
655 	return 0;
656 
657 err_ipv6_tree_get:
658 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
659 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
660 err_ipv4_tree_get:
661 	kfree(mlxsw_sp->router->lpm.trees);
662 	return err;
663 }
664 
665 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
666 {
667 	struct mlxsw_sp_lpm_tree *lpm_tree;
668 
669 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
670 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
671 
672 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
673 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
674 
675 	kfree(mlxsw_sp->router->lpm.trees);
676 }
677 
678 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
679 {
680 	return !!vr->fib4 || !!vr->fib6 ||
681 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
682 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
683 }
684 
685 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
686 {
687 	struct mlxsw_sp_vr *vr;
688 	int i;
689 
690 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
691 		vr = &mlxsw_sp->router->vrs[i];
692 		if (!mlxsw_sp_vr_is_used(vr))
693 			return vr;
694 	}
695 	return NULL;
696 }
697 
698 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
699 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
700 {
701 	char raltb_pl[MLXSW_REG_RALTB_LEN];
702 
703 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
704 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
705 			     tree_id);
706 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
707 }
708 
709 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
710 				       const struct mlxsw_sp_fib *fib)
711 {
712 	char raltb_pl[MLXSW_REG_RALTB_LEN];
713 
714 	/* Bind to tree 0 which is default */
715 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
716 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
717 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
718 }
719 
720 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
721 {
722 	/* For our purpose, squash main, default and local tables into one */
723 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
724 		tb_id = RT_TABLE_MAIN;
725 	return tb_id;
726 }
727 
728 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
729 					    u32 tb_id)
730 {
731 	struct mlxsw_sp_vr *vr;
732 	int i;
733 
734 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
735 
736 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
737 		vr = &mlxsw_sp->router->vrs[i];
738 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
739 			return vr;
740 	}
741 	return NULL;
742 }
743 
744 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
745 					    enum mlxsw_sp_l3proto proto)
746 {
747 	switch (proto) {
748 	case MLXSW_SP_L3_PROTO_IPV4:
749 		return vr->fib4;
750 	case MLXSW_SP_L3_PROTO_IPV6:
751 		return vr->fib6;
752 	}
753 	return NULL;
754 }
755 
756 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
757 					      u32 tb_id,
758 					      struct netlink_ext_ack *extack)
759 {
760 	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
761 	struct mlxsw_sp_fib *fib4;
762 	struct mlxsw_sp_fib *fib6;
763 	struct mlxsw_sp_vr *vr;
764 	int err;
765 
766 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
767 	if (!vr) {
768 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
769 		return ERR_PTR(-EBUSY);
770 	}
771 	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
772 	if (IS_ERR(fib4))
773 		return ERR_CAST(fib4);
774 	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
775 	if (IS_ERR(fib6)) {
776 		err = PTR_ERR(fib6);
777 		goto err_fib6_create;
778 	}
779 	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
780 					     MLXSW_SP_L3_PROTO_IPV4);
781 	if (IS_ERR(mr4_table)) {
782 		err = PTR_ERR(mr4_table);
783 		goto err_mr4_table_create;
784 	}
785 	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
786 					     MLXSW_SP_L3_PROTO_IPV6);
787 	if (IS_ERR(mr6_table)) {
788 		err = PTR_ERR(mr6_table);
789 		goto err_mr6_table_create;
790 	}
791 
792 	vr->fib4 = fib4;
793 	vr->fib6 = fib6;
794 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
795 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
796 	vr->tb_id = tb_id;
797 	return vr;
798 
799 err_mr6_table_create:
800 	mlxsw_sp_mr_table_destroy(mr4_table);
801 err_mr4_table_create:
802 	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
803 err_fib6_create:
804 	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
805 	return ERR_PTR(err);
806 }
807 
808 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
809 				struct mlxsw_sp_vr *vr)
810 {
811 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
812 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
813 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
814 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
815 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
816 	vr->fib6 = NULL;
817 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
818 	vr->fib4 = NULL;
819 }
820 
821 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
822 					   struct netlink_ext_ack *extack)
823 {
824 	struct mlxsw_sp_vr *vr;
825 
826 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
827 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
828 	if (!vr)
829 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
830 	return vr;
831 }
832 
833 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
834 {
835 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
836 	    list_empty(&vr->fib6->node_list) &&
837 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
838 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
839 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
840 }
841 
842 static bool
843 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
844 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
845 {
846 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
847 
848 	if (!mlxsw_sp_vr_is_used(vr))
849 		return false;
850 	if (fib->lpm_tree->id == tree_id)
851 		return true;
852 	return false;
853 }
854 
855 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
856 					struct mlxsw_sp_fib *fib,
857 					struct mlxsw_sp_lpm_tree *new_tree)
858 {
859 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
860 	int err;
861 
862 	fib->lpm_tree = new_tree;
863 	mlxsw_sp_lpm_tree_hold(new_tree);
864 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
865 	if (err)
866 		goto err_tree_bind;
867 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
868 	return 0;
869 
870 err_tree_bind:
871 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
872 	fib->lpm_tree = old_tree;
873 	return err;
874 }
875 
876 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
877 					 struct mlxsw_sp_fib *fib,
878 					 struct mlxsw_sp_lpm_tree *new_tree)
879 {
880 	enum mlxsw_sp_l3proto proto = fib->proto;
881 	struct mlxsw_sp_lpm_tree *old_tree;
882 	u8 old_id, new_id = new_tree->id;
883 	struct mlxsw_sp_vr *vr;
884 	int i, err;
885 
886 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
887 	old_id = old_tree->id;
888 
889 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
890 		vr = &mlxsw_sp->router->vrs[i];
891 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
892 			continue;
893 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
894 						   mlxsw_sp_vr_fib(vr, proto),
895 						   new_tree);
896 		if (err)
897 			goto err_tree_replace;
898 	}
899 
900 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
901 	       sizeof(new_tree->prefix_ref_count));
902 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
903 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
904 
905 	return 0;
906 
907 err_tree_replace:
908 	for (i--; i >= 0; i--) {
909 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
910 			continue;
911 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
912 					     mlxsw_sp_vr_fib(vr, proto),
913 					     old_tree);
914 	}
915 	return err;
916 }
917 
918 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
919 {
920 	struct mlxsw_sp_vr *vr;
921 	u64 max_vrs;
922 	int i;
923 
924 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
925 		return -EIO;
926 
927 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
928 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
929 					GFP_KERNEL);
930 	if (!mlxsw_sp->router->vrs)
931 		return -ENOMEM;
932 
933 	for (i = 0; i < max_vrs; i++) {
934 		vr = &mlxsw_sp->router->vrs[i];
935 		vr->id = i;
936 	}
937 
938 	return 0;
939 }
940 
941 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
942 
943 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
944 {
945 	/* At this stage we're guaranteed not to have new incoming
946 	 * FIB notifications and the work queue is free from FIBs
947 	 * sitting on top of mlxsw netdevs. However, we can still
948 	 * have other FIBs queued. Flush the queue before flushing
949 	 * the device's tables. No need for locks, as we're the only
950 	 * writer.
951 	 */
952 	mlxsw_core_flush_owq();
953 	mlxsw_sp_router_fib_flush(mlxsw_sp);
954 	kfree(mlxsw_sp->router->vrs);
955 }
956 
957 static struct net_device *
958 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
959 {
960 	struct ip_tunnel *tun = netdev_priv(ol_dev);
961 	struct net *net = dev_net(ol_dev);
962 
963 	return __dev_get_by_index(net, tun->parms.link);
964 }
965 
966 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
967 {
968 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
969 
970 	if (d)
971 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
972 	else
973 		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
974 }
975 
976 static struct mlxsw_sp_rif *
977 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
978 		    const struct mlxsw_sp_rif_params *params,
979 		    struct netlink_ext_ack *extack);
980 
981 static struct mlxsw_sp_rif_ipip_lb *
982 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
983 				enum mlxsw_sp_ipip_type ipipt,
984 				struct net_device *ol_dev,
985 				struct netlink_ext_ack *extack)
986 {
987 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
988 	const struct mlxsw_sp_ipip_ops *ipip_ops;
989 	struct mlxsw_sp_rif *rif;
990 
991 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
992 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
993 		.common.dev = ol_dev,
994 		.common.lag = false,
995 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
996 	};
997 
998 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
999 	if (IS_ERR(rif))
1000 		return ERR_CAST(rif);
1001 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1002 }
1003 
1004 static struct mlxsw_sp_ipip_entry *
1005 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1006 			  enum mlxsw_sp_ipip_type ipipt,
1007 			  struct net_device *ol_dev)
1008 {
1009 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1010 	struct mlxsw_sp_ipip_entry *ipip_entry;
1011 	struct mlxsw_sp_ipip_entry *ret = NULL;
1012 
1013 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1014 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1015 	if (!ipip_entry)
1016 		return ERR_PTR(-ENOMEM);
1017 
1018 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1019 							    ol_dev, NULL);
1020 	if (IS_ERR(ipip_entry->ol_lb)) {
1021 		ret = ERR_CAST(ipip_entry->ol_lb);
1022 		goto err_ol_ipip_lb_create;
1023 	}
1024 
1025 	ipip_entry->ipipt = ipipt;
1026 	ipip_entry->ol_dev = ol_dev;
1027 
1028 	switch (ipip_ops->ul_proto) {
1029 	case MLXSW_SP_L3_PROTO_IPV4:
1030 		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1031 		break;
1032 	case MLXSW_SP_L3_PROTO_IPV6:
1033 		WARN_ON(1);
1034 		break;
1035 	}
1036 
1037 	return ipip_entry;
1038 
1039 err_ol_ipip_lb_create:
1040 	kfree(ipip_entry);
1041 	return ret;
1042 }
1043 
1044 static void
1045 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1046 {
1047 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1048 	kfree(ipip_entry);
1049 }
1050 
1051 static bool
1052 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1053 				  const enum mlxsw_sp_l3proto ul_proto,
1054 				  union mlxsw_sp_l3addr saddr,
1055 				  u32 ul_tb_id,
1056 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1057 {
1058 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1059 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1060 	union mlxsw_sp_l3addr tun_saddr;
1061 
1062 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1063 		return false;
1064 
1065 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1066 	return tun_ul_tb_id == ul_tb_id &&
1067 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1068 }
1069 
1070 static int
1071 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1072 			      struct mlxsw_sp_fib_entry *fib_entry,
1073 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1074 {
1075 	u32 tunnel_index;
1076 	int err;
1077 
1078 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1079 				  1, &tunnel_index);
1080 	if (err)
1081 		return err;
1082 
1083 	ipip_entry->decap_fib_entry = fib_entry;
1084 	fib_entry->decap.ipip_entry = ipip_entry;
1085 	fib_entry->decap.tunnel_index = tunnel_index;
1086 	return 0;
1087 }
1088 
1089 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1090 					  struct mlxsw_sp_fib_entry *fib_entry)
1091 {
1092 	/* Unlink this node from the IPIP entry that it's the decap entry of. */
1093 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1094 	fib_entry->decap.ipip_entry = NULL;
1095 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1096 			   1, fib_entry->decap.tunnel_index);
1097 }
1098 
1099 static struct mlxsw_sp_fib_node *
1100 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1101 			 size_t addr_len, unsigned char prefix_len);
1102 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1103 				     struct mlxsw_sp_fib_entry *fib_entry);
1104 
1105 static void
1106 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1107 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1108 {
1109 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1110 
1111 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1112 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1113 
1114 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1115 }
1116 
1117 static void
1118 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1119 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1120 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1121 {
1122 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1123 					  ipip_entry))
1124 		return;
1125 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1126 
1127 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1128 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1129 }
1130 
1131 /* Given an IPIP entry, find the corresponding decap route. */
1132 static struct mlxsw_sp_fib_entry *
1133 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1134 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1135 {
1136 	static struct mlxsw_sp_fib_node *fib_node;
1137 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1138 	struct mlxsw_sp_fib_entry *fib_entry;
1139 	unsigned char saddr_prefix_len;
1140 	union mlxsw_sp_l3addr saddr;
1141 	struct mlxsw_sp_fib *ul_fib;
1142 	struct mlxsw_sp_vr *ul_vr;
1143 	const void *saddrp;
1144 	size_t saddr_len;
1145 	u32 ul_tb_id;
1146 	u32 saddr4;
1147 
1148 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1149 
1150 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1151 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1152 	if (!ul_vr)
1153 		return NULL;
1154 
1155 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1156 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1157 					   ipip_entry->ol_dev);
1158 
1159 	switch (ipip_ops->ul_proto) {
1160 	case MLXSW_SP_L3_PROTO_IPV4:
1161 		saddr4 = be32_to_cpu(saddr.addr4);
1162 		saddrp = &saddr4;
1163 		saddr_len = 4;
1164 		saddr_prefix_len = 32;
1165 		break;
1166 	case MLXSW_SP_L3_PROTO_IPV6:
1167 		WARN_ON(1);
1168 		return NULL;
1169 	}
1170 
1171 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1172 					    saddr_prefix_len);
1173 	if (!fib_node || list_empty(&fib_node->entry_list))
1174 		return NULL;
1175 
1176 	fib_entry = list_first_entry(&fib_node->entry_list,
1177 				     struct mlxsw_sp_fib_entry, list);
1178 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1179 		return NULL;
1180 
1181 	return fib_entry;
1182 }
1183 
1184 static struct mlxsw_sp_ipip_entry *
1185 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1186 			   enum mlxsw_sp_ipip_type ipipt,
1187 			   struct net_device *ol_dev)
1188 {
1189 	struct mlxsw_sp_ipip_entry *ipip_entry;
1190 
1191 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1192 	if (IS_ERR(ipip_entry))
1193 		return ipip_entry;
1194 
1195 	list_add_tail(&ipip_entry->ipip_list_node,
1196 		      &mlxsw_sp->router->ipip_list);
1197 
1198 	return ipip_entry;
1199 }
1200 
1201 static void
1202 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1203 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1204 {
1205 	list_del(&ipip_entry->ipip_list_node);
1206 	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1207 }
1208 
1209 static bool
1210 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1211 				  const struct net_device *ul_dev,
1212 				  enum mlxsw_sp_l3proto ul_proto,
1213 				  union mlxsw_sp_l3addr ul_dip,
1214 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1215 {
1216 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1217 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1218 	struct net_device *ipip_ul_dev;
1219 
1220 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1221 		return false;
1222 
1223 	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1224 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1225 						 ul_tb_id, ipip_entry) &&
1226 	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1227 }
1228 
1229 /* Given decap parameters, find the corresponding IPIP entry. */
1230 static struct mlxsw_sp_ipip_entry *
1231 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1232 				  const struct net_device *ul_dev,
1233 				  enum mlxsw_sp_l3proto ul_proto,
1234 				  union mlxsw_sp_l3addr ul_dip)
1235 {
1236 	struct mlxsw_sp_ipip_entry *ipip_entry;
1237 
1238 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1239 			    ipip_list_node)
1240 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1241 						      ul_proto, ul_dip,
1242 						      ipip_entry))
1243 			return ipip_entry;
1244 
1245 	return NULL;
1246 }
1247 
1248 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1249 				      const struct net_device *dev,
1250 				      enum mlxsw_sp_ipip_type *p_type)
1251 {
1252 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1253 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1254 	enum mlxsw_sp_ipip_type ipipt;
1255 
1256 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1257 		ipip_ops = router->ipip_ops_arr[ipipt];
1258 		if (dev->type == ipip_ops->dev_type) {
1259 			if (p_type)
1260 				*p_type = ipipt;
1261 			return true;
1262 		}
1263 	}
1264 	return false;
1265 }
1266 
1267 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1268 				const struct net_device *dev)
1269 {
1270 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1271 }
1272 
1273 static struct mlxsw_sp_ipip_entry *
1274 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1275 				   const struct net_device *ol_dev)
1276 {
1277 	struct mlxsw_sp_ipip_entry *ipip_entry;
1278 
1279 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1280 			    ipip_list_node)
1281 		if (ipip_entry->ol_dev == ol_dev)
1282 			return ipip_entry;
1283 
1284 	return NULL;
1285 }
1286 
1287 static struct mlxsw_sp_ipip_entry *
1288 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1289 				   const struct net_device *ul_dev,
1290 				   struct mlxsw_sp_ipip_entry *start)
1291 {
1292 	struct mlxsw_sp_ipip_entry *ipip_entry;
1293 
1294 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1295 					ipip_list_node);
1296 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1297 				     ipip_list_node) {
1298 		struct net_device *ipip_ul_dev =
1299 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1300 
1301 		if (ipip_ul_dev == ul_dev)
1302 			return ipip_entry;
1303 	}
1304 
1305 	return NULL;
1306 }
1307 
1308 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1309 				const struct net_device *dev)
1310 {
1311 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1312 }
1313 
1314 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1315 						const struct net_device *ol_dev,
1316 						enum mlxsw_sp_ipip_type ipipt)
1317 {
1318 	const struct mlxsw_sp_ipip_ops *ops
1319 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1320 
1321 	/* For deciding whether decap should be offloaded, we don't care about
1322 	 * overlay protocol, so ask whether either one is supported.
1323 	 */
1324 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1325 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1326 }
1327 
1328 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1329 						struct net_device *ol_dev)
1330 {
1331 	struct mlxsw_sp_ipip_entry *ipip_entry;
1332 	enum mlxsw_sp_l3proto ul_proto;
1333 	enum mlxsw_sp_ipip_type ipipt;
1334 	union mlxsw_sp_l3addr saddr;
1335 	u32 ul_tb_id;
1336 
1337 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1338 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1339 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1340 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1341 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1342 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1343 							  saddr, ul_tb_id,
1344 							  NULL)) {
1345 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1346 								ol_dev);
1347 			if (IS_ERR(ipip_entry))
1348 				return PTR_ERR(ipip_entry);
1349 		}
1350 	}
1351 
1352 	return 0;
1353 }
1354 
/* Handle unregistration of @ol_dev: destroy its IPIP entry, if it has one. */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, entry);
}
1364 
/* The overlay device went up: promote the decap route of @ipip_entry, if one
 * can be found.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_entry;

	decap_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_entry)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, decap_entry);
}
1376 
1377 static int
1378 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
1379 			struct mlxsw_sp_vr *ul_vr, bool enable)
1380 {
1381 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1382 	struct mlxsw_sp_rif *rif = &lb_rif->common;
1383 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1384 	char ritr_pl[MLXSW_REG_RITR_LEN];
1385 	u32 saddr4;
1386 
1387 	switch (lb_cf.ul_protocol) {
1388 	case MLXSW_SP_L3_PROTO_IPV4:
1389 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1390 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1391 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
1392 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1393 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1394 			    ul_vr->id, saddr4, lb_cf.okey);
1395 		break;
1396 
1397 	case MLXSW_SP_L3_PROTO_IPV6:
1398 		return -EAFNOSUPPORT;
1399 	}
1400 
1401 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1402 }
1403 
1404 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1405 						 struct net_device *ol_dev)
1406 {
1407 	struct mlxsw_sp_ipip_entry *ipip_entry;
1408 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1409 	struct mlxsw_sp_vr *ul_vr;
1410 	int err = 0;
1411 
1412 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1413 	if (ipip_entry) {
1414 		lb_rif = ipip_entry->ol_lb;
1415 		ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
1416 		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
1417 		if (err)
1418 			goto out;
1419 		lb_rif->common.mtu = ol_dev->mtu;
1420 	}
1421 
1422 out:
1423 	return err;
1424 }
1425 
/* Forward an "up" event of @ol_dev to its IPIP entry, if it has one. */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, entry);
}
1435 
1436 static void
1437 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1438 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1439 {
1440 	if (ipip_entry->decap_fib_entry)
1441 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1442 }
1443 
/* Forward a "down" event of @ol_dev to its IPIP entry, if it has one. */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, entry);
}
1453 
1454 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1455 					 struct mlxsw_sp_rif *old_rif,
1456 					 struct mlxsw_sp_rif *new_rif);
1457 static int
1458 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1459 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1460 				 bool keep_encap,
1461 				 struct netlink_ext_ack *extack)
1462 {
1463 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1464 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1465 
1466 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1467 						     ipip_entry->ipipt,
1468 						     ipip_entry->ol_dev,
1469 						     extack);
1470 	if (IS_ERR(new_lb_rif))
1471 		return PTR_ERR(new_lb_rif);
1472 	ipip_entry->ol_lb = new_lb_rif;
1473 
1474 	if (keep_encap)
1475 		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1476 					     &new_lb_rif->common);
1477 
1478 	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1479 
1480 	return 0;
1481 }
1482 
1483 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1484 					struct mlxsw_sp_rif *rif);
1485 
1486 /**
1487  * Update the offload related to an IPIP entry. This always updates decap, and
1488  * in addition to that it also:
1489  * @recreate_loopback: recreates the associated loopback RIF
1490  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1491  *              relevant when recreate_loopback is true.
1492  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1493  *                   is only relevant when recreate_loopback is false.
1494  */
1495 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1496 					struct mlxsw_sp_ipip_entry *ipip_entry,
1497 					bool recreate_loopback,
1498 					bool keep_encap,
1499 					bool update_nexthops,
1500 					struct netlink_ext_ack *extack)
1501 {
1502 	int err;
1503 
1504 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1505 	 * recreate it. That creates a window of opportunity where RALUE and
1506 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1507 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1508 	 * of RALUE, demote the decap route back.
1509 	 */
1510 	if (ipip_entry->decap_fib_entry)
1511 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1512 
1513 	if (recreate_loopback) {
1514 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1515 						       keep_encap, extack);
1516 		if (err)
1517 			return err;
1518 	} else if (update_nexthops) {
1519 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1520 					    &ipip_entry->ol_lb->common);
1521 	}
1522 
1523 	if (ipip_entry->ol_dev->flags & IFF_UP)
1524 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1525 
1526 	return 0;
1527 }
1528 
1529 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1530 						struct net_device *ol_dev,
1531 						struct netlink_ext_ack *extack)
1532 {
1533 	struct mlxsw_sp_ipip_entry *ipip_entry =
1534 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1535 	enum mlxsw_sp_l3proto ul_proto;
1536 	union mlxsw_sp_l3addr saddr;
1537 	u32 ul_tb_id;
1538 
1539 	if (!ipip_entry)
1540 		return 0;
1541 
1542 	/* For flat configuration cases, moving overlay to a different VRF might
1543 	 * cause local address conflict, and the conflicting tunnels need to be
1544 	 * demoted.
1545 	 */
1546 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1547 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1548 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1549 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1550 						 saddr, ul_tb_id,
1551 						 ipip_entry)) {
1552 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1553 		return 0;
1554 	}
1555 
1556 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1557 						   true, false, false, extack);
1558 }
1559 
1560 static int
1561 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1562 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1563 				     struct net_device *ul_dev,
1564 				     struct netlink_ext_ack *extack)
1565 {
1566 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1567 						   true, true, false, extack);
1568 }
1569 
1570 static int
1571 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1572 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1573 				    struct net_device *ul_dev)
1574 {
1575 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1576 						   false, false, true, NULL);
1577 }
1578 
1579 static int
1580 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1581 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1582 				      struct net_device *ul_dev)
1583 {
1584 	/* A down underlay device causes encapsulated packets to not be
1585 	 * forwarded, but decap still works. So refresh next hops without
1586 	 * touching anything else.
1587 	 */
1588 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1589 						   false, false, true, NULL);
1590 }
1591 
1592 static int
1593 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1594 					struct net_device *ol_dev,
1595 					struct netlink_ext_ack *extack)
1596 {
1597 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1598 	struct mlxsw_sp_ipip_entry *ipip_entry;
1599 	int err;
1600 
1601 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1602 	if (!ipip_entry)
1603 		/* A change might make a tunnel eligible for offloading, but
1604 		 * that is currently not implemented. What falls to slow path
1605 		 * stays there.
1606 		 */
1607 		return 0;
1608 
1609 	/* A change might make a tunnel not eligible for offloading. */
1610 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1611 						 ipip_entry->ipipt)) {
1612 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1613 		return 0;
1614 	}
1615 
1616 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1617 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1618 	return err;
1619 }
1620 
1621 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1622 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1623 {
1624 	struct net_device *ol_dev = ipip_entry->ol_dev;
1625 
1626 	if (ol_dev->flags & IFF_UP)
1627 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1628 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1629 }
1630 
1631 /* The configuration where several tunnels have the same local address in the
1632  * same underlay table needs special treatment in the HW. That is currently not
1633  * implemented in the driver. This function finds and demotes the first tunnel
1634  * with a given source address, except the one passed in in the argument
1635  * `except'.
1636  */
1637 bool
1638 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1639 				     enum mlxsw_sp_l3proto ul_proto,
1640 				     union mlxsw_sp_l3addr saddr,
1641 				     u32 ul_tb_id,
1642 				     const struct mlxsw_sp_ipip_entry *except)
1643 {
1644 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1645 
1646 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1647 				 ipip_list_node) {
1648 		if (ipip_entry != except &&
1649 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1650 						      ul_tb_id, ipip_entry)) {
1651 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1652 			return true;
1653 		}
1654 	}
1655 
1656 	return false;
1657 }
1658 
1659 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1660 						     struct net_device *ul_dev)
1661 {
1662 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1663 
1664 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1665 				 ipip_list_node) {
1666 		struct net_device *ipip_ul_dev =
1667 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1668 
1669 		if (ipip_ul_dev == ul_dev)
1670 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1671 	}
1672 }
1673 
1674 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1675 				     struct net_device *ol_dev,
1676 				     unsigned long event,
1677 				     struct netdev_notifier_info *info)
1678 {
1679 	struct netdev_notifier_changeupper_info *chup;
1680 	struct netlink_ext_ack *extack;
1681 
1682 	switch (event) {
1683 	case NETDEV_REGISTER:
1684 		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1685 	case NETDEV_UNREGISTER:
1686 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1687 		return 0;
1688 	case NETDEV_UP:
1689 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1690 		return 0;
1691 	case NETDEV_DOWN:
1692 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1693 		return 0;
1694 	case NETDEV_CHANGEUPPER:
1695 		chup = container_of(info, typeof(*chup), info);
1696 		extack = info->extack;
1697 		if (netif_is_l3_master(chup->upper_dev))
1698 			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1699 								    ol_dev,
1700 								    extack);
1701 		return 0;
1702 	case NETDEV_CHANGE:
1703 		extack = info->extack;
1704 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1705 							       ol_dev, extack);
1706 	case NETDEV_CHANGEMTU:
1707 		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1708 	}
1709 	return 0;
1710 }
1711 
1712 static int
1713 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1714 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1715 				   struct net_device *ul_dev,
1716 				   unsigned long event,
1717 				   struct netdev_notifier_info *info)
1718 {
1719 	struct netdev_notifier_changeupper_info *chup;
1720 	struct netlink_ext_ack *extack;
1721 
1722 	switch (event) {
1723 	case NETDEV_CHANGEUPPER:
1724 		chup = container_of(info, typeof(*chup), info);
1725 		extack = info->extack;
1726 		if (netif_is_l3_master(chup->upper_dev))
1727 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1728 								    ipip_entry,
1729 								    ul_dev,
1730 								    extack);
1731 		break;
1732 
1733 	case NETDEV_UP:
1734 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1735 							   ul_dev);
1736 	case NETDEV_DOWN:
1737 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1738 							     ipip_entry,
1739 							     ul_dev);
1740 	}
1741 	return 0;
1742 }
1743 
1744 int
1745 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1746 				 struct net_device *ul_dev,
1747 				 unsigned long event,
1748 				 struct netdev_notifier_info *info)
1749 {
1750 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1751 	int err;
1752 
1753 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1754 								ul_dev,
1755 								ipip_entry))) {
1756 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1757 							 ul_dev, event, info);
1758 		if (err) {
1759 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1760 								 ul_dev);
1761 			return err;
1762 		}
1763 	}
1764 
1765 	return 0;
1766 }
1767 
/* Key of the neighbour hash table: the address of the kernel neighbour. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1771 
/* Driver-side state kept for one kernel neighbour. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node; /* member of the RIF's neigh_list */
	struct rhash_head ht_node; /* member of the router's neigh_ht */
	struct mlxsw_sp_neigh_key key; /* hash table key */
	u16 rif; /* index of the RIF found for the neighbour's netdevice */
	bool connected; /* NOTE(review): set outside this section - confirm
			 * exact meaning
			 */
	unsigned char ha[ETH_ALEN]; /* returned by mlxsw_sp_neigh_entry_ha();
				     * presumably the hardware address -
				     * confirm
				     */
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node; /* NOTE(review): owning
						    * list is not visible in
						    * this section
						    */
	unsigned int counter_index; /* flow counter, valid only if
				     * counter_valid is set
				     */
	bool counter_valid; /* a flow counter is currently allocated */
};
1786 
1787 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1788 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1789 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1790 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1791 };
1792 
1793 struct mlxsw_sp_neigh_entry *
1794 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1795 			struct mlxsw_sp_neigh_entry *neigh_entry)
1796 {
1797 	if (!neigh_entry) {
1798 		if (list_empty(&rif->neigh_list))
1799 			return NULL;
1800 		else
1801 			return list_first_entry(&rif->neigh_list,
1802 						typeof(*neigh_entry),
1803 						rif_list_node);
1804 	}
1805 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1806 		return NULL;
1807 	return list_next_entry(neigh_entry, rif_list_node);
1808 }
1809 
1810 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1811 {
1812 	return neigh_entry->key.n->tbl->family;
1813 }
1814 
1815 unsigned char *
1816 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1817 {
1818 	return neigh_entry->ha;
1819 }
1820 
1821 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1822 {
1823 	struct neighbour *n;
1824 
1825 	n = neigh_entry->key.n;
1826 	return ntohl(*((__be32 *) n->primary_key));
1827 }
1828 
1829 struct in6_addr *
1830 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1831 {
1832 	struct neighbour *n;
1833 
1834 	n = neigh_entry->key.n;
1835 	return (struct in6_addr *) &n->primary_key;
1836 }
1837 
1838 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1839 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1840 			       u64 *p_counter)
1841 {
1842 	if (!neigh_entry->counter_valid)
1843 		return -EINVAL;
1844 
1845 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1846 					 p_counter, NULL);
1847 }
1848 
1849 static struct mlxsw_sp_neigh_entry *
1850 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1851 			   u16 rif)
1852 {
1853 	struct mlxsw_sp_neigh_entry *neigh_entry;
1854 
1855 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1856 	if (!neigh_entry)
1857 		return NULL;
1858 
1859 	neigh_entry->key.n = n;
1860 	neigh_entry->rif = rif;
1861 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1862 
1863 	return neigh_entry;
1864 }
1865 
/* Release the memory of @neigh_entry; counterpart of
 * mlxsw_sp_neigh_entry_alloc().
 */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1870 
1871 static int
1872 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1873 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1874 {
1875 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1876 				      &neigh_entry->ht_node,
1877 				      mlxsw_sp_neigh_ht_params);
1878 }
1879 
1880 static void
1881 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1882 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1883 {
1884 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1885 			       &neigh_entry->ht_node,
1886 			       mlxsw_sp_neigh_ht_params);
1887 }
1888 
1889 static bool
1890 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1891 				    struct mlxsw_sp_neigh_entry *neigh_entry)
1892 {
1893 	struct devlink *devlink;
1894 	const char *table_name;
1895 
1896 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1897 	case AF_INET:
1898 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1899 		break;
1900 	case AF_INET6:
1901 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1902 		break;
1903 	default:
1904 		WARN_ON(1);
1905 		return false;
1906 	}
1907 
1908 	devlink = priv_to_devlink(mlxsw_sp->core);
1909 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
1910 }
1911 
1912 static void
1913 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1914 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1915 {
1916 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1917 		return;
1918 
1919 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1920 		return;
1921 
1922 	neigh_entry->counter_valid = true;
1923 }
1924 
1925 static void
1926 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1927 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1928 {
1929 	if (!neigh_entry->counter_valid)
1930 		return;
1931 	mlxsw_sp_flow_counter_free(mlxsw_sp,
1932 				   neigh_entry->counter_index);
1933 	neigh_entry->counter_valid = false;
1934 }
1935 
1936 static struct mlxsw_sp_neigh_entry *
1937 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1938 {
1939 	struct mlxsw_sp_neigh_entry *neigh_entry;
1940 	struct mlxsw_sp_rif *rif;
1941 	int err;
1942 
1943 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1944 	if (!rif)
1945 		return ERR_PTR(-EINVAL);
1946 
1947 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1948 	if (!neigh_entry)
1949 		return ERR_PTR(-ENOMEM);
1950 
1951 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1952 	if (err)
1953 		goto err_neigh_entry_insert;
1954 
1955 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1956 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1957 
1958 	return neigh_entry;
1959 
1960 err_neigh_entry_insert:
1961 	mlxsw_sp_neigh_entry_free(neigh_entry);
1962 	return ERR_PTR(err);
1963 }
1964 
1965 static void
1966 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1967 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1968 {
1969 	list_del(&neigh_entry->rif_list_node);
1970 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
1971 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
1972 	mlxsw_sp_neigh_entry_free(neigh_entry);
1973 }
1974 
1975 static struct mlxsw_sp_neigh_entry *
1976 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1977 {
1978 	struct mlxsw_sp_neigh_key key;
1979 
1980 	key.n = n;
1981 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1982 				      &key, mlxsw_sp_neigh_ht_params);
1983 }
1984 
1985 static void
1986 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
1987 {
1988 	unsigned long interval;
1989 
1990 #if IS_ENABLED(CONFIG_IPV6)
1991 	interval = min_t(unsigned long,
1992 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
1993 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
1994 #else
1995 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
1996 #endif
1997 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
1998 }
1999 
2000 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2001 						   char *rauhtd_pl,
2002 						   int ent_index)
2003 {
2004 	struct net_device *dev;
2005 	struct neighbour *n;
2006 	__be32 dipn;
2007 	u32 dip;
2008 	u16 rif;
2009 
2010 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2011 
2012 	if (!mlxsw_sp->router->rifs[rif]) {
2013 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2014 		return;
2015 	}
2016 
2017 	dipn = htonl(dip);
2018 	dev = mlxsw_sp->router->rifs[rif]->dev;
2019 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2020 	if (!n)
2021 		return;
2022 
2023 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2024 	neigh_event_send(n, NULL);
2025 	neigh_release(n);
2026 }
2027 
2028 #if IS_ENABLED(CONFIG_IPV6)
2029 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2030 						   char *rauhtd_pl,
2031 						   int rec_index)
2032 {
2033 	struct net_device *dev;
2034 	struct neighbour *n;
2035 	struct in6_addr dip;
2036 	u16 rif;
2037 
2038 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2039 					 (char *) &dip);
2040 
2041 	if (!mlxsw_sp->router->rifs[rif]) {
2042 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2043 		return;
2044 	}
2045 
2046 	dev = mlxsw_sp->router->rifs[rif]->dev;
2047 	n = neigh_lookup(&nd_tbl, &dip, dev);
2048 	if (!n)
2049 		return;
2050 
2051 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2052 	neigh_event_send(n, NULL);
2053 	neigh_release(n);
2054 }
2055 #else
2056 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2057 						   char *rauhtd_pl,
2058 						   int rec_index)
2059 {
2060 }
2061 #endif
2062 
2063 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2064 						   char *rauhtd_pl,
2065 						   int rec_index)
2066 {
2067 	u8 num_entries;
2068 	int i;
2069 
2070 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2071 								rec_index);
2072 	/* Hardware starts counting at 0, so add 1. */
2073 	num_entries++;
2074 
2075 	/* Each record consists of several neighbour entries. */
2076 	for (i = 0; i < num_entries; i++) {
2077 		int ent_index;
2078 
2079 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2080 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2081 						       ent_index);
2082 	}
2083 
2084 }
2085 
/* Process IPv6 record @rec_index of the RAUHTD payload. An IPv6 record holds
 * exactly one entry, so the record index doubles as the entry index.
 */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	int ent_index = rec_index;

	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, ent_index);
}
2094 
2095 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2096 					      char *rauhtd_pl, int rec_index)
2097 {
2098 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2099 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2100 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2101 						       rec_index);
2102 		break;
2103 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2104 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2105 						       rec_index);
2106 		break;
2107 	}
2108 }
2109 
2110 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2111 {
2112 	u8 num_rec, last_rec_index, num_entries;
2113 
2114 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2115 	last_rec_index = num_rec - 1;
2116 
2117 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2118 		return false;
2119 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2120 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2121 		return true;
2122 
2123 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2124 								last_rec_index);
2125 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2126 		return true;
2127 	return false;
2128 }
2129 
2130 static int
2131 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2132 				       char *rauhtd_pl,
2133 				       enum mlxsw_reg_rauhtd_type type)
2134 {
2135 	int i, num_rec;
2136 	int err;
2137 
2138 	/* Make sure the neighbour's netdev isn't removed in the
2139 	 * process.
2140 	 */
2141 	rtnl_lock();
2142 	do {
2143 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2144 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2145 				      rauhtd_pl);
2146 		if (err) {
2147 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2148 			break;
2149 		}
2150 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2151 		for (i = 0; i < num_rec; i++)
2152 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2153 							  i);
2154 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2155 	rtnl_unlock();
2156 
2157 	return err;
2158 }
2159 
2160 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2161 {
2162 	enum mlxsw_reg_rauhtd_type type;
2163 	char *rauhtd_pl;
2164 	int err;
2165 
2166 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2167 	if (!rauhtd_pl)
2168 		return -ENOMEM;
2169 
2170 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2171 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2172 	if (err)
2173 		goto out;
2174 
2175 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2176 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2177 out:
2178 	kfree(rauhtd_pl);
2179 	return err;
2180 }
2181 
2182 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2183 {
2184 	struct mlxsw_sp_neigh_entry *neigh_entry;
2185 
2186 	/* Take RTNL mutex here to prevent lists from changes */
2187 	rtnl_lock();
2188 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2189 			    nexthop_neighs_list_node)
2190 		/* If this neigh have nexthops, make the kernel think this neigh
2191 		 * is active regardless of the traffic.
2192 		 */
2193 		neigh_event_send(neigh_entry->key.n, NULL);
2194 	rtnl_unlock();
2195 }
2196 
2197 static void
2198 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2199 {
2200 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2201 
2202 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2203 			       msecs_to_jiffies(interval));
2204 }
2205 
2206 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2207 {
2208 	struct mlxsw_sp_router *router;
2209 	int err;
2210 
2211 	router = container_of(work, struct mlxsw_sp_router,
2212 			      neighs_update.dw.work);
2213 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2214 	if (err)
2215 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2216 
2217 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2218 
2219 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2220 }
2221 
2222 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2223 {
2224 	struct mlxsw_sp_neigh_entry *neigh_entry;
2225 	struct mlxsw_sp_router *router;
2226 
2227 	router = container_of(work, struct mlxsw_sp_router,
2228 			      nexthop_probe_dw.work);
2229 	/* Iterate over nexthop neighbours, find those who are unresolved and
2230 	 * send arp on them. This solves the chicken-egg problem when
2231 	 * the nexthop wouldn't get offloaded until the neighbor is resolved
2232 	 * but it wouldn't get resolved ever in case traffic is flowing in HW
2233 	 * using different nexthop.
2234 	 *
2235 	 * Take RTNL mutex here to prevent lists from changes.
2236 	 */
2237 	rtnl_lock();
2238 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2239 			    nexthop_neighs_list_node)
2240 		if (!neigh_entry->connected)
2241 			neigh_event_send(neigh_entry->key.n, NULL);
2242 	rtnl_unlock();
2243 
2244 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2245 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2246 }
2247 
/* Forward declaration: called from the neighbour event work below, before
 * the definition appears in this file.
 */
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing);
2252 
2253 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2254 {
2255 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2256 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2257 }
2258 
2259 static void
2260 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2261 				struct mlxsw_sp_neigh_entry *neigh_entry,
2262 				enum mlxsw_reg_rauht_op op)
2263 {
2264 	struct neighbour *n = neigh_entry->key.n;
2265 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2266 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2267 
2268 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2269 			      dip);
2270 	if (neigh_entry->counter_valid)
2271 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2272 					     neigh_entry->counter_index);
2273 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2274 }
2275 
2276 static void
2277 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2278 				struct mlxsw_sp_neigh_entry *neigh_entry,
2279 				enum mlxsw_reg_rauht_op op)
2280 {
2281 	struct neighbour *n = neigh_entry->key.n;
2282 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2283 	const char *dip = n->primary_key;
2284 
2285 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2286 			      dip);
2287 	if (neigh_entry->counter_valid)
2288 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2289 					     neigh_entry->counter_index);
2290 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2291 }
2292 
2293 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2294 {
2295 	struct neighbour *n = neigh_entry->key.n;
2296 
2297 	/* Packets with a link-local destination address are trapped
2298 	 * after LPM lookup and never reach the neighbour table, so
2299 	 * there is no need to program such neighbours to the device.
2300 	 */
2301 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2302 	    IPV6_ADDR_LINKLOCAL)
2303 		return true;
2304 	return false;
2305 }
2306 
2307 static void
2308 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2309 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2310 			    bool adding)
2311 {
2312 	if (!adding && !neigh_entry->connected)
2313 		return;
2314 	neigh_entry->connected = adding;
2315 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2316 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2317 						mlxsw_sp_rauht_op(adding));
2318 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2319 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2320 			return;
2321 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2322 						mlxsw_sp_rauht_op(adding));
2323 	} else {
2324 		WARN_ON_ONCE(1);
2325 	}
2326 }
2327 
2328 void
2329 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2330 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2331 				    bool adding)
2332 {
2333 	if (adding)
2334 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2335 	else
2336 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2337 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2338 }
2339 
/* Context of a netevent deferred to process context. Allocated by the
 * notifier and freed by the work handler.
 */
struct mlxsw_sp_netevent_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n; /* set only for neighbour update events */
};
2345 
2346 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2347 {
2348 	struct mlxsw_sp_netevent_work *net_work =
2349 		container_of(work, struct mlxsw_sp_netevent_work, work);
2350 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2351 	struct mlxsw_sp_neigh_entry *neigh_entry;
2352 	struct neighbour *n = net_work->n;
2353 	unsigned char ha[ETH_ALEN];
2354 	bool entry_connected;
2355 	u8 nud_state, dead;
2356 
2357 	/* If these parameters are changed after we release the lock,
2358 	 * then we are guaranteed to receive another event letting us
2359 	 * know about it.
2360 	 */
2361 	read_lock_bh(&n->lock);
2362 	memcpy(ha, n->ha, ETH_ALEN);
2363 	nud_state = n->nud_state;
2364 	dead = n->dead;
2365 	read_unlock_bh(&n->lock);
2366 
2367 	rtnl_lock();
2368 	mlxsw_sp_span_respin(mlxsw_sp);
2369 
2370 	entry_connected = nud_state & NUD_VALID && !dead;
2371 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2372 	if (!entry_connected && !neigh_entry)
2373 		goto out;
2374 	if (!neigh_entry) {
2375 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2376 		if (IS_ERR(neigh_entry))
2377 			goto out;
2378 	}
2379 
2380 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2381 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2382 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2383 
2384 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2385 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2386 
2387 out:
2388 	rtnl_unlock();
2389 	neigh_release(n);
2390 	kfree(net_work);
2391 }
2392 
/* Forward declaration: used by the multipath hash event work below. */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2394 
2395 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2396 {
2397 	struct mlxsw_sp_netevent_work *net_work =
2398 		container_of(work, struct mlxsw_sp_netevent_work, work);
2399 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2400 
2401 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2402 	kfree(net_work);
2403 }
2404 
/* Forward declaration: used by the update-priority event work below. */
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2406 
2407 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2408 {
2409 	struct mlxsw_sp_netevent_work *net_work =
2410 		container_of(work, struct mlxsw_sp_netevent_work, work);
2411 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2412 
2413 	__mlxsw_sp_router_init(mlxsw_sp);
2414 	kfree(net_work);
2415 }
2416 
2417 static int mlxsw_sp_router_schedule_work(struct net *net,
2418 					 struct notifier_block *nb,
2419 					 void (*cb)(struct work_struct *))
2420 {
2421 	struct mlxsw_sp_netevent_work *net_work;
2422 	struct mlxsw_sp_router *router;
2423 
2424 	if (!net_eq(net, &init_net))
2425 		return NOTIFY_DONE;
2426 
2427 	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2428 	if (!net_work)
2429 		return NOTIFY_BAD;
2430 
2431 	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2432 	INIT_WORK(&net_work->work, cb);
2433 	net_work->mlxsw_sp = router->mlxsw_sp;
2434 	mlxsw_core_schedule_work(&net_work->work);
2435 	return NOTIFY_DONE;
2436 }
2437 
2438 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2439 					  unsigned long event, void *ptr)
2440 {
2441 	struct mlxsw_sp_netevent_work *net_work;
2442 	struct mlxsw_sp_port *mlxsw_sp_port;
2443 	struct mlxsw_sp *mlxsw_sp;
2444 	unsigned long interval;
2445 	struct neigh_parms *p;
2446 	struct neighbour *n;
2447 
2448 	switch (event) {
2449 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2450 		p = ptr;
2451 
2452 		/* We don't care about changes in the default table. */
2453 		if (!p->dev || (p->tbl->family != AF_INET &&
2454 				p->tbl->family != AF_INET6))
2455 			return NOTIFY_DONE;
2456 
2457 		/* We are in atomic context and can't take RTNL mutex,
2458 		 * so use RCU variant to walk the device chain.
2459 		 */
2460 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2461 		if (!mlxsw_sp_port)
2462 			return NOTIFY_DONE;
2463 
2464 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2465 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2466 		mlxsw_sp->router->neighs_update.interval = interval;
2467 
2468 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2469 		break;
2470 	case NETEVENT_NEIGH_UPDATE:
2471 		n = ptr;
2472 
2473 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2474 			return NOTIFY_DONE;
2475 
2476 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2477 		if (!mlxsw_sp_port)
2478 			return NOTIFY_DONE;
2479 
2480 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2481 		if (!net_work) {
2482 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2483 			return NOTIFY_BAD;
2484 		}
2485 
2486 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2487 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2488 		net_work->n = n;
2489 
2490 		/* Take a reference to ensure the neighbour won't be
2491 		 * destructed until we drop the reference in delayed
2492 		 * work.
2493 		 */
2494 		neigh_clone(n);
2495 		mlxsw_core_schedule_work(&net_work->work);
2496 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2497 		break;
2498 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2499 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2500 		return mlxsw_sp_router_schedule_work(ptr, nb,
2501 				mlxsw_sp_router_mp_hash_event_work);
2502 
2503 	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2504 		return mlxsw_sp_router_schedule_work(ptr, nb,
2505 				mlxsw_sp_router_update_priority_work);
2506 	}
2507 
2508 	return NOTIFY_DONE;
2509 }
2510 
2511 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2512 {
2513 	int err;
2514 
2515 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2516 			      &mlxsw_sp_neigh_ht_params);
2517 	if (err)
2518 		return err;
2519 
2520 	/* Initialize the polling interval according to the default
2521 	 * table.
2522 	 */
2523 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2524 
2525 	/* Create the delayed works for the activity_update */
2526 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2527 			  mlxsw_sp_router_neighs_update_work);
2528 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2529 			  mlxsw_sp_router_probe_unresolved_nexthops);
2530 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2531 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2532 	return 0;
2533 }
2534 
2535 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2536 {
2537 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2538 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2539 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2540 }
2541 
2542 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2543 					 struct mlxsw_sp_rif *rif)
2544 {
2545 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2546 
2547 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2548 				 rif_list_node) {
2549 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2550 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2551 	}
2552 }
2553 
/* Kind of a nexthop; stored in the 'type' member of struct
 * mlxsw_sp_nexthop.
 */
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,
	MLXSW_SP_NEXTHOP_TYPE_IPIP,
};
2558 
/* Hash key of a nexthop in the router's nexthop_ht: the kernel fib_nh
 * pointer.
 */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
2562 
/* Driver representation of a single nexthop. Instances are stored in the
 * trailing 'nexthops' array of struct mlxsw_sp_nexthop_group.
 */
struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node;
	struct list_head router_list_node;
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node;
	struct mlxsw_sp_nexthop_key key;
	/* Gateway address; the buffer is sized for an IPv6 address. */
	unsigned char gw_addr[sizeof(struct in6_addr)];
	int ifindex;
	int nh_weight;
	int norm_nh_weight;
	int num_adj_entries;
	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	enum mlxsw_sp_nexthop_type type;
	/* NOTE(review): presumably 'type' selects the valid union member
	 * (ETH -> neigh_entry, IPIP -> ipip_entry) - confirm.
	 */
	union {
		struct mlxsw_sp_neigh_entry *neigh_entry;
		struct mlxsw_sp_ipip_entry *ipip_entry;
	};
	unsigned int counter_index; /* meaningful only when counter_valid */
	bool counter_valid;
};
2595 
2596 struct mlxsw_sp_nexthop_group {
2597 	void *priv;
2598 	struct rhash_head ht_node;
2599 	struct list_head fib_list; /* list of fib entries that use this group */
2600 	struct neigh_table *neigh_tbl;
2601 	u8 adj_index_valid:1,
2602 	   gateway:1; /* routes using the group use a gateway */
2603 	u32 adj_index;
2604 	u16 ecmp_size;
2605 	u16 count;
2606 	int sum_norm_weight;
2607 	struct mlxsw_sp_nexthop nexthops[0];
2608 #define nh_rif	nexthops[0].rif
2609 };
2610 
2611 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2612 				    struct mlxsw_sp_nexthop *nh)
2613 {
2614 	struct devlink *devlink;
2615 
2616 	devlink = priv_to_devlink(mlxsw_sp->core);
2617 	if (!devlink_dpipe_table_counter_enabled(devlink,
2618 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2619 		return;
2620 
2621 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2622 		return;
2623 
2624 	nh->counter_valid = true;
2625 }
2626 
2627 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2628 				   struct mlxsw_sp_nexthop *nh)
2629 {
2630 	if (!nh->counter_valid)
2631 		return;
2632 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2633 	nh->counter_valid = false;
2634 }
2635 
2636 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2637 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2638 {
2639 	if (!nh->counter_valid)
2640 		return -EINVAL;
2641 
2642 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2643 					 p_counter, NULL);
2644 }
2645 
2646 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2647 					       struct mlxsw_sp_nexthop *nh)
2648 {
2649 	if (!nh) {
2650 		if (list_empty(&router->nexthop_list))
2651 			return NULL;
2652 		else
2653 			return list_first_entry(&router->nexthop_list,
2654 						typeof(*nh), router_list_node);
2655 	}
2656 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2657 		return NULL;
2658 	return list_next_entry(nh, router_list_node);
2659 }
2660 
2661 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2662 {
2663 	return nh->offloaded;
2664 }
2665 
2666 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2667 {
2668 	if (!nh->offloaded)
2669 		return NULL;
2670 	return nh->neigh_entry->ha;
2671 }
2672 
2673 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2674 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2675 {
2676 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2677 	u32 adj_hash_index = 0;
2678 	int i;
2679 
2680 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2681 		return -EINVAL;
2682 
2683 	*p_adj_index = nh_grp->adj_index;
2684 	*p_adj_size = nh_grp->ecmp_size;
2685 
2686 	for (i = 0; i < nh_grp->count; i++) {
2687 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2688 
2689 		if (nh_iter == nh)
2690 			break;
2691 		if (nh_iter->offloaded)
2692 			adj_hash_index += nh_iter->num_adj_entries;
2693 	}
2694 
2695 	*p_adj_hash_index = adj_hash_index;
2696 	return 0;
2697 }
2698 
2699 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2700 {
2701 	return nh->rif;
2702 }
2703 
2704 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2705 {
2706 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2707 	int i;
2708 
2709 	for (i = 0; i < nh_grp->count; i++) {
2710 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2711 
2712 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2713 			return true;
2714 	}
2715 	return false;
2716 }
2717 
2718 static struct fib_info *
2719 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2720 {
2721 	return nh_grp->priv;
2722 }
2723 
/* Lookup key for the nexthop group hash table. 'proto' selects the valid
 * union member: 'fi' for IPv4, 'fib6_entry' for IPv6.
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
	enum mlxsw_sp_l3proto proto;
	union {
		struct fib_info *fi;
		struct mlxsw_sp_fib6_entry *fib6_entry;
	};
};
2731 
2732 static bool
2733 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2734 				    const struct in6_addr *gw, int ifindex,
2735 				    int weight)
2736 {
2737 	int i;
2738 
2739 	for (i = 0; i < nh_grp->count; i++) {
2740 		const struct mlxsw_sp_nexthop *nh;
2741 
2742 		nh = &nh_grp->nexthops[i];
2743 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2744 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2745 			return true;
2746 	}
2747 
2748 	return false;
2749 }
2750 
2751 static bool
2752 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2753 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2754 {
2755 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2756 
2757 	if (nh_grp->count != fib6_entry->nrt6)
2758 		return false;
2759 
2760 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2761 		struct in6_addr *gw;
2762 		int ifindex, weight;
2763 
2764 		ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2765 		weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2766 		gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
2767 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2768 							 weight))
2769 			return false;
2770 	}
2771 
2772 	return true;
2773 }
2774 
2775 static int
2776 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2777 {
2778 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2779 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2780 
2781 	switch (cmp_arg->proto) {
2782 	case MLXSW_SP_L3_PROTO_IPV4:
2783 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2784 	case MLXSW_SP_L3_PROTO_IPV6:
2785 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2786 						    cmp_arg->fib6_entry);
2787 	default:
2788 		WARN_ON(1);
2789 		return 1;
2790 	}
2791 }
2792 
2793 static int
2794 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2795 {
2796 	return nh_grp->neigh_tbl->family;
2797 }
2798 
2799 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2800 {
2801 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2802 	const struct mlxsw_sp_nexthop *nh;
2803 	struct fib_info *fi;
2804 	unsigned int val;
2805 	int i;
2806 
2807 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2808 	case AF_INET:
2809 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2810 		return jhash(&fi, sizeof(fi), seed);
2811 	case AF_INET6:
2812 		val = nh_grp->count;
2813 		for (i = 0; i < nh_grp->count; i++) {
2814 			nh = &nh_grp->nexthops[i];
2815 			val ^= nh->ifindex;
2816 		}
2817 		return jhash(&val, sizeof(val), seed);
2818 	default:
2819 		WARN_ON(1);
2820 		return 0;
2821 	}
2822 }
2823 
2824 static u32
2825 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2826 {
2827 	unsigned int val = fib6_entry->nrt6;
2828 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2829 	struct net_device *dev;
2830 
2831 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2832 		dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
2833 		val ^= dev->ifindex;
2834 	}
2835 
2836 	return jhash(&val, sizeof(val), seed);
2837 }
2838 
2839 static u32
2840 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2841 {
2842 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2843 
2844 	switch (cmp_arg->proto) {
2845 	case MLXSW_SP_L3_PROTO_IPV4:
2846 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2847 	case MLXSW_SP_L3_PROTO_IPV6:
2848 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2849 	default:
2850 		WARN_ON(1);
2851 		return 0;
2852 	}
2853 }
2854 
/* Parameters of the nexthop group hash table. Custom hash and compare
 * callbacks are used: keys are hashed by 'hashfn', stored groups by
 * 'obj_hashfn', and a key is matched against a group by 'obj_cmpfn'.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.hashfn	     = mlxsw_sp_nexthop_group_hash,
	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
2861 
2862 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2863 					 struct mlxsw_sp_nexthop_group *nh_grp)
2864 {
2865 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2866 	    !nh_grp->gateway)
2867 		return 0;
2868 
2869 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2870 				      &nh_grp->ht_node,
2871 				      mlxsw_sp_nexthop_group_ht_params);
2872 }
2873 
2874 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2875 					  struct mlxsw_sp_nexthop_group *nh_grp)
2876 {
2877 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2878 	    !nh_grp->gateway)
2879 		return;
2880 
2881 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2882 			       &nh_grp->ht_node,
2883 			       mlxsw_sp_nexthop_group_ht_params);
2884 }
2885 
2886 static struct mlxsw_sp_nexthop_group *
2887 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2888 			       struct fib_info *fi)
2889 {
2890 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2891 
2892 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2893 	cmp_arg.fi = fi;
2894 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2895 				      &cmp_arg,
2896 				      mlxsw_sp_nexthop_group_ht_params);
2897 }
2898 
2899 static struct mlxsw_sp_nexthop_group *
2900 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2901 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2902 {
2903 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2904 
2905 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2906 	cmp_arg.fib6_entry = fib6_entry;
2907 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2908 				      &cmp_arg,
2909 				      mlxsw_sp_nexthop_group_ht_params);
2910 }
2911 
/* Parameters of the nexthop hash table: entries are keyed by the embedded
 * struct mlxsw_sp_nexthop_key.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
2917 
2918 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2919 				   struct mlxsw_sp_nexthop *nh)
2920 {
2921 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2922 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2923 }
2924 
2925 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2926 				    struct mlxsw_sp_nexthop *nh)
2927 {
2928 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2929 			       mlxsw_sp_nexthop_ht_params);
2930 }
2931 
2932 static struct mlxsw_sp_nexthop *
2933 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2934 			struct mlxsw_sp_nexthop_key key)
2935 {
2936 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2937 				      mlxsw_sp_nexthop_ht_params);
2938 }
2939 
2940 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2941 					     const struct mlxsw_sp_fib *fib,
2942 					     u32 adj_index, u16 ecmp_size,
2943 					     u32 new_adj_index,
2944 					     u16 new_ecmp_size)
2945 {
2946 	char raleu_pl[MLXSW_REG_RALEU_LEN];
2947 
2948 	mlxsw_reg_raleu_pack(raleu_pl,
2949 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
2950 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
2951 			     new_ecmp_size);
2952 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2953 }
2954 
2955 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2956 					  struct mlxsw_sp_nexthop_group *nh_grp,
2957 					  u32 old_adj_index, u16 old_ecmp_size)
2958 {
2959 	struct mlxsw_sp_fib_entry *fib_entry;
2960 	struct mlxsw_sp_fib *fib = NULL;
2961 	int err;
2962 
2963 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2964 		if (fib == fib_entry->fib_node->fib)
2965 			continue;
2966 		fib = fib_entry->fib_node->fib;
2967 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2968 							old_adj_index,
2969 							old_ecmp_size,
2970 							nh_grp->adj_index,
2971 							nh_grp->ecmp_size);
2972 		if (err)
2973 			return err;
2974 	}
2975 	return 0;
2976 }
2977 
2978 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2979 				     struct mlxsw_sp_nexthop *nh)
2980 {
2981 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2982 	char ratr_pl[MLXSW_REG_RATR_LEN];
2983 
2984 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2985 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
2986 			    adj_index, neigh_entry->rif);
2987 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2988 	if (nh->counter_valid)
2989 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2990 	else
2991 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2992 
2993 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2994 }
2995 
2996 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2997 			    struct mlxsw_sp_nexthop *nh)
2998 {
2999 	int i;
3000 
3001 	for (i = 0; i < nh->num_adj_entries; i++) {
3002 		int err;
3003 
3004 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3005 		if (err)
3006 			return err;
3007 	}
3008 
3009 	return 0;
3010 }
3011 
3012 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3013 					  u32 adj_index,
3014 					  struct mlxsw_sp_nexthop *nh)
3015 {
3016 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3017 
3018 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3019 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3020 }
3021 
3022 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3023 					u32 adj_index,
3024 					struct mlxsw_sp_nexthop *nh)
3025 {
3026 	int i;
3027 
3028 	for (i = 0; i < nh->num_adj_entries; i++) {
3029 		int err;
3030 
3031 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3032 						     nh);
3033 		if (err)
3034 			return err;
3035 	}
3036 
3037 	return 0;
3038 }
3039 
3040 static int
3041 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3042 			      struct mlxsw_sp_nexthop_group *nh_grp,
3043 			      bool reallocate)
3044 {
3045 	u32 adj_index = nh_grp->adj_index; /* base */
3046 	struct mlxsw_sp_nexthop *nh;
3047 	int i;
3048 	int err;
3049 
3050 	for (i = 0; i < nh_grp->count; i++) {
3051 		nh = &nh_grp->nexthops[i];
3052 
3053 		if (!nh->should_offload) {
3054 			nh->offloaded = 0;
3055 			continue;
3056 		}
3057 
3058 		if (nh->update || reallocate) {
3059 			switch (nh->type) {
3060 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
3061 				err = mlxsw_sp_nexthop_update
3062 					    (mlxsw_sp, adj_index, nh);
3063 				break;
3064 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3065 				err = mlxsw_sp_nexthop_ipip_update
3066 					    (mlxsw_sp, adj_index, nh);
3067 				break;
3068 			}
3069 			if (err)
3070 				return err;
3071 			nh->update = 0;
3072 			nh->offloaded = 1;
3073 		}
3074 		adj_index += nh->num_adj_entries;
3075 	}
3076 	return 0;
3077 }
3078 
/* Forward declaration: the nexthop group helpers below call this predicate
 * before its definition is seen.
 */
static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry);
3082 
3083 static int
3084 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3085 				    struct mlxsw_sp_nexthop_group *nh_grp)
3086 {
3087 	struct mlxsw_sp_fib_entry *fib_entry;
3088 	int err;
3089 
3090 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3091 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3092 						      fib_entry))
3093 			continue;
3094 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3095 		if (err)
3096 			return err;
3097 	}
3098 	return 0;
3099 }
3100 
/* Forward declaration: needed by mlxsw_sp_nexthop_fib_entries_refresh(), which
 * precedes the definition further down in this file.
 */
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err);
3104 
3105 static void
3106 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3107 {
3108 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3109 	struct mlxsw_sp_fib_entry *fib_entry;
3110 
3111 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3112 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3113 						      fib_entry))
3114 			continue;
3115 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3116 	}
3117 }
3118 
3119 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3120 {
3121 	/* Valid sizes for an adjacency group are:
3122 	 * 1-64, 512, 1024, 2048 and 4096.
3123 	 */
3124 	if (*p_adj_grp_size <= 64)
3125 		return;
3126 	else if (*p_adj_grp_size <= 512)
3127 		*p_adj_grp_size = 512;
3128 	else if (*p_adj_grp_size <= 1024)
3129 		*p_adj_grp_size = 1024;
3130 	else if (*p_adj_grp_size <= 2048)
3131 		*p_adj_grp_size = 2048;
3132 	else
3133 		*p_adj_grp_size = 4096;
3134 }
3135 
3136 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3137 					     unsigned int alloc_size)
3138 {
3139 	if (alloc_size >= 4096)
3140 		*p_adj_grp_size = 4096;
3141 	else if (alloc_size >= 2048)
3142 		*p_adj_grp_size = 2048;
3143 	else if (alloc_size >= 1024)
3144 		*p_adj_grp_size = 1024;
3145 	else if (alloc_size >= 512)
3146 		*p_adj_grp_size = 512;
3147 }
3148 
3149 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3150 				     u16 *p_adj_grp_size)
3151 {
3152 	unsigned int alloc_size;
3153 	int err;
3154 
3155 	/* Round up the requested group size to the next size supported
3156 	 * by the device and make sure the request can be satisfied.
3157 	 */
3158 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3159 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3160 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3161 					      *p_adj_grp_size, &alloc_size);
3162 	if (err)
3163 		return err;
3164 	/* It is possible the allocation results in more allocated
3165 	 * entries than requested. Try to use as much of them as
3166 	 * possible.
3167 	 */
3168 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3169 
3170 	return 0;
3171 }
3172 
3173 static void
3174 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3175 {
3176 	int i, g = 0, sum_norm_weight = 0;
3177 	struct mlxsw_sp_nexthop *nh;
3178 
3179 	for (i = 0; i < nh_grp->count; i++) {
3180 		nh = &nh_grp->nexthops[i];
3181 
3182 		if (!nh->should_offload)
3183 			continue;
3184 		if (g > 0)
3185 			g = gcd(nh->nh_weight, g);
3186 		else
3187 			g = nh->nh_weight;
3188 	}
3189 
3190 	for (i = 0; i < nh_grp->count; i++) {
3191 		nh = &nh_grp->nexthops[i];
3192 
3193 		if (!nh->should_offload)
3194 			continue;
3195 		nh->norm_nh_weight = nh->nh_weight / g;
3196 		sum_norm_weight += nh->norm_nh_weight;
3197 	}
3198 
3199 	nh_grp->sum_norm_weight = sum_norm_weight;
3200 }
3201 
3202 static void
3203 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3204 {
3205 	int total = nh_grp->sum_norm_weight;
3206 	u16 ecmp_size = nh_grp->ecmp_size;
3207 	int i, weight = 0, lower_bound = 0;
3208 
3209 	for (i = 0; i < nh_grp->count; i++) {
3210 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3211 		int upper_bound;
3212 
3213 		if (!nh->should_offload)
3214 			continue;
3215 		weight += nh->norm_nh_weight;
3216 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3217 		nh->num_adj_entries = upper_bound - lower_bound;
3218 		lower_bound = upper_bound;
3219 	}
3220 }
3221 
/* Re-evaluate @nh_grp after the state of one or more of its nexthops changed
 * and bring the adjacency table and the group's FIB entries up to date.
 *
 * - Groups not marked as gateway only have their FIB entries updated.
 * - If no nexthop changed between "should offload" and "offloaded", the
 *   existing adjacency entries are updated in place.
 * - Otherwise a new adjacency range is sized, allocated and written, and the
 *   FIB entries are switched to it: through a FIB entry update when the group
 *   had no valid index before, through a mass update otherwise.
 *
 * Any failure ends up at the set_trap label, which invalidates the group's
 * adjacency index and updates the FIB entries accordingly. Errors are only
 * logged; the function returns nothing.
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	u16 ecmp_size, old_ecmp_size;
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	bool old_adj_index_valid;
	u32 old_adj_index;
	int i;
	int err;

	if (!nh_grp->gateway) {
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	/* Detect nexthops whose desired offload state differs from the current
	 * one; those that should become offloaded must be (re)written.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload != nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	mlxsw_sp_nexthop_group_normalize(nh_grp);
	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	ecmp_size = nh_grp->sum_norm_weight;
	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
	if (err)
		/* No valid allocation size available. */
		goto set_trap;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				  ecmp_size, &adj_index);
	if (err) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	/* Remember the previous range, then install the new one in the group
	 * before distributing and writing the entries.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	mlxsw_sp_nexthop_group_rebalance(nh_grp);
	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
	if (err) {
		/* NOTE(review): at this point nh_grp already describes the new
		 * range, so set_trap frees the new range only; a previously
		 * valid old range does not appear to be freed on this path.
		 * TODO confirm whether that is intended.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	/* Switch the FIB entries from the old range to the new one. The old
	 * range is released regardless of the outcome of the mass update.
	 */
	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
			   old_ecmp_size, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}

	/* Offload state within the group changed, so update the flags. */
	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

	return;

set_trap:
	/* Invalidate the group's adjacency index, mark every nexthop as not
	 * offloaded and update the FIB entries. If the index was valid when
	 * this label was reached, free the range currently recorded in the
	 * group.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				   nh_grp->ecmp_size, nh_grp->adj_index);
}
3335 
3336 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3337 					    bool removing)
3338 {
3339 	if (!removing)
3340 		nh->should_offload = 1;
3341 	else
3342 		nh->should_offload = 0;
3343 	nh->update = 1;
3344 }
3345 
3346 static void
3347 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3348 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3349 			      bool removing)
3350 {
3351 	struct mlxsw_sp_nexthop *nh;
3352 
3353 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3354 			    neigh_list_node) {
3355 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3356 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3357 	}
3358 }
3359 
3360 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3361 				      struct mlxsw_sp_rif *rif)
3362 {
3363 	if (nh->rif)
3364 		return;
3365 
3366 	nh->rif = rif;
3367 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3368 }
3369 
3370 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3371 {
3372 	if (!nh->rif)
3373 		return;
3374 
3375 	list_del(&nh->rif_list_node);
3376 	nh->rif = NULL;
3377 }
3378 
3379 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3380 				       struct mlxsw_sp_nexthop *nh)
3381 {
3382 	struct mlxsw_sp_neigh_entry *neigh_entry;
3383 	struct neighbour *n;
3384 	u8 nud_state, dead;
3385 	int err;
3386 
3387 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3388 		return 0;
3389 
3390 	/* Take a reference of neigh here ensuring that neigh would
3391 	 * not be destructed before the nexthop entry is finished.
3392 	 * The reference is taken either in neigh_lookup() or
3393 	 * in neigh_create() in case n is not found.
3394 	 */
3395 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3396 	if (!n) {
3397 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3398 				 nh->rif->dev);
3399 		if (IS_ERR(n))
3400 			return PTR_ERR(n);
3401 		neigh_event_send(n, NULL);
3402 	}
3403 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3404 	if (!neigh_entry) {
3405 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3406 		if (IS_ERR(neigh_entry)) {
3407 			err = -EINVAL;
3408 			goto err_neigh_entry_create;
3409 		}
3410 	}
3411 
3412 	/* If that is the first nexthop connected to that neigh, add to
3413 	 * nexthop_neighs_list
3414 	 */
3415 	if (list_empty(&neigh_entry->nexthop_list))
3416 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3417 			      &mlxsw_sp->router->nexthop_neighs_list);
3418 
3419 	nh->neigh_entry = neigh_entry;
3420 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3421 	read_lock_bh(&n->lock);
3422 	nud_state = n->nud_state;
3423 	dead = n->dead;
3424 	read_unlock_bh(&n->lock);
3425 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3426 
3427 	return 0;
3428 
3429 err_neigh_entry_create:
3430 	neigh_release(n);
3431 	return err;
3432 }
3433 
3434 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3435 					struct mlxsw_sp_nexthop *nh)
3436 {
3437 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3438 	struct neighbour *n;
3439 
3440 	if (!neigh_entry)
3441 		return;
3442 	n = neigh_entry->key.n;
3443 
3444 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3445 	list_del(&nh->neigh_list_node);
3446 	nh->neigh_entry = NULL;
3447 
3448 	/* If that is the last nexthop connected to that neigh, remove from
3449 	 * nexthop_neighs_list
3450 	 */
3451 	if (list_empty(&neigh_entry->nexthop_list))
3452 		list_del(&neigh_entry->nexthop_neighs_list_node);
3453 
3454 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3455 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3456 
3457 	neigh_release(n);
3458 }
3459 
3460 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3461 {
3462 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3463 
3464 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3465 }
3466 
3467 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3468 				       struct mlxsw_sp_nexthop *nh,
3469 				       struct mlxsw_sp_ipip_entry *ipip_entry)
3470 {
3471 	bool removing;
3472 
3473 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3474 		return;
3475 
3476 	nh->ipip_entry = ipip_entry;
3477 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3478 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3479 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3480 }
3481 
3482 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3483 				       struct mlxsw_sp_nexthop *nh)
3484 {
3485 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3486 
3487 	if (!ipip_entry)
3488 		return;
3489 
3490 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3491 	nh->ipip_entry = NULL;
3492 }
3493 
3494 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3495 					const struct fib_nh *fib_nh,
3496 					enum mlxsw_sp_ipip_type *p_ipipt)
3497 {
3498 	struct net_device *dev = fib_nh->nh_dev;
3499 
3500 	return dev &&
3501 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3502 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3503 }
3504 
3505 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3506 				       struct mlxsw_sp_nexthop *nh)
3507 {
3508 	switch (nh->type) {
3509 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3510 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3511 		mlxsw_sp_nexthop_rif_fini(nh);
3512 		break;
3513 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3514 		mlxsw_sp_nexthop_rif_fini(nh);
3515 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3516 		break;
3517 	}
3518 }
3519 
3520 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3521 				       struct mlxsw_sp_nexthop *nh,
3522 				       struct fib_nh *fib_nh)
3523 {
3524 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3525 	struct net_device *dev = fib_nh->nh_dev;
3526 	struct mlxsw_sp_ipip_entry *ipip_entry;
3527 	struct mlxsw_sp_rif *rif;
3528 	int err;
3529 
3530 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3531 	if (ipip_entry) {
3532 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3533 		if (ipip_ops->can_offload(mlxsw_sp, dev,
3534 					  MLXSW_SP_L3_PROTO_IPV4)) {
3535 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3536 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3537 			return 0;
3538 		}
3539 	}
3540 
3541 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3542 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3543 	if (!rif)
3544 		return 0;
3545 
3546 	mlxsw_sp_nexthop_rif_init(nh, rif);
3547 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3548 	if (err)
3549 		goto err_neigh_init;
3550 
3551 	return 0;
3552 
3553 err_neigh_init:
3554 	mlxsw_sp_nexthop_rif_fini(nh);
3555 	return err;
3556 }
3557 
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(); simply forwards to the
 * protocol-independent mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3563 
3564 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3565 				  struct mlxsw_sp_nexthop_group *nh_grp,
3566 				  struct mlxsw_sp_nexthop *nh,
3567 				  struct fib_nh *fib_nh)
3568 {
3569 	struct net_device *dev = fib_nh->nh_dev;
3570 	struct in_device *in_dev;
3571 	int err;
3572 
3573 	nh->nh_grp = nh_grp;
3574 	nh->key.fib_nh = fib_nh;
3575 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3576 	nh->nh_weight = fib_nh->nh_weight;
3577 #else
3578 	nh->nh_weight = 1;
3579 #endif
3580 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3581 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3582 	if (err)
3583 		return err;
3584 
3585 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3586 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3587 
3588 	if (!dev)
3589 		return 0;
3590 
3591 	in_dev = __in_dev_get_rtnl(dev);
3592 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3593 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3594 		return 0;
3595 
3596 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3597 	if (err)
3598 		goto err_nexthop_neigh_init;
3599 
3600 	return 0;
3601 
3602 err_nexthop_neigh_init:
3603 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3604 	return err;
3605 }
3606 
/* Tear down an IPv4 nexthop: undo its type-specific state, unlink it from the
 * router's nexthop list, free its counter and remove it via
 * mlxsw_sp_nexthop_remove().
 */
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
3615 
3616 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3617 				    unsigned long event, struct fib_nh *fib_nh)
3618 {
3619 	struct mlxsw_sp_nexthop_key key;
3620 	struct mlxsw_sp_nexthop *nh;
3621 
3622 	if (mlxsw_sp->router->aborted)
3623 		return;
3624 
3625 	key.fib_nh = fib_nh;
3626 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3627 	if (WARN_ON_ONCE(!nh))
3628 		return;
3629 
3630 	switch (event) {
3631 	case FIB_EVENT_NH_ADD:
3632 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3633 		break;
3634 	case FIB_EVENT_NH_DEL:
3635 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3636 		break;
3637 	}
3638 
3639 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3640 }
3641 
3642 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3643 					struct mlxsw_sp_rif *rif)
3644 {
3645 	struct mlxsw_sp_nexthop *nh;
3646 	bool removing;
3647 
3648 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3649 		switch (nh->type) {
3650 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3651 			removing = false;
3652 			break;
3653 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3654 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3655 			break;
3656 		default:
3657 			WARN_ON(1);
3658 			continue;
3659 		}
3660 
3661 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3662 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3663 	}
3664 }
3665 
/* Move all nexthops bound to @old_rif over to @new_rif.
 *
 * The old RIF's nexthop list is spliced into the new RIF's list (leaving the
 * old list empty), every nexthop then on the new RIF's list has its RIF
 * pointer set to @new_rif, and the nexthops of @new_rif are re-evaluated.
 */
static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif)
{
	struct mlxsw_sp_nexthop *nh;

	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
		nh->rif = new_rif;
	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
}
3677 
3678 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3679 					   struct mlxsw_sp_rif *rif)
3680 {
3681 	struct mlxsw_sp_nexthop *nh, *tmp;
3682 
3683 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3684 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3685 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3686 	}
3687 }
3688 
3689 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3690 				   const struct fib_info *fi)
3691 {
3692 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3693 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3694 }
3695 
/* Create a nexthop group for the IPv4 fib_info @fi.
 *
 * The group and its fi->fib_nhs nexthops are allocated as one zeroed chunk, a
 * reference on @fi is held, each nexthop is initialized from the matching
 * entry of fi->fib_nh, and the group is handed to
 * mlxsw_sp_nexthop_group_insert() and then refreshed.
 *
 * Returns the new group, or an ERR_PTR() on failure, in which case the
 * already initialized nexthops are finalized, the @fi reference is dropped
 * and the memory is freed.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	nh_grp->priv = fi;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->neigh_tbl = &arp_tbl;

	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
	nh_grp->count = fi->fib_nhs;
	fib_info_hold(fi);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop4_init;
	}
	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop4_init:
	/* 'i' is the index of the nexthop that failed to initialize, or the
	 * nexthop count when the group insert failed; finalize the nexthops
	 * before it in reverse order.
	 */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	fib_info_put(fi);
	kfree(nh_grp);
	return ERR_PTR(err);
}
3741 
/* Destroy an IPv4 nexthop group.
 *
 * The group is removed via mlxsw_sp_nexthop_group_remove(), all its nexthops
 * are finalized, and the group is refreshed once more. After that refresh the
 * group is expected to have no valid adjacency index (a one-time warning is
 * emitted otherwise). Finally the fib_info reference is dropped and the
 * memory is freed.
 */
static void
mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
	kfree(nh_grp);
}
3759 
3760 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3761 				       struct mlxsw_sp_fib_entry *fib_entry,
3762 				       struct fib_info *fi)
3763 {
3764 	struct mlxsw_sp_nexthop_group *nh_grp;
3765 
3766 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3767 	if (!nh_grp) {
3768 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3769 		if (IS_ERR(nh_grp))
3770 			return PTR_ERR(nh_grp);
3771 	}
3772 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3773 	fib_entry->nh_group = nh_grp;
3774 	return 0;
3775 }
3776 
3777 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3778 					struct mlxsw_sp_fib_entry *fib_entry)
3779 {
3780 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3781 
3782 	list_del(&fib_entry->nexthop_group_node);
3783 	if (!list_empty(&nh_grp->fib_list))
3784 		return;
3785 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3786 }
3787 
3788 static bool
3789 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3790 {
3791 	struct mlxsw_sp_fib4_entry *fib4_entry;
3792 
3793 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3794 				  common);
3795 	return !fib4_entry->tos;
3796 }
3797 
3798 static bool
3799 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3800 {
3801 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3802 
3803 	switch (fib_entry->fib_node->fib->proto) {
3804 	case MLXSW_SP_L3_PROTO_IPV4:
3805 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3806 			return false;
3807 		break;
3808 	case MLXSW_SP_L3_PROTO_IPV6:
3809 		break;
3810 	}
3811 
3812 	switch (fib_entry->type) {
3813 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3814 		return !!nh_group->adj_index_valid;
3815 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3816 		return !!nh_group->nh_rif;
3817 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3818 		return true;
3819 	default:
3820 		return false;
3821 	}
3822 }
3823 
3824 static struct mlxsw_sp_nexthop *
3825 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3826 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3827 {
3828 	int i;
3829 
3830 	for (i = 0; i < nh_grp->count; i++) {
3831 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3832 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
3833 
3834 		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3835 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3836 				    &rt->fib6_nh.nh_gw))
3837 			return nh;
3838 		continue;
3839 	}
3840 
3841 	return NULL;
3842 }
3843 
3844 static void
3845 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3846 {
3847 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3848 	int i;
3849 
3850 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3851 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3852 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3853 		return;
3854 	}
3855 
3856 	for (i = 0; i < nh_grp->count; i++) {
3857 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3858 
3859 		if (nh->offloaded)
3860 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3861 		else
3862 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3863 	}
3864 }
3865 
3866 static void
3867 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3868 {
3869 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3870 	int i;
3871 
3872 	if (!list_is_singular(&nh_grp->fib_list))
3873 		return;
3874 
3875 	for (i = 0; i < nh_grp->count; i++) {
3876 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3877 
3878 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3879 	}
3880 }
3881 
3882 static void
3883 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3884 {
3885 	struct mlxsw_sp_fib6_entry *fib6_entry;
3886 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3887 
3888 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3889 				  common);
3890 
3891 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3892 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3893 				 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
3894 		return;
3895 	}
3896 
3897 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3898 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3899 		struct mlxsw_sp_nexthop *nh;
3900 
3901 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3902 		if (nh && nh->offloaded)
3903 			mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
3904 		else
3905 			mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3906 	}
3907 }
3908 
3909 static void
3910 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3911 {
3912 	struct mlxsw_sp_fib6_entry *fib6_entry;
3913 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3914 
3915 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3916 				  common);
3917 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3918 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
3919 
3920 		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3921 	}
3922 }
3923 
3924 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3925 {
3926 	switch (fib_entry->fib_node->fib->proto) {
3927 	case MLXSW_SP_L3_PROTO_IPV4:
3928 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
3929 		break;
3930 	case MLXSW_SP_L3_PROTO_IPV6:
3931 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
3932 		break;
3933 	}
3934 }
3935 
3936 static void
3937 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3938 {
3939 	switch (fib_entry->fib_node->fib->proto) {
3940 	case MLXSW_SP_L3_PROTO_IPV4:
3941 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3942 		break;
3943 	case MLXSW_SP_L3_PROTO_IPV6:
3944 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3945 		break;
3946 	}
3947 }
3948 
3949 static void
3950 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3951 				   enum mlxsw_reg_ralue_op op, int err)
3952 {
3953 	switch (op) {
3954 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3955 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3956 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3957 		if (err)
3958 			return;
3959 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3960 			mlxsw_sp_fib_entry_offload_set(fib_entry);
3961 		else
3962 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
3963 		return;
3964 	default:
3965 		return;
3966 	}
3967 }
3968 
3969 static void
3970 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3971 			      const struct mlxsw_sp_fib_entry *fib_entry,
3972 			      enum mlxsw_reg_ralue_op op)
3973 {
3974 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3975 	enum mlxsw_reg_ralxx_protocol proto;
3976 	u32 *p_dip;
3977 
3978 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3979 
3980 	switch (fib->proto) {
3981 	case MLXSW_SP_L3_PROTO_IPV4:
3982 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
3983 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3984 				      fib_entry->fib_node->key.prefix_len,
3985 				      *p_dip);
3986 		break;
3987 	case MLXSW_SP_L3_PROTO_IPV6:
3988 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3989 				      fib_entry->fib_node->key.prefix_len,
3990 				      fib_entry->fib_node->key.addr);
3991 		break;
3992 	}
3993 }
3994 
3995 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
3996 					struct mlxsw_sp_fib_entry *fib_entry,
3997 					enum mlxsw_reg_ralue_op op)
3998 {
3999 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4000 	enum mlxsw_reg_ralue_trap_action trap_action;
4001 	u16 trap_id = 0;
4002 	u32 adjacency_index = 0;
4003 	u16 ecmp_size = 0;
4004 
4005 	/* In case the nexthop group adjacency index is valid, use it
4006 	 * with provided ECMP size. Otherwise, setup trap and pass
4007 	 * traffic to kernel.
4008 	 */
4009 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4010 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4011 		adjacency_index = fib_entry->nh_group->adj_index;
4012 		ecmp_size = fib_entry->nh_group->ecmp_size;
4013 	} else {
4014 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4015 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4016 	}
4017 
4018 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4019 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4020 					adjacency_index, ecmp_size);
4021 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4022 }
4023 
4024 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4025 				       struct mlxsw_sp_fib_entry *fib_entry,
4026 				       enum mlxsw_reg_ralue_op op)
4027 {
4028 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4029 	enum mlxsw_reg_ralue_trap_action trap_action;
4030 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4031 	u16 trap_id = 0;
4032 	u16 rif_index = 0;
4033 
4034 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4035 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4036 		rif_index = rif->rif_index;
4037 	} else {
4038 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4039 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4040 	}
4041 
4042 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4043 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4044 				       rif_index);
4045 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4046 }
4047 
4048 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4049 				      struct mlxsw_sp_fib_entry *fib_entry,
4050 				      enum mlxsw_reg_ralue_op op)
4051 {
4052 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4053 
4054 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4055 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4056 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4057 }
4058 
4059 static int
4060 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4061 				 struct mlxsw_sp_fib_entry *fib_entry,
4062 				 enum mlxsw_reg_ralue_op op)
4063 {
4064 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4065 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4066 
4067 	if (WARN_ON(!ipip_entry))
4068 		return -EINVAL;
4069 
4070 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4071 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4072 				      fib_entry->decap.tunnel_index);
4073 }
4074 
4075 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4076 				   struct mlxsw_sp_fib_entry *fib_entry,
4077 				   enum mlxsw_reg_ralue_op op)
4078 {
4079 	switch (fib_entry->type) {
4080 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4081 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4082 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4083 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4084 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4085 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4086 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4087 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4088 							fib_entry, op);
4089 	}
4090 	return -EINVAL;
4091 }
4092 
4093 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4094 				 struct mlxsw_sp_fib_entry *fib_entry,
4095 				 enum mlxsw_reg_ralue_op op)
4096 {
4097 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4098 
4099 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4100 
4101 	return err;
4102 }
4103 
4104 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4105 				     struct mlxsw_sp_fib_entry *fib_entry)
4106 {
4107 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4108 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4109 }
4110 
4111 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4112 				  struct mlxsw_sp_fib_entry *fib_entry)
4113 {
4114 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4115 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4116 }
4117 
4118 static int
4119 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4120 			     const struct fib_entry_notifier_info *fen_info,
4121 			     struct mlxsw_sp_fib_entry *fib_entry)
4122 {
4123 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4124 	struct net_device *dev = fen_info->fi->fib_dev;
4125 	struct mlxsw_sp_ipip_entry *ipip_entry;
4126 	struct fib_info *fi = fen_info->fi;
4127 
4128 	switch (fen_info->type) {
4129 	case RTN_LOCAL:
4130 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4131 						 MLXSW_SP_L3_PROTO_IPV4, dip);
4132 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4133 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4134 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4135 							     fib_entry,
4136 							     ipip_entry);
4137 		}
4138 		/* fall through */
4139 	case RTN_BROADCAST:
4140 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4141 		return 0;
4142 	case RTN_UNREACHABLE: /* fall through */
4143 	case RTN_BLACKHOLE: /* fall through */
4144 	case RTN_PROHIBIT:
4145 		/* Packets hitting these routes need to be trapped, but
4146 		 * can do so with a lower priority than packets directed
4147 		 * at the host, so use action type local instead of trap.
4148 		 */
4149 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4150 		return 0;
4151 	case RTN_UNICAST:
4152 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4153 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4154 		else
4155 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4156 		return 0;
4157 	default:
4158 		return -EINVAL;
4159 	}
4160 }
4161 
4162 static struct mlxsw_sp_fib4_entry *
4163 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4164 			   struct mlxsw_sp_fib_node *fib_node,
4165 			   const struct fib_entry_notifier_info *fen_info)
4166 {
4167 	struct mlxsw_sp_fib4_entry *fib4_entry;
4168 	struct mlxsw_sp_fib_entry *fib_entry;
4169 	int err;
4170 
4171 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4172 	if (!fib4_entry)
4173 		return ERR_PTR(-ENOMEM);
4174 	fib_entry = &fib4_entry->common;
4175 
4176 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4177 	if (err)
4178 		goto err_fib4_entry_type_set;
4179 
4180 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4181 	if (err)
4182 		goto err_nexthop4_group_get;
4183 
4184 	fib4_entry->prio = fen_info->fi->fib_priority;
4185 	fib4_entry->tb_id = fen_info->tb_id;
4186 	fib4_entry->type = fen_info->type;
4187 	fib4_entry->tos = fen_info->tos;
4188 
4189 	fib_entry->fib_node = fib_node;
4190 
4191 	return fib4_entry;
4192 
4193 err_nexthop4_group_get:
4194 err_fib4_entry_type_set:
4195 	kfree(fib4_entry);
4196 	return ERR_PTR(err);
4197 }
4198 
4199 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4200 					struct mlxsw_sp_fib4_entry *fib4_entry)
4201 {
4202 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4203 	kfree(fib4_entry);
4204 }
4205 
4206 static struct mlxsw_sp_fib4_entry *
4207 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4208 			   const struct fib_entry_notifier_info *fen_info)
4209 {
4210 	struct mlxsw_sp_fib4_entry *fib4_entry;
4211 	struct mlxsw_sp_fib_node *fib_node;
4212 	struct mlxsw_sp_fib *fib;
4213 	struct mlxsw_sp_vr *vr;
4214 
4215 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4216 	if (!vr)
4217 		return NULL;
4218 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4219 
4220 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4221 					    sizeof(fen_info->dst),
4222 					    fen_info->dst_len);
4223 	if (!fib_node)
4224 		return NULL;
4225 
4226 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4227 		if (fib4_entry->tb_id == fen_info->tb_id &&
4228 		    fib4_entry->tos == fen_info->tos &&
4229 		    fib4_entry->type == fen_info->type &&
4230 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4231 		    fen_info->fi) {
4232 			return fib4_entry;
4233 		}
4234 	}
4235 
4236 	return NULL;
4237 }
4238 
4239 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4240 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4241 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4242 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4243 	.automatic_shrinking = true,
4244 };
4245 
4246 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4247 				    struct mlxsw_sp_fib_node *fib_node)
4248 {
4249 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4250 				      mlxsw_sp_fib_ht_params);
4251 }
4252 
4253 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4254 				     struct mlxsw_sp_fib_node *fib_node)
4255 {
4256 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4257 			       mlxsw_sp_fib_ht_params);
4258 }
4259 
4260 static struct mlxsw_sp_fib_node *
4261 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4262 			 size_t addr_len, unsigned char prefix_len)
4263 {
4264 	struct mlxsw_sp_fib_key key;
4265 
4266 	memset(&key, 0, sizeof(key));
4267 	memcpy(key.addr, addr, addr_len);
4268 	key.prefix_len = prefix_len;
4269 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4270 }
4271 
4272 static struct mlxsw_sp_fib_node *
4273 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4274 			 size_t addr_len, unsigned char prefix_len)
4275 {
4276 	struct mlxsw_sp_fib_node *fib_node;
4277 
4278 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4279 	if (!fib_node)
4280 		return NULL;
4281 
4282 	INIT_LIST_HEAD(&fib_node->entry_list);
4283 	list_add(&fib_node->list, &fib->node_list);
4284 	memcpy(fib_node->key.addr, addr, addr_len);
4285 	fib_node->key.prefix_len = prefix_len;
4286 
4287 	return fib_node;
4288 }
4289 
4290 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4291 {
4292 	list_del(&fib_node->list);
4293 	WARN_ON(!list_empty(&fib_node->entry_list));
4294 	kfree(fib_node);
4295 }
4296 
4297 static bool
4298 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4299 				 const struct mlxsw_sp_fib_entry *fib_entry)
4300 {
4301 	return list_first_entry(&fib_node->entry_list,
4302 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4303 }
4304 
4305 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4306 				      struct mlxsw_sp_fib_node *fib_node)
4307 {
4308 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4309 	struct mlxsw_sp_fib *fib = fib_node->fib;
4310 	struct mlxsw_sp_lpm_tree *lpm_tree;
4311 	int err;
4312 
4313 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4314 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4315 		goto out;
4316 
4317 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4318 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4319 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4320 					 fib->proto);
4321 	if (IS_ERR(lpm_tree))
4322 		return PTR_ERR(lpm_tree);
4323 
4324 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4325 	if (err)
4326 		goto err_lpm_tree_replace;
4327 
4328 out:
4329 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4330 	return 0;
4331 
4332 err_lpm_tree_replace:
4333 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4334 	return err;
4335 }
4336 
4337 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4338 					 struct mlxsw_sp_fib_node *fib_node)
4339 {
4340 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4341 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4342 	struct mlxsw_sp_fib *fib = fib_node->fib;
4343 	int err;
4344 
4345 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4346 		return;
4347 	/* Try to construct a new LPM tree from the current prefix usage
4348 	 * minus the unused one. If we fail, continue using the old one.
4349 	 */
4350 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4351 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4352 				    fib_node->key.prefix_len);
4353 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4354 					 fib->proto);
4355 	if (IS_ERR(lpm_tree))
4356 		return;
4357 
4358 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4359 	if (err)
4360 		goto err_lpm_tree_replace;
4361 
4362 	return;
4363 
4364 err_lpm_tree_replace:
4365 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4366 }
4367 
4368 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4369 				  struct mlxsw_sp_fib_node *fib_node,
4370 				  struct mlxsw_sp_fib *fib)
4371 {
4372 	int err;
4373 
4374 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4375 	if (err)
4376 		return err;
4377 	fib_node->fib = fib;
4378 
4379 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4380 	if (err)
4381 		goto err_fib_lpm_tree_link;
4382 
4383 	return 0;
4384 
4385 err_fib_lpm_tree_link:
4386 	fib_node->fib = NULL;
4387 	mlxsw_sp_fib_node_remove(fib, fib_node);
4388 	return err;
4389 }
4390 
4391 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4392 				   struct mlxsw_sp_fib_node *fib_node)
4393 {
4394 	struct mlxsw_sp_fib *fib = fib_node->fib;
4395 
4396 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4397 	fib_node->fib = NULL;
4398 	mlxsw_sp_fib_node_remove(fib, fib_node);
4399 }
4400 
4401 static struct mlxsw_sp_fib_node *
4402 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4403 		      size_t addr_len, unsigned char prefix_len,
4404 		      enum mlxsw_sp_l3proto proto)
4405 {
4406 	struct mlxsw_sp_fib_node *fib_node;
4407 	struct mlxsw_sp_fib *fib;
4408 	struct mlxsw_sp_vr *vr;
4409 	int err;
4410 
4411 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4412 	if (IS_ERR(vr))
4413 		return ERR_CAST(vr);
4414 	fib = mlxsw_sp_vr_fib(vr, proto);
4415 
4416 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4417 	if (fib_node)
4418 		return fib_node;
4419 
4420 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4421 	if (!fib_node) {
4422 		err = -ENOMEM;
4423 		goto err_fib_node_create;
4424 	}
4425 
4426 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4427 	if (err)
4428 		goto err_fib_node_init;
4429 
4430 	return fib_node;
4431 
4432 err_fib_node_init:
4433 	mlxsw_sp_fib_node_destroy(fib_node);
4434 err_fib_node_create:
4435 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4436 	return ERR_PTR(err);
4437 }
4438 
4439 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4440 				  struct mlxsw_sp_fib_node *fib_node)
4441 {
4442 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4443 
4444 	if (!list_empty(&fib_node->entry_list))
4445 		return;
4446 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4447 	mlxsw_sp_fib_node_destroy(fib_node);
4448 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4449 }
4450 
4451 static struct mlxsw_sp_fib4_entry *
4452 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4453 			      const struct mlxsw_sp_fib4_entry *new4_entry)
4454 {
4455 	struct mlxsw_sp_fib4_entry *fib4_entry;
4456 
4457 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4458 		if (fib4_entry->tb_id > new4_entry->tb_id)
4459 			continue;
4460 		if (fib4_entry->tb_id != new4_entry->tb_id)
4461 			break;
4462 		if (fib4_entry->tos > new4_entry->tos)
4463 			continue;
4464 		if (fib4_entry->prio >= new4_entry->prio ||
4465 		    fib4_entry->tos < new4_entry->tos)
4466 			return fib4_entry;
4467 	}
4468 
4469 	return NULL;
4470 }
4471 
4472 static int
4473 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4474 			       struct mlxsw_sp_fib4_entry *new4_entry)
4475 {
4476 	struct mlxsw_sp_fib_node *fib_node;
4477 
4478 	if (WARN_ON(!fib4_entry))
4479 		return -EINVAL;
4480 
4481 	fib_node = fib4_entry->common.fib_node;
4482 	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4483 				 common.list) {
4484 		if (fib4_entry->tb_id != new4_entry->tb_id ||
4485 		    fib4_entry->tos != new4_entry->tos ||
4486 		    fib4_entry->prio != new4_entry->prio)
4487 			break;
4488 	}
4489 
4490 	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4491 	return 0;
4492 }
4493 
4494 static int
4495 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4496 			       bool replace, bool append)
4497 {
4498 	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4499 	struct mlxsw_sp_fib4_entry *fib4_entry;
4500 
4501 	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4502 
4503 	if (append)
4504 		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4505 	if (replace && WARN_ON(!fib4_entry))
4506 		return -EINVAL;
4507 
4508 	/* Insert new entry before replaced one, so that we can later
4509 	 * remove the second.
4510 	 */
4511 	if (fib4_entry) {
4512 		list_add_tail(&new4_entry->common.list,
4513 			      &fib4_entry->common.list);
4514 	} else {
4515 		struct mlxsw_sp_fib4_entry *last;
4516 
4517 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4518 			if (new4_entry->tb_id > last->tb_id)
4519 				break;
4520 			fib4_entry = last;
4521 		}
4522 
4523 		if (fib4_entry)
4524 			list_add(&new4_entry->common.list,
4525 				 &fib4_entry->common.list);
4526 		else
4527 			list_add(&new4_entry->common.list,
4528 				 &fib_node->entry_list);
4529 	}
4530 
4531 	return 0;
4532 }
4533 
4534 static void
4535 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4536 {
4537 	list_del(&fib4_entry->common.list);
4538 }
4539 
4540 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4541 				       struct mlxsw_sp_fib_entry *fib_entry)
4542 {
4543 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4544 
4545 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4546 		return 0;
4547 
4548 	/* To prevent packet loss, overwrite the previously offloaded
4549 	 * entry.
4550 	 */
4551 	if (!list_is_singular(&fib_node->entry_list)) {
4552 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4553 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4554 
4555 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4556 	}
4557 
4558 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4559 }
4560 
4561 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4562 					struct mlxsw_sp_fib_entry *fib_entry)
4563 {
4564 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4565 
4566 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4567 		return;
4568 
4569 	/* Promote the next entry by overwriting the deleted entry */
4570 	if (!list_is_singular(&fib_node->entry_list)) {
4571 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4572 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4573 
4574 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4575 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4576 		return;
4577 	}
4578 
4579 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4580 }
4581 
4582 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4583 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4584 					 bool replace, bool append)
4585 {
4586 	int err;
4587 
4588 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4589 	if (err)
4590 		return err;
4591 
4592 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4593 	if (err)
4594 		goto err_fib_node_entry_add;
4595 
4596 	return 0;
4597 
4598 err_fib_node_entry_add:
4599 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4600 	return err;
4601 }
4602 
4603 static void
4604 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4605 				struct mlxsw_sp_fib4_entry *fib4_entry)
4606 {
4607 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4608 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4609 
4610 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4611 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4612 }
4613 
4614 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4615 					struct mlxsw_sp_fib4_entry *fib4_entry,
4616 					bool replace)
4617 {
4618 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4619 	struct mlxsw_sp_fib4_entry *replaced;
4620 
4621 	if (!replace)
4622 		return;
4623 
4624 	/* We inserted the new entry before replaced one */
4625 	replaced = list_next_entry(fib4_entry, common.list);
4626 
4627 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4628 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4629 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4630 }
4631 
4632 static int
4633 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4634 			 const struct fib_entry_notifier_info *fen_info,
4635 			 bool replace, bool append)
4636 {
4637 	struct mlxsw_sp_fib4_entry *fib4_entry;
4638 	struct mlxsw_sp_fib_node *fib_node;
4639 	int err;
4640 
4641 	if (mlxsw_sp->router->aborted)
4642 		return 0;
4643 
4644 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4645 					 &fen_info->dst, sizeof(fen_info->dst),
4646 					 fen_info->dst_len,
4647 					 MLXSW_SP_L3_PROTO_IPV4);
4648 	if (IS_ERR(fib_node)) {
4649 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4650 		return PTR_ERR(fib_node);
4651 	}
4652 
4653 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4654 	if (IS_ERR(fib4_entry)) {
4655 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4656 		err = PTR_ERR(fib4_entry);
4657 		goto err_fib4_entry_create;
4658 	}
4659 
4660 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4661 					    append);
4662 	if (err) {
4663 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4664 		goto err_fib4_node_entry_link;
4665 	}
4666 
4667 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4668 
4669 	return 0;
4670 
4671 err_fib4_node_entry_link:
4672 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4673 err_fib4_entry_create:
4674 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4675 	return err;
4676 }
4677 
4678 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4679 				     struct fib_entry_notifier_info *fen_info)
4680 {
4681 	struct mlxsw_sp_fib4_entry *fib4_entry;
4682 	struct mlxsw_sp_fib_node *fib_node;
4683 
4684 	if (mlxsw_sp->router->aborted)
4685 		return;
4686 
4687 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4688 	if (WARN_ON(!fib4_entry))
4689 		return;
4690 	fib_node = fib4_entry->common.fib_node;
4691 
4692 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4693 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4694 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4695 }
4696 
4697 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4698 {
4699 	/* Packets with link-local destination IP arriving to the router
4700 	 * are trapped to the CPU, so no need to program specific routes
4701 	 * for them.
4702 	 */
4703 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4704 		return true;
4705 
4706 	/* Multicast routes aren't supported, so ignore them. Neighbour
4707 	 * Discovery packets are specifically trapped.
4708 	 */
4709 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4710 		return true;
4711 
4712 	/* Cloned routes are irrelevant in the forwarding path. */
4713 	if (rt->fib6_flags & RTF_CACHE)
4714 		return true;
4715 
4716 	return false;
4717 }
4718 
4719 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4720 {
4721 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4722 
4723 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4724 	if (!mlxsw_sp_rt6)
4725 		return ERR_PTR(-ENOMEM);
4726 
4727 	/* In case of route replace, replaced route is deleted with
4728 	 * no notification. Take reference to prevent accessing freed
4729 	 * memory.
4730 	 */
4731 	mlxsw_sp_rt6->rt = rt;
4732 	fib6_info_hold(rt);
4733 
4734 	return mlxsw_sp_rt6;
4735 }
4736 
4737 #if IS_ENABLED(CONFIG_IPV6)
4738 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4739 {
4740 	fib6_info_release(rt);
4741 }
4742 #else
4743 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4744 {
4745 }
4746 #endif
4747 
4748 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4749 {
4750 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4751 	kfree(mlxsw_sp_rt6);
4752 }
4753 
4754 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4755 {
4756 	/* RTF_CACHE routes are ignored */
4757 	return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4758 }
4759 
4760 static struct fib6_info *
4761 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4762 {
4763 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4764 				list)->rt;
4765 }
4766 
4767 static struct mlxsw_sp_fib6_entry *
4768 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4769 				 const struct fib6_info *nrt, bool replace)
4770 {
4771 	struct mlxsw_sp_fib6_entry *fib6_entry;
4772 
4773 	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4774 		return NULL;
4775 
4776 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4777 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4778 
4779 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4780 		 * virtual router.
4781 		 */
4782 		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4783 			continue;
4784 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4785 			break;
4786 		if (rt->fib6_metric < nrt->fib6_metric)
4787 			continue;
4788 		if (rt->fib6_metric == nrt->fib6_metric &&
4789 		    mlxsw_sp_fib6_rt_can_mp(rt))
4790 			return fib6_entry;
4791 		if (rt->fib6_metric > nrt->fib6_metric)
4792 			break;
4793 	}
4794 
4795 	return NULL;
4796 }
4797 
4798 static struct mlxsw_sp_rt6 *
4799 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4800 			    const struct fib6_info *rt)
4801 {
4802 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4803 
4804 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4805 		if (mlxsw_sp_rt6->rt == rt)
4806 			return mlxsw_sp_rt6;
4807 	}
4808 
4809 	return NULL;
4810 }
4811 
4812 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4813 					const struct fib6_info *rt,
4814 					enum mlxsw_sp_ipip_type *ret)
4815 {
4816 	return rt->fib6_nh.nh_dev &&
4817 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
4818 }
4819 
4820 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4821 				       struct mlxsw_sp_nexthop_group *nh_grp,
4822 				       struct mlxsw_sp_nexthop *nh,
4823 				       const struct fib6_info *rt)
4824 {
4825 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4826 	struct mlxsw_sp_ipip_entry *ipip_entry;
4827 	struct net_device *dev = rt->fib6_nh.nh_dev;
4828 	struct mlxsw_sp_rif *rif;
4829 	int err;
4830 
4831 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4832 	if (ipip_entry) {
4833 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4834 		if (ipip_ops->can_offload(mlxsw_sp, dev,
4835 					  MLXSW_SP_L3_PROTO_IPV6)) {
4836 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4837 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4838 			return 0;
4839 		}
4840 	}
4841 
4842 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4843 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4844 	if (!rif)
4845 		return 0;
4846 	mlxsw_sp_nexthop_rif_init(nh, rif);
4847 
4848 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4849 	if (err)
4850 		goto err_nexthop_neigh_init;
4851 
4852 	return 0;
4853 
4854 err_nexthop_neigh_init:
4855 	mlxsw_sp_nexthop_rif_fini(nh);
4856 	return err;
4857 }
4858 
/* Tear down the type-specific part of the IPv6 nexthop @nh; this is a
 * plain wrapper around the common nexthop type teardown.
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	/* Nothing IPv6-specific to undo */
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
4864 
4865 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4866 				  struct mlxsw_sp_nexthop_group *nh_grp,
4867 				  struct mlxsw_sp_nexthop *nh,
4868 				  const struct fib6_info *rt)
4869 {
4870 	struct net_device *dev = rt->fib6_nh.nh_dev;
4871 
4872 	nh->nh_grp = nh_grp;
4873 	nh->nh_weight = rt->fib6_nh.nh_weight;
4874 	memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
4875 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4876 
4877 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4878 
4879 	if (!dev)
4880 		return 0;
4881 	nh->ifindex = dev->ifindex;
4882 
4883 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4884 }
4885 
4886 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4887 				   struct mlxsw_sp_nexthop *nh)
4888 {
4889 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4890 	list_del(&nh->router_list_node);
4891 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4892 }
4893 
4894 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4895 				    const struct fib6_info *rt)
4896 {
4897 	return rt->fib6_flags & RTF_GATEWAY ||
4898 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4899 }
4900 
4901 static struct mlxsw_sp_nexthop_group *
4902 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4903 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4904 {
4905 	struct mlxsw_sp_nexthop_group *nh_grp;
4906 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4907 	struct mlxsw_sp_nexthop *nh;
4908 	size_t alloc_size;
4909 	int i = 0;
4910 	int err;
4911 
4912 	alloc_size = sizeof(*nh_grp) +
4913 		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4914 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4915 	if (!nh_grp)
4916 		return ERR_PTR(-ENOMEM);
4917 	INIT_LIST_HEAD(&nh_grp->fib_list);
4918 #if IS_ENABLED(CONFIG_IPV6)
4919 	nh_grp->neigh_tbl = &nd_tbl;
4920 #endif
4921 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4922 					struct mlxsw_sp_rt6, list);
4923 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4924 	nh_grp->count = fib6_entry->nrt6;
4925 	for (i = 0; i < nh_grp->count; i++) {
4926 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
4927 
4928 		nh = &nh_grp->nexthops[i];
4929 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4930 		if (err)
4931 			goto err_nexthop6_init;
4932 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4933 	}
4934 
4935 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4936 	if (err)
4937 		goto err_nexthop_group_insert;
4938 
4939 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4940 	return nh_grp;
4941 
4942 err_nexthop_group_insert:
4943 err_nexthop6_init:
4944 	for (i--; i >= 0; i--) {
4945 		nh = &nh_grp->nexthops[i];
4946 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4947 	}
4948 	kfree(nh_grp);
4949 	return ERR_PTR(err);
4950 }
4951 
4952 static void
4953 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4954 				struct mlxsw_sp_nexthop_group *nh_grp)
4955 {
4956 	struct mlxsw_sp_nexthop *nh;
4957 	int i = nh_grp->count;
4958 
4959 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4960 	for (i--; i >= 0; i--) {
4961 		nh = &nh_grp->nexthops[i];
4962 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4963 	}
4964 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4965 	WARN_ON(nh_grp->adj_index_valid);
4966 	kfree(nh_grp);
4967 }
4968 
4969 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4970 				       struct mlxsw_sp_fib6_entry *fib6_entry)
4971 {
4972 	struct mlxsw_sp_nexthop_group *nh_grp;
4973 
4974 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4975 	if (!nh_grp) {
4976 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4977 		if (IS_ERR(nh_grp))
4978 			return PTR_ERR(nh_grp);
4979 	}
4980 
4981 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4982 		      &nh_grp->fib_list);
4983 	fib6_entry->common.nh_group = nh_grp;
4984 
4985 	return 0;
4986 }
4987 
4988 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4989 					struct mlxsw_sp_fib_entry *fib_entry)
4990 {
4991 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4992 
4993 	list_del(&fib_entry->nexthop_group_node);
4994 	if (!list_empty(&nh_grp->fib_list))
4995 		return;
4996 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4997 }
4998 
4999 static int
5000 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5001 			       struct mlxsw_sp_fib6_entry *fib6_entry)
5002 {
5003 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5004 	int err;
5005 
5006 	fib6_entry->common.nh_group = NULL;
5007 	list_del(&fib6_entry->common.nexthop_group_node);
5008 
5009 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5010 	if (err)
5011 		goto err_nexthop6_group_get;
5012 
5013 	/* In case this entry is offloaded, then the adjacency index
5014 	 * currently associated with it in the device's table is that
5015 	 * of the old group. Start using the new one instead.
5016 	 */
5017 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5018 	if (err)
5019 		goto err_fib_node_entry_add;
5020 
5021 	if (list_empty(&old_nh_grp->fib_list))
5022 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5023 
5024 	return 0;
5025 
5026 err_fib_node_entry_add:
5027 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5028 err_nexthop6_group_get:
5029 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5030 		      &old_nh_grp->fib_list);
5031 	fib6_entry->common.nh_group = old_nh_grp;
5032 	return err;
5033 }
5034 
5035 static int
5036 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5037 				struct mlxsw_sp_fib6_entry *fib6_entry,
5038 				struct fib6_info *rt)
5039 {
5040 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5041 	int err;
5042 
5043 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5044 	if (IS_ERR(mlxsw_sp_rt6))
5045 		return PTR_ERR(mlxsw_sp_rt6);
5046 
5047 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5048 	fib6_entry->nrt6++;
5049 
5050 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5051 	if (err)
5052 		goto err_nexthop6_group_update;
5053 
5054 	return 0;
5055 
5056 err_nexthop6_group_update:
5057 	fib6_entry->nrt6--;
5058 	list_del(&mlxsw_sp_rt6->list);
5059 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5060 	return err;
5061 }
5062 
5063 static void
5064 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5065 				struct mlxsw_sp_fib6_entry *fib6_entry,
5066 				struct fib6_info *rt)
5067 {
5068 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5069 
5070 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5071 	if (WARN_ON(!mlxsw_sp_rt6))
5072 		return;
5073 
5074 	fib6_entry->nrt6--;
5075 	list_del(&mlxsw_sp_rt6->list);
5076 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5077 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5078 }
5079 
5080 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5081 					 struct mlxsw_sp_fib_entry *fib_entry,
5082 					 const struct fib6_info *rt)
5083 {
5084 	/* Packets hitting RTF_REJECT routes need to be discarded by the
5085 	 * stack. We can rely on their destination device not having a
5086 	 * RIF (it's the loopback device) and can thus use action type
5087 	 * local, which will cause them to be trapped with a lower
5088 	 * priority than packets that need to be locally received.
5089 	 */
5090 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5091 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5092 	else if (rt->fib6_flags & RTF_REJECT)
5093 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5094 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5095 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5096 	else
5097 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5098 }
5099 
5100 static void
5101 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5102 {
5103 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5104 
5105 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5106 				 list) {
5107 		fib6_entry->nrt6--;
5108 		list_del(&mlxsw_sp_rt6->list);
5109 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5110 	}
5111 }
5112 
5113 static struct mlxsw_sp_fib6_entry *
5114 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5115 			   struct mlxsw_sp_fib_node *fib_node,
5116 			   struct fib6_info *rt)
5117 {
5118 	struct mlxsw_sp_fib6_entry *fib6_entry;
5119 	struct mlxsw_sp_fib_entry *fib_entry;
5120 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5121 	int err;
5122 
5123 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5124 	if (!fib6_entry)
5125 		return ERR_PTR(-ENOMEM);
5126 	fib_entry = &fib6_entry->common;
5127 
5128 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5129 	if (IS_ERR(mlxsw_sp_rt6)) {
5130 		err = PTR_ERR(mlxsw_sp_rt6);
5131 		goto err_rt6_create;
5132 	}
5133 
5134 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5135 
5136 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5137 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5138 	fib6_entry->nrt6 = 1;
5139 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5140 	if (err)
5141 		goto err_nexthop6_group_get;
5142 
5143 	fib_entry->fib_node = fib_node;
5144 
5145 	return fib6_entry;
5146 
5147 err_nexthop6_group_get:
5148 	list_del(&mlxsw_sp_rt6->list);
5149 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5150 err_rt6_create:
5151 	kfree(fib6_entry);
5152 	return ERR_PTR(err);
5153 }
5154 
5155 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5156 					struct mlxsw_sp_fib6_entry *fib6_entry)
5157 {
5158 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5159 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5160 	WARN_ON(fib6_entry->nrt6);
5161 	kfree(fib6_entry);
5162 }
5163 
5164 static struct mlxsw_sp_fib6_entry *
5165 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5166 			      const struct fib6_info *nrt, bool replace)
5167 {
5168 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5169 
5170 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5171 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5172 
5173 		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5174 			continue;
5175 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5176 			break;
5177 		if (replace && rt->fib6_metric == nrt->fib6_metric) {
5178 			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5179 			    mlxsw_sp_fib6_rt_can_mp(nrt))
5180 				return fib6_entry;
5181 			if (mlxsw_sp_fib6_rt_can_mp(nrt))
5182 				fallback = fallback ?: fib6_entry;
5183 		}
5184 		if (rt->fib6_metric > nrt->fib6_metric)
5185 			return fallback ?: fib6_entry;
5186 	}
5187 
5188 	return fallback;
5189 }
5190 
5191 static int
5192 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5193 			       bool replace)
5194 {
5195 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5196 	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5197 	struct mlxsw_sp_fib6_entry *fib6_entry;
5198 
5199 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5200 
5201 	if (replace && WARN_ON(!fib6_entry))
5202 		return -EINVAL;
5203 
5204 	if (fib6_entry) {
5205 		list_add_tail(&new6_entry->common.list,
5206 			      &fib6_entry->common.list);
5207 	} else {
5208 		struct mlxsw_sp_fib6_entry *last;
5209 
5210 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
5211 			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5212 
5213 			if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5214 				break;
5215 			fib6_entry = last;
5216 		}
5217 
5218 		if (fib6_entry)
5219 			list_add(&new6_entry->common.list,
5220 				 &fib6_entry->common.list);
5221 		else
5222 			list_add(&new6_entry->common.list,
5223 				 &fib_node->entry_list);
5224 	}
5225 
5226 	return 0;
5227 }
5228 
5229 static void
5230 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5231 {
5232 	list_del(&fib6_entry->common.list);
5233 }
5234 
5235 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5236 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5237 					 bool replace)
5238 {
5239 	int err;
5240 
5241 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5242 	if (err)
5243 		return err;
5244 
5245 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5246 	if (err)
5247 		goto err_fib_node_entry_add;
5248 
5249 	return 0;
5250 
5251 err_fib_node_entry_add:
5252 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5253 	return err;
5254 }
5255 
5256 static void
5257 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5258 				struct mlxsw_sp_fib6_entry *fib6_entry)
5259 {
5260 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5261 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5262 }
5263 
5264 static struct mlxsw_sp_fib6_entry *
5265 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5266 			   const struct fib6_info *rt)
5267 {
5268 	struct mlxsw_sp_fib6_entry *fib6_entry;
5269 	struct mlxsw_sp_fib_node *fib_node;
5270 	struct mlxsw_sp_fib *fib;
5271 	struct mlxsw_sp_vr *vr;
5272 
5273 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5274 	if (!vr)
5275 		return NULL;
5276 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5277 
5278 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5279 					    sizeof(rt->fib6_dst.addr),
5280 					    rt->fib6_dst.plen);
5281 	if (!fib_node)
5282 		return NULL;
5283 
5284 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5285 		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5286 
5287 		if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5288 		    rt->fib6_metric == iter_rt->fib6_metric &&
5289 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5290 			return fib6_entry;
5291 	}
5292 
5293 	return NULL;
5294 }
5295 
5296 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5297 					struct mlxsw_sp_fib6_entry *fib6_entry,
5298 					bool replace)
5299 {
5300 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5301 	struct mlxsw_sp_fib6_entry *replaced;
5302 
5303 	if (!replace)
5304 		return;
5305 
5306 	replaced = list_next_entry(fib6_entry, common.list);
5307 
5308 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5309 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5310 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5311 }
5312 
5313 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5314 				    struct fib6_info *rt, bool replace)
5315 {
5316 	struct mlxsw_sp_fib6_entry *fib6_entry;
5317 	struct mlxsw_sp_fib_node *fib_node;
5318 	int err;
5319 
5320 	if (mlxsw_sp->router->aborted)
5321 		return 0;
5322 
5323 	if (rt->fib6_src.plen)
5324 		return -EINVAL;
5325 
5326 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5327 		return 0;
5328 
5329 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5330 					 &rt->fib6_dst.addr,
5331 					 sizeof(rt->fib6_dst.addr),
5332 					 rt->fib6_dst.plen,
5333 					 MLXSW_SP_L3_PROTO_IPV6);
5334 	if (IS_ERR(fib_node))
5335 		return PTR_ERR(fib_node);
5336 
5337 	/* Before creating a new entry, try to append route to an existing
5338 	 * multipath entry.
5339 	 */
5340 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5341 	if (fib6_entry) {
5342 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5343 		if (err)
5344 			goto err_fib6_entry_nexthop_add;
5345 		return 0;
5346 	}
5347 
5348 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5349 	if (IS_ERR(fib6_entry)) {
5350 		err = PTR_ERR(fib6_entry);
5351 		goto err_fib6_entry_create;
5352 	}
5353 
5354 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5355 	if (err)
5356 		goto err_fib6_node_entry_link;
5357 
5358 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5359 
5360 	return 0;
5361 
5362 err_fib6_node_entry_link:
5363 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5364 err_fib6_entry_create:
5365 err_fib6_entry_nexthop_add:
5366 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5367 	return err;
5368 }
5369 
5370 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5371 				     struct fib6_info *rt)
5372 {
5373 	struct mlxsw_sp_fib6_entry *fib6_entry;
5374 	struct mlxsw_sp_fib_node *fib_node;
5375 
5376 	if (mlxsw_sp->router->aborted)
5377 		return;
5378 
5379 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5380 		return;
5381 
5382 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5383 	if (WARN_ON(!fib6_entry))
5384 		return;
5385 
5386 	/* If route is part of a multipath entry, but not the last one
5387 	 * removed, then only reduce its nexthop group.
5388 	 */
5389 	if (!list_is_singular(&fib6_entry->rt6_list)) {
5390 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5391 		return;
5392 	}
5393 
5394 	fib_node = fib6_entry->common.fib_node;
5395 
5396 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5397 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5398 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5399 }
5400 
5401 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5402 					    enum mlxsw_reg_ralxx_protocol proto,
5403 					    u8 tree_id)
5404 {
5405 	char ralta_pl[MLXSW_REG_RALTA_LEN];
5406 	char ralst_pl[MLXSW_REG_RALST_LEN];
5407 	int i, err;
5408 
5409 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5410 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5411 	if (err)
5412 		return err;
5413 
5414 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5415 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5416 	if (err)
5417 		return err;
5418 
5419 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5420 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5421 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5422 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5423 
5424 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5425 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5426 				      raltb_pl);
5427 		if (err)
5428 			return err;
5429 
5430 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5431 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5432 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5433 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5434 				      ralue_pl);
5435 		if (err)
5436 			return err;
5437 	}
5438 
5439 	return 0;
5440 }
5441 
5442 static struct mlxsw_sp_mr_table *
5443 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5444 {
5445 	if (family == RTNL_FAMILY_IPMR)
5446 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5447 	else
5448 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5449 }
5450 
5451 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5452 				     struct mfc_entry_notifier_info *men_info,
5453 				     bool replace)
5454 {
5455 	struct mlxsw_sp_mr_table *mrt;
5456 	struct mlxsw_sp_vr *vr;
5457 
5458 	if (mlxsw_sp->router->aborted)
5459 		return 0;
5460 
5461 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5462 	if (IS_ERR(vr))
5463 		return PTR_ERR(vr);
5464 
5465 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5466 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5467 }
5468 
5469 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5470 				      struct mfc_entry_notifier_info *men_info)
5471 {
5472 	struct mlxsw_sp_mr_table *mrt;
5473 	struct mlxsw_sp_vr *vr;
5474 
5475 	if (mlxsw_sp->router->aborted)
5476 		return;
5477 
5478 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5479 	if (WARN_ON(!vr))
5480 		return;
5481 
5482 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5483 	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5484 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5485 }
5486 
5487 static int
5488 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5489 			      struct vif_entry_notifier_info *ven_info)
5490 {
5491 	struct mlxsw_sp_mr_table *mrt;
5492 	struct mlxsw_sp_rif *rif;
5493 	struct mlxsw_sp_vr *vr;
5494 
5495 	if (mlxsw_sp->router->aborted)
5496 		return 0;
5497 
5498 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5499 	if (IS_ERR(vr))
5500 		return PTR_ERR(vr);
5501 
5502 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5503 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5504 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5505 				   ven_info->vif_index,
5506 				   ven_info->vif_flags, rif);
5507 }
5508 
5509 static void
5510 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5511 			      struct vif_entry_notifier_info *ven_info)
5512 {
5513 	struct mlxsw_sp_mr_table *mrt;
5514 	struct mlxsw_sp_vr *vr;
5515 
5516 	if (mlxsw_sp->router->aborted)
5517 		return;
5518 
5519 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5520 	if (WARN_ON(!vr))
5521 		return;
5522 
5523 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5524 	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5525 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5526 }
5527 
5528 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5529 {
5530 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5531 	int err;
5532 
5533 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5534 					       MLXSW_SP_LPM_TREE_MIN);
5535 	if (err)
5536 		return err;
5537 
5538 	/* The multicast router code does not need an abort trap as by default,
5539 	 * packets that don't match any routes are trapped to the CPU.
5540 	 */
5541 
5542 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5543 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5544 						MLXSW_SP_LPM_TREE_MIN + 1);
5545 }
5546 
5547 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5548 				     struct mlxsw_sp_fib_node *fib_node)
5549 {
5550 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5551 
5552 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5553 				 common.list) {
5554 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5555 
5556 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5557 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5558 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5559 		/* Break when entry list is empty and node was freed.
5560 		 * Otherwise, we'll access freed memory in the next
5561 		 * iteration.
5562 		 */
5563 		if (do_break)
5564 			break;
5565 	}
5566 }
5567 
5568 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5569 				     struct mlxsw_sp_fib_node *fib_node)
5570 {
5571 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5572 
5573 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5574 				 common.list) {
5575 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5576 
5577 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5578 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5579 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5580 		if (do_break)
5581 			break;
5582 	}
5583 }
5584 
5585 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5586 				    struct mlxsw_sp_fib_node *fib_node)
5587 {
5588 	switch (fib_node->fib->proto) {
5589 	case MLXSW_SP_L3_PROTO_IPV4:
5590 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5591 		break;
5592 	case MLXSW_SP_L3_PROTO_IPV6:
5593 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5594 		break;
5595 	}
5596 }
5597 
5598 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5599 				  struct mlxsw_sp_vr *vr,
5600 				  enum mlxsw_sp_l3proto proto)
5601 {
5602 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5603 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5604 
5605 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5606 		bool do_break = &tmp->list == &fib->node_list;
5607 
5608 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5609 		if (do_break)
5610 			break;
5611 	}
5612 }
5613 
5614 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5615 {
5616 	int i, j;
5617 
5618 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5619 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5620 
5621 		if (!mlxsw_sp_vr_is_used(vr))
5622 			continue;
5623 
5624 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5625 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5626 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5627 
5628 		/* If virtual router was only used for IPv4, then it's no
5629 		 * longer used.
5630 		 */
5631 		if (!mlxsw_sp_vr_is_used(vr))
5632 			continue;
5633 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5634 	}
5635 }
5636 
5637 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5638 {
5639 	int err;
5640 
5641 	if (mlxsw_sp->router->aborted)
5642 		return;
5643 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5644 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5645 	mlxsw_sp->router->aborted = true;
5646 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5647 	if (err)
5648 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5649 }
5650 
5651 struct mlxsw_sp_fib_event_work {
5652 	struct work_struct work;
5653 	union {
5654 		struct fib6_entry_notifier_info fen6_info;
5655 		struct fib_entry_notifier_info fen_info;
5656 		struct fib_rule_notifier_info fr_info;
5657 		struct fib_nh_notifier_info fnh_info;
5658 		struct mfc_entry_notifier_info men_info;
5659 		struct vif_entry_notifier_info ven_info;
5660 	};
5661 	struct mlxsw_sp *mlxsw_sp;
5662 	unsigned long event;
5663 };
5664 
5665 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5666 {
5667 	struct mlxsw_sp_fib_event_work *fib_work =
5668 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5669 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5670 	bool replace, append;
5671 	int err;
5672 
5673 	/* Protect internal structures from changes */
5674 	rtnl_lock();
5675 	mlxsw_sp_span_respin(mlxsw_sp);
5676 
5677 	switch (fib_work->event) {
5678 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5679 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5680 	case FIB_EVENT_ENTRY_ADD:
5681 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5682 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5683 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5684 					       replace, append);
5685 		if (err)
5686 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5687 		fib_info_put(fib_work->fen_info.fi);
5688 		break;
5689 	case FIB_EVENT_ENTRY_DEL:
5690 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5691 		fib_info_put(fib_work->fen_info.fi);
5692 		break;
5693 	case FIB_EVENT_RULE_ADD:
5694 		/* if we get here, a rule was added that we do not support.
5695 		 * just do the fib_abort
5696 		 */
5697 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5698 		break;
5699 	case FIB_EVENT_NH_ADD: /* fall through */
5700 	case FIB_EVENT_NH_DEL:
5701 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5702 					fib_work->fnh_info.fib_nh);
5703 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5704 		break;
5705 	}
5706 	rtnl_unlock();
5707 	kfree(fib_work);
5708 }
5709 
5710 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5711 {
5712 	struct mlxsw_sp_fib_event_work *fib_work =
5713 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5714 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5715 	bool replace;
5716 	int err;
5717 
5718 	rtnl_lock();
5719 	mlxsw_sp_span_respin(mlxsw_sp);
5720 
5721 	switch (fib_work->event) {
5722 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5723 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5724 	case FIB_EVENT_ENTRY_ADD:
5725 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5726 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5727 					       fib_work->fen6_info.rt, replace);
5728 		if (err)
5729 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5730 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5731 		break;
5732 	case FIB_EVENT_ENTRY_DEL:
5733 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5734 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5735 		break;
5736 	case FIB_EVENT_RULE_ADD:
5737 		/* if we get here, a rule was added that we do not support.
5738 		 * just do the fib_abort
5739 		 */
5740 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5741 		break;
5742 	}
5743 	rtnl_unlock();
5744 	kfree(fib_work);
5745 }
5746 
5747 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5748 {
5749 	struct mlxsw_sp_fib_event_work *fib_work =
5750 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5751 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5752 	bool replace;
5753 	int err;
5754 
5755 	rtnl_lock();
5756 	switch (fib_work->event) {
5757 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5758 	case FIB_EVENT_ENTRY_ADD:
5759 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5760 
5761 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5762 						replace);
5763 		if (err)
5764 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5765 		mr_cache_put(fib_work->men_info.mfc);
5766 		break;
5767 	case FIB_EVENT_ENTRY_DEL:
5768 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5769 		mr_cache_put(fib_work->men_info.mfc);
5770 		break;
5771 	case FIB_EVENT_VIF_ADD:
5772 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5773 						    &fib_work->ven_info);
5774 		if (err)
5775 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5776 		dev_put(fib_work->ven_info.dev);
5777 		break;
5778 	case FIB_EVENT_VIF_DEL:
5779 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5780 					      &fib_work->ven_info);
5781 		dev_put(fib_work->ven_info.dev);
5782 		break;
5783 	case FIB_EVENT_RULE_ADD:
5784 		/* if we get here, a rule was added that we do not support.
5785 		 * just do the fib_abort
5786 		 */
5787 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5788 		break;
5789 	}
5790 	rtnl_unlock();
5791 	kfree(fib_work);
5792 }
5793 
5794 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5795 				       struct fib_notifier_info *info)
5796 {
5797 	struct fib_entry_notifier_info *fen_info;
5798 	struct fib_nh_notifier_info *fnh_info;
5799 
5800 	switch (fib_work->event) {
5801 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5802 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5803 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5804 	case FIB_EVENT_ENTRY_DEL:
5805 		fen_info = container_of(info, struct fib_entry_notifier_info,
5806 					info);
5807 		fib_work->fen_info = *fen_info;
5808 		/* Take reference on fib_info to prevent it from being
5809 		 * freed while work is queued. Release it afterwards.
5810 		 */
5811 		fib_info_hold(fib_work->fen_info.fi);
5812 		break;
5813 	case FIB_EVENT_NH_ADD: /* fall through */
5814 	case FIB_EVENT_NH_DEL:
5815 		fnh_info = container_of(info, struct fib_nh_notifier_info,
5816 					info);
5817 		fib_work->fnh_info = *fnh_info;
5818 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5819 		break;
5820 	}
5821 }
5822 
5823 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5824 				       struct fib_notifier_info *info)
5825 {
5826 	struct fib6_entry_notifier_info *fen6_info;
5827 
5828 	switch (fib_work->event) {
5829 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5830 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5831 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5832 	case FIB_EVENT_ENTRY_DEL:
5833 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
5834 					 info);
5835 		fib_work->fen6_info = *fen6_info;
5836 		fib6_info_hold(fib_work->fen6_info.rt);
5837 		break;
5838 	}
5839 }
5840 
5841 static void
5842 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5843 			    struct fib_notifier_info *info)
5844 {
5845 	switch (fib_work->event) {
5846 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5847 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5848 	case FIB_EVENT_ENTRY_DEL:
5849 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5850 		mr_cache_hold(fib_work->men_info.mfc);
5851 		break;
5852 	case FIB_EVENT_VIF_ADD: /* fall through */
5853 	case FIB_EVENT_VIF_DEL:
5854 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5855 		dev_hold(fib_work->ven_info.dev);
5856 		break;
5857 	}
5858 }
5859 
5860 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5861 					  struct fib_notifier_info *info,
5862 					  struct mlxsw_sp *mlxsw_sp)
5863 {
5864 	struct netlink_ext_ack *extack = info->extack;
5865 	struct fib_rule_notifier_info *fr_info;
5866 	struct fib_rule *rule;
5867 	int err = 0;
5868 
5869 	/* nothing to do at the moment */
5870 	if (event == FIB_EVENT_RULE_DEL)
5871 		return 0;
5872 
5873 	if (mlxsw_sp->router->aborted)
5874 		return 0;
5875 
5876 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
5877 	rule = fr_info->rule;
5878 
5879 	switch (info->family) {
5880 	case AF_INET:
5881 		if (!fib4_rule_default(rule) && !rule->l3mdev)
5882 			err = -EOPNOTSUPP;
5883 		break;
5884 	case AF_INET6:
5885 		if (!fib6_rule_default(rule) && !rule->l3mdev)
5886 			err = -EOPNOTSUPP;
5887 		break;
5888 	case RTNL_FAMILY_IPMR:
5889 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
5890 			err = -EOPNOTSUPP;
5891 		break;
5892 	case RTNL_FAMILY_IP6MR:
5893 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
5894 			err = -EOPNOTSUPP;
5895 		break;
5896 	}
5897 
5898 	if (err < 0)
5899 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
5900 
5901 	return err;
5902 }
5903 
5904 /* Called with rcu_read_lock() */
5905 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5906 				     unsigned long event, void *ptr)
5907 {
5908 	struct mlxsw_sp_fib_event_work *fib_work;
5909 	struct fib_notifier_info *info = ptr;
5910 	struct mlxsw_sp_router *router;
5911 	int err;
5912 
5913 	if (!net_eq(info->net, &init_net) ||
5914 	    (info->family != AF_INET && info->family != AF_INET6 &&
5915 	     info->family != RTNL_FAMILY_IPMR &&
5916 	     info->family != RTNL_FAMILY_IP6MR))
5917 		return NOTIFY_DONE;
5918 
5919 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5920 
5921 	switch (event) {
5922 	case FIB_EVENT_RULE_ADD: /* fall through */
5923 	case FIB_EVENT_RULE_DEL:
5924 		err = mlxsw_sp_router_fib_rule_event(event, info,
5925 						     router->mlxsw_sp);
5926 		if (!err || info->extack)
5927 			return notifier_from_errno(err);
5928 		break;
5929 	case FIB_EVENT_ENTRY_ADD:
5930 		if (router->aborted) {
5931 			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
5932 			return notifier_from_errno(-EINVAL);
5933 		}
5934 		break;
5935 	}
5936 
5937 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5938 	if (WARN_ON(!fib_work))
5939 		return NOTIFY_BAD;
5940 
5941 	fib_work->mlxsw_sp = router->mlxsw_sp;
5942 	fib_work->event = event;
5943 
5944 	switch (info->family) {
5945 	case AF_INET:
5946 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5947 		mlxsw_sp_router_fib4_event(fib_work, info);
5948 		break;
5949 	case AF_INET6:
5950 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5951 		mlxsw_sp_router_fib6_event(fib_work, info);
5952 		break;
5953 	case RTNL_FAMILY_IP6MR:
5954 	case RTNL_FAMILY_IPMR:
5955 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5956 		mlxsw_sp_router_fibmr_event(fib_work, info);
5957 		break;
5958 	}
5959 
5960 	mlxsw_core_schedule_work(&fib_work->work);
5961 
5962 	return NOTIFY_DONE;
5963 }
5964 
5965 struct mlxsw_sp_rif *
5966 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5967 			 const struct net_device *dev)
5968 {
5969 	int i;
5970 
5971 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5972 		if (mlxsw_sp->router->rifs[i] &&
5973 		    mlxsw_sp->router->rifs[i]->dev == dev)
5974 			return mlxsw_sp->router->rifs[i];
5975 
5976 	return NULL;
5977 }
5978 
5979 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5980 {
5981 	char ritr_pl[MLXSW_REG_RITR_LEN];
5982 	int err;
5983 
5984 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5985 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5986 	if (WARN_ON_ONCE(err))
5987 		return err;
5988 
5989 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
5990 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5991 }
5992 
5993 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
5994 					  struct mlxsw_sp_rif *rif)
5995 {
5996 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
5997 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
5998 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
5999 }
6000 
6001 static bool
6002 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6003 			   unsigned long event)
6004 {
6005 	struct inet6_dev *inet6_dev;
6006 	bool addr_list_empty = true;
6007 	struct in_device *idev;
6008 
6009 	switch (event) {
6010 	case NETDEV_UP:
6011 		return rif == NULL;
6012 	case NETDEV_DOWN:
6013 		idev = __in_dev_get_rtnl(dev);
6014 		if (idev && idev->ifa_list)
6015 			addr_list_empty = false;
6016 
6017 		inet6_dev = __in6_dev_get(dev);
6018 		if (addr_list_empty && inet6_dev &&
6019 		    !list_empty(&inet6_dev->addr_list))
6020 			addr_list_empty = false;
6021 
6022 		/* macvlans do not have a RIF, but rather piggy back on the
6023 		 * RIF of their lower device.
6024 		 */
6025 		if (netif_is_macvlan(dev) && addr_list_empty)
6026 			return true;
6027 
6028 		if (rif && addr_list_empty &&
6029 		    !netif_is_l3_slave(rif->dev))
6030 			return true;
6031 		/* It is possible we already removed the RIF ourselves
6032 		 * if it was assigned to a netdev that is now a bridge
6033 		 * or LAG slave.
6034 		 */
6035 		return false;
6036 	}
6037 
6038 	return false;
6039 }
6040 
6041 static enum mlxsw_sp_rif_type
6042 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6043 		      const struct net_device *dev)
6044 {
6045 	enum mlxsw_sp_fid_type type;
6046 
6047 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6048 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
6049 
6050 	/* Otherwise RIF type is derived from the type of the underlying FID. */
6051 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6052 		type = MLXSW_SP_FID_TYPE_8021Q;
6053 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6054 		type = MLXSW_SP_FID_TYPE_8021Q;
6055 	else if (netif_is_bridge_master(dev))
6056 		type = MLXSW_SP_FID_TYPE_8021D;
6057 	else
6058 		type = MLXSW_SP_FID_TYPE_RFID;
6059 
6060 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6061 }
6062 
6063 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6064 {
6065 	int i;
6066 
6067 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6068 		if (!mlxsw_sp->router->rifs[i]) {
6069 			*p_rif_index = i;
6070 			return 0;
6071 		}
6072 	}
6073 
6074 	return -ENOBUFS;
6075 }
6076 
6077 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6078 					       u16 vr_id,
6079 					       struct net_device *l3_dev)
6080 {
6081 	struct mlxsw_sp_rif *rif;
6082 
6083 	rif = kzalloc(rif_size, GFP_KERNEL);
6084 	if (!rif)
6085 		return NULL;
6086 
6087 	INIT_LIST_HEAD(&rif->nexthop_list);
6088 	INIT_LIST_HEAD(&rif->neigh_list);
6089 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
6090 	rif->mtu = l3_dev->mtu;
6091 	rif->vr_id = vr_id;
6092 	rif->dev = l3_dev;
6093 	rif->rif_index = rif_index;
6094 
6095 	return rif;
6096 }
6097 
6098 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6099 					   u16 rif_index)
6100 {
6101 	return mlxsw_sp->router->rifs[rif_index];
6102 }
6103 
6104 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6105 {
6106 	return rif->rif_index;
6107 }
6108 
6109 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6110 {
6111 	return lb_rif->common.rif_index;
6112 }
6113 
6114 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6115 {
6116 	return lb_rif->ul_vr_id;
6117 }
6118 
6119 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6120 {
6121 	return rif->dev->ifindex;
6122 }
6123 
6124 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6125 {
6126 	return rif->dev;
6127 }
6128 
6129 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6130 {
6131 	return rif->fid;
6132 }
6133 
6134 static struct mlxsw_sp_rif *
6135 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6136 		    const struct mlxsw_sp_rif_params *params,
6137 		    struct netlink_ext_ack *extack)
6138 {
6139 	u32 tb_id = l3mdev_fib_table(params->dev);
6140 	const struct mlxsw_sp_rif_ops *ops;
6141 	struct mlxsw_sp_fid *fid = NULL;
6142 	enum mlxsw_sp_rif_type type;
6143 	struct mlxsw_sp_rif *rif;
6144 	struct mlxsw_sp_vr *vr;
6145 	u16 rif_index;
6146 	int i, err;
6147 
6148 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6149 	ops = mlxsw_sp->router->rif_ops_arr[type];
6150 
6151 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6152 	if (IS_ERR(vr))
6153 		return ERR_CAST(vr);
6154 	vr->rif_count++;
6155 
6156 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6157 	if (err) {
6158 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6159 		goto err_rif_index_alloc;
6160 	}
6161 
6162 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6163 	if (!rif) {
6164 		err = -ENOMEM;
6165 		goto err_rif_alloc;
6166 	}
6167 	rif->mlxsw_sp = mlxsw_sp;
6168 	rif->ops = ops;
6169 
6170 	if (ops->fid_get) {
6171 		fid = ops->fid_get(rif, extack);
6172 		if (IS_ERR(fid)) {
6173 			err = PTR_ERR(fid);
6174 			goto err_fid_get;
6175 		}
6176 		rif->fid = fid;
6177 	}
6178 
6179 	if (ops->setup)
6180 		ops->setup(rif, params);
6181 
6182 	err = ops->configure(rif);
6183 	if (err)
6184 		goto err_configure;
6185 
6186 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6187 		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6188 		if (err)
6189 			goto err_mr_rif_add;
6190 	}
6191 
6192 	mlxsw_sp_rif_counters_alloc(rif);
6193 	mlxsw_sp->router->rifs[rif_index] = rif;
6194 
6195 	return rif;
6196 
6197 err_mr_rif_add:
6198 	for (i--; i >= 0; i--)
6199 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6200 	ops->deconfigure(rif);
6201 err_configure:
6202 	if (fid)
6203 		mlxsw_sp_fid_put(fid);
6204 err_fid_get:
6205 	kfree(rif);
6206 err_rif_alloc:
6207 err_rif_index_alloc:
6208 	vr->rif_count--;
6209 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6210 	return ERR_PTR(err);
6211 }
6212 
6213 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6214 {
6215 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6216 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6217 	struct mlxsw_sp_fid *fid = rif->fid;
6218 	struct mlxsw_sp_vr *vr;
6219 	int i;
6220 
6221 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6222 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6223 
6224 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6225 	mlxsw_sp_rif_counters_free(rif);
6226 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6227 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6228 	ops->deconfigure(rif);
6229 	if (fid)
6230 		/* Loopback RIFs are not associated with a FID. */
6231 		mlxsw_sp_fid_put(fid);
6232 	kfree(rif);
6233 	vr->rif_count--;
6234 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6235 }
6236 
/* Destroy the RIF bound to @dev, if there is one; otherwise do nothing. */
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *dev)
{
	struct mlxsw_sp_rif *rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

	if (rif)
		mlxsw_sp_rif_destroy(rif);
}
6247 
6248 static void
6249 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6250 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6251 {
6252 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6253 
6254 	params->vid = mlxsw_sp_port_vlan->vid;
6255 	params->lag = mlxsw_sp_port->lagged;
6256 	if (params->lag)
6257 		params->lag_id = mlxsw_sp_port->lag_id;
6258 	else
6259 		params->system_port = mlxsw_sp_port->local_port;
6260 }
6261 
6262 static int
6263 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6264 			       struct net_device *l3_dev,
6265 			       struct netlink_ext_ack *extack)
6266 {
6267 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6268 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6269 	u16 vid = mlxsw_sp_port_vlan->vid;
6270 	struct mlxsw_sp_rif *rif;
6271 	struct mlxsw_sp_fid *fid;
6272 	int err;
6273 
6274 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6275 	if (!rif) {
6276 		struct mlxsw_sp_rif_params params = {
6277 			.dev = l3_dev,
6278 		};
6279 
6280 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6281 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6282 		if (IS_ERR(rif))
6283 			return PTR_ERR(rif);
6284 	}
6285 
6286 	/* FID was already created, just take a reference */
6287 	fid = rif->ops->fid_get(rif, extack);
6288 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6289 	if (err)
6290 		goto err_fid_port_vid_map;
6291 
6292 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6293 	if (err)
6294 		goto err_port_vid_learning_set;
6295 
6296 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6297 					BR_STATE_FORWARDING);
6298 	if (err)
6299 		goto err_port_vid_stp_set;
6300 
6301 	mlxsw_sp_port_vlan->fid = fid;
6302 
6303 	return 0;
6304 
6305 err_port_vid_stp_set:
6306 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6307 err_port_vid_learning_set:
6308 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6309 err_fid_port_vid_map:
6310 	mlxsw_sp_fid_put(fid);
6311 	return err;
6312 }
6313 
6314 void
6315 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6316 {
6317 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6318 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6319 	u16 vid = mlxsw_sp_port_vlan->vid;
6320 
6321 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6322 		return;
6323 
6324 	mlxsw_sp_port_vlan->fid = NULL;
6325 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6326 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6327 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6328 	/* If router port holds the last reference on the rFID, then the
6329 	 * associated Sub-port RIF will be destroyed.
6330 	 */
6331 	mlxsw_sp_fid_put(fid);
6332 }
6333 
6334 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6335 					     struct net_device *port_dev,
6336 					     unsigned long event, u16 vid,
6337 					     struct netlink_ext_ack *extack)
6338 {
6339 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6340 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6341 
6342 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6343 	if (WARN_ON(!mlxsw_sp_port_vlan))
6344 		return -EINVAL;
6345 
6346 	switch (event) {
6347 	case NETDEV_UP:
6348 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6349 						      l3_dev, extack);
6350 	case NETDEV_DOWN:
6351 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6352 		break;
6353 	}
6354 
6355 	return 0;
6356 }
6357 
6358 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6359 					unsigned long event,
6360 					struct netlink_ext_ack *extack)
6361 {
6362 	if (netif_is_bridge_port(port_dev) ||
6363 	    netif_is_lag_port(port_dev) ||
6364 	    netif_is_ovs_port(port_dev))
6365 		return 0;
6366 
6367 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6368 						 extack);
6369 }
6370 
6371 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6372 					 struct net_device *lag_dev,
6373 					 unsigned long event, u16 vid,
6374 					 struct netlink_ext_ack *extack)
6375 {
6376 	struct net_device *port_dev;
6377 	struct list_head *iter;
6378 	int err;
6379 
6380 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6381 		if (mlxsw_sp_port_dev_check(port_dev)) {
6382 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6383 								port_dev,
6384 								event, vid,
6385 								extack);
6386 			if (err)
6387 				return err;
6388 		}
6389 	}
6390 
6391 	return 0;
6392 }
6393 
/* Handle an address event on a LAG netdev.
 *
 * A LAG that is itself a bridge port is ignored; otherwise the event is
 * propagated to the LAG members with VID 1, the LAG being the L3 netdev.
 */
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
				       unsigned long event,
				       struct netlink_ext_ack *extack)
{
	return netif_is_bridge_port(lag_dev) ?
	       0 :
	       __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
					     extack);
}
6404 
6405 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6406 					  unsigned long event,
6407 					  struct netlink_ext_ack *extack)
6408 {
6409 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6410 	struct mlxsw_sp_rif_params params = {
6411 		.dev = l3_dev,
6412 	};
6413 	struct mlxsw_sp_rif *rif;
6414 
6415 	switch (event) {
6416 	case NETDEV_UP:
6417 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6418 		if (IS_ERR(rif))
6419 			return PTR_ERR(rif);
6420 		break;
6421 	case NETDEV_DOWN:
6422 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6423 		mlxsw_sp_rif_destroy(rif);
6424 		break;
6425 	}
6426 
6427 	return 0;
6428 }
6429 
6430 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6431 					unsigned long event,
6432 					struct netlink_ext_ack *extack)
6433 {
6434 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6435 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6436 
6437 	if (netif_is_bridge_port(vlan_dev))
6438 		return 0;
6439 
6440 	if (mlxsw_sp_port_dev_check(real_dev))
6441 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6442 							 event, vid, extack);
6443 	else if (netif_is_lag_master(real_dev))
6444 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6445 						     vid, extack);
6446 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6447 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6448 
6449 	return 0;
6450 }
6451 
6452 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6453 {
6454 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6455 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6456 
6457 	return ether_addr_equal_masked(mac, vrrp4, mask);
6458 }
6459 
6460 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6461 {
6462 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6463 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6464 
6465 	return ether_addr_equal_masked(mac, vrrp6, mask);
6466 }
6467 
6468 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6469 				const u8 *mac, bool adding)
6470 {
6471 	char ritr_pl[MLXSW_REG_RITR_LEN];
6472 	u8 vrrp_id = adding ? mac[5] : 0;
6473 	int err;
6474 
6475 	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6476 	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6477 		return 0;
6478 
6479 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6480 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6481 	if (err)
6482 		return err;
6483 
6484 	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6485 		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6486 	else
6487 		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6488 
6489 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6490 }
6491 
6492 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6493 				    const struct net_device *macvlan_dev,
6494 				    struct netlink_ext_ack *extack)
6495 {
6496 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6497 	struct mlxsw_sp_rif *rif;
6498 	int err;
6499 
6500 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6501 	if (!rif) {
6502 		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6503 		return -EOPNOTSUPP;
6504 	}
6505 
6506 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6507 				  mlxsw_sp_fid_index(rif->fid), true);
6508 	if (err)
6509 		return err;
6510 
6511 	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6512 				   macvlan_dev->dev_addr, true);
6513 	if (err)
6514 		goto err_rif_vrrp_add;
6515 
6516 	/* Make sure the bridge driver does not have this MAC pointing at
6517 	 * some other port.
6518 	 */
6519 	if (rif->ops->fdb_del)
6520 		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6521 
6522 	return 0;
6523 
6524 err_rif_vrrp_add:
6525 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6526 			    mlxsw_sp_fid_index(rif->fid), false);
6527 	return err;
6528 }
6529 
6530 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6531 			      const struct net_device *macvlan_dev)
6532 {
6533 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6534 	struct mlxsw_sp_rif *rif;
6535 
6536 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6537 	/* If we do not have a RIF, then we already took care of
6538 	 * removing the macvlan's MAC during RIF deletion.
6539 	 */
6540 	if (!rif)
6541 		return;
6542 	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6543 			     false);
6544 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6545 			    mlxsw_sp_fid_index(rif->fid), false);
6546 }
6547 
6548 static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
6549 					   unsigned long event,
6550 					   struct netlink_ext_ack *extack)
6551 {
6552 	struct mlxsw_sp *mlxsw_sp;
6553 
6554 	mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
6555 	if (!mlxsw_sp)
6556 		return 0;
6557 
6558 	switch (event) {
6559 	case NETDEV_UP:
6560 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6561 	case NETDEV_DOWN:
6562 		mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6563 		break;
6564 	}
6565 
6566 	return 0;
6567 }
6568 
/* Dispatch an address event to the handler matching the netdev kind.
 *
 * The checks are ordered: physical port, LAG master, bridge master, VLAN
 * device, macvlan. The first match wins; unmatched netdevs yield 0.
 */
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);

	if (netif_is_macvlan(dev))
		return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);

	return 0;
}
6586 
6587 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6588 			    unsigned long event, void *ptr)
6589 {
6590 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6591 	struct net_device *dev = ifa->ifa_dev->dev;
6592 	struct mlxsw_sp *mlxsw_sp;
6593 	struct mlxsw_sp_rif *rif;
6594 	int err = 0;
6595 
6596 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6597 	if (event == NETDEV_UP)
6598 		goto out;
6599 
6600 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6601 	if (!mlxsw_sp)
6602 		goto out;
6603 
6604 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6605 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6606 		goto out;
6607 
6608 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6609 out:
6610 	return notifier_from_errno(err);
6611 }
6612 
6613 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6614 				  unsigned long event, void *ptr)
6615 {
6616 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6617 	struct net_device *dev = ivi->ivi_dev->dev;
6618 	struct mlxsw_sp *mlxsw_sp;
6619 	struct mlxsw_sp_rif *rif;
6620 	int err = 0;
6621 
6622 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6623 	if (!mlxsw_sp)
6624 		goto out;
6625 
6626 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6627 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6628 		goto out;
6629 
6630 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6631 out:
6632 	return notifier_from_errno(err);
6633 }
6634 
/* Context of a deferred IPv6 address event. It is allocated by
 * mlxsw_sp_inet6addr_event() and freed by the work handler once the event
 * has been processed.
 */
6635 struct mlxsw_sp_inet6addr_event_work {
6636 	struct work_struct work;	/* work item the handler is run from */
6637 	struct net_device *dev;	/* netdev of the event; a reference is held */
6638 	unsigned long event;	/* notifier event code being deferred */
6639 };
6640 
6641 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6642 {
6643 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6644 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6645 	struct net_device *dev = inet6addr_work->dev;
6646 	unsigned long event = inet6addr_work->event;
6647 	struct mlxsw_sp *mlxsw_sp;
6648 	struct mlxsw_sp_rif *rif;
6649 
6650 	rtnl_lock();
6651 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6652 	if (!mlxsw_sp)
6653 		goto out;
6654 
6655 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6656 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6657 		goto out;
6658 
6659 	__mlxsw_sp_inetaddr_event(dev, event, NULL);
6660 out:
6661 	rtnl_unlock();
6662 	dev_put(dev);
6663 	kfree(inet6addr_work);
6664 }
6665 
6666 /* Called with rcu_read_lock() */
6667 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6668 			     unsigned long event, void *ptr)
6669 {
6670 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6671 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6672 	struct net_device *dev = if6->idev->dev;
6673 
6674 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6675 	if (event == NETDEV_UP)
6676 		return NOTIFY_DONE;
6677 
6678 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6679 		return NOTIFY_DONE;
6680 
6681 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6682 	if (!inet6addr_work)
6683 		return NOTIFY_BAD;
6684 
6685 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6686 	inet6addr_work->dev = dev;
6687 	inet6addr_work->event = event;
6688 	dev_hold(dev);
6689 	mlxsw_core_schedule_work(&inet6addr_work->work);
6690 
6691 	return NOTIFY_DONE;
6692 }
6693 
6694 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6695 				   unsigned long event, void *ptr)
6696 {
6697 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6698 	struct net_device *dev = i6vi->i6vi_dev->dev;
6699 	struct mlxsw_sp *mlxsw_sp;
6700 	struct mlxsw_sp_rif *rif;
6701 	int err = 0;
6702 
6703 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6704 	if (!mlxsw_sp)
6705 		goto out;
6706 
6707 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6708 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6709 		goto out;
6710 
6711 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6712 out:
6713 	return notifier_from_errno(err);
6714 }
6715 
6716 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6717 			     const char *mac, int mtu)
6718 {
6719 	char ritr_pl[MLXSW_REG_RITR_LEN];
6720 	int err;
6721 
6722 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6723 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6724 	if (err)
6725 		return err;
6726 
6727 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6728 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6729 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6730 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6731 }
6732 
6733 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6734 {
6735 	struct mlxsw_sp *mlxsw_sp;
6736 	struct mlxsw_sp_rif *rif;
6737 	u16 fid_index;
6738 	int err;
6739 
6740 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6741 	if (!mlxsw_sp)
6742 		return 0;
6743 
6744 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6745 	if (!rif)
6746 		return 0;
6747 	fid_index = mlxsw_sp_fid_index(rif->fid);
6748 
6749 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6750 	if (err)
6751 		return err;
6752 
6753 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6754 				dev->mtu);
6755 	if (err)
6756 		goto err_rif_edit;
6757 
6758 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6759 	if (err)
6760 		goto err_rif_fdb_op;
6761 
6762 	if (rif->mtu != dev->mtu) {
6763 		struct mlxsw_sp_vr *vr;
6764 		int i;
6765 
6766 		/* The RIF is relevant only to its mr_table instance, as unlike
6767 		 * unicast routing, in multicast routing a RIF cannot be shared
6768 		 * between several multicast routing tables.
6769 		 */
6770 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
6771 		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6772 			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
6773 						   rif, dev->mtu);
6774 	}
6775 
6776 	ether_addr_copy(rif->addr, dev->dev_addr);
6777 	rif->mtu = dev->mtu;
6778 
6779 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6780 
6781 	return 0;
6782 
6783 err_rif_fdb_op:
6784 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6785 err_rif_edit:
6786 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6787 	return err;
6788 }
6789 
6790 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6791 				  struct net_device *l3_dev,
6792 				  struct netlink_ext_ack *extack)
6793 {
6794 	struct mlxsw_sp_rif *rif;
6795 
6796 	/* If netdev is already associated with a RIF, then we need to
6797 	 * destroy it and create a new one with the new virtual router ID.
6798 	 */
6799 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6800 	if (rif)
6801 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6802 
6803 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6804 }
6805 
6806 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6807 				    struct net_device *l3_dev)
6808 {
6809 	struct mlxsw_sp_rif *rif;
6810 
6811 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6812 	if (!rif)
6813 		return;
6814 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6815 }
6816 
6817 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6818 				 struct netdev_notifier_changeupper_info *info)
6819 {
6820 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6821 	int err = 0;
6822 
6823 	/* We do not create a RIF for a macvlan, but only use it to
6824 	 * direct more MAC addresses to the router.
6825 	 */
6826 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
6827 		return 0;
6828 
6829 	switch (event) {
6830 	case NETDEV_PRECHANGEUPPER:
6831 		return 0;
6832 	case NETDEV_CHANGEUPPER:
6833 		if (info->linking) {
6834 			struct netlink_ext_ack *extack;
6835 
6836 			extack = netdev_notifier_info_to_extack(&info->info);
6837 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6838 		} else {
6839 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6840 		}
6841 		break;
6842 	}
6843 
6844 	return err;
6845 }
6846 
6847 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
6848 {
6849 	struct mlxsw_sp_rif *rif = data;
6850 
6851 	if (!netif_is_macvlan(dev))
6852 		return 0;
6853 
6854 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
6855 				   mlxsw_sp_fid_index(rif->fid), false);
6856 }
6857 
6858 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
6859 {
6860 	if (!netif_is_macvlan_port(rif->dev))
6861 		return 0;
6862 
6863 	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
6864 	return netdev_walk_all_upper_dev_rcu(rif->dev,
6865 					     __mlxsw_sp_rif_macvlan_flush, rif);
6866 }
6867 
6868 static struct mlxsw_sp_rif_subport *
6869 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6870 {
6871 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6872 }
6873 
6874 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6875 				       const struct mlxsw_sp_rif_params *params)
6876 {
6877 	struct mlxsw_sp_rif_subport *rif_subport;
6878 
6879 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6880 	rif_subport->vid = params->vid;
6881 	rif_subport->lag = params->lag;
6882 	if (params->lag)
6883 		rif_subport->lag_id = params->lag_id;
6884 	else
6885 		rif_subport->system_port = params->system_port;
6886 }
6887 
6888 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6889 {
6890 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6891 	struct mlxsw_sp_rif_subport *rif_subport;
6892 	char ritr_pl[MLXSW_REG_RITR_LEN];
6893 
6894 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6895 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6896 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
6897 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6898 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6899 				  rif_subport->lag ? rif_subport->lag_id :
6900 						     rif_subport->system_port,
6901 				  rif_subport->vid);
6902 
6903 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6904 }
6905 
6906 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6907 {
6908 	int err;
6909 
6910 	err = mlxsw_sp_rif_subport_op(rif, true);
6911 	if (err)
6912 		return err;
6913 
6914 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6915 				  mlxsw_sp_fid_index(rif->fid), true);
6916 	if (err)
6917 		goto err_rif_fdb_op;
6918 
6919 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6920 	return 0;
6921 
6922 err_rif_fdb_op:
6923 	mlxsw_sp_rif_subport_op(rif, false);
6924 	return err;
6925 }
6926 
6927 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6928 {
6929 	struct mlxsw_sp_fid *fid = rif->fid;
6930 
6931 	mlxsw_sp_fid_rif_set(fid, NULL);
6932 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6933 			    mlxsw_sp_fid_index(fid), false);
6934 	mlxsw_sp_rif_macvlan_flush(rif);
6935 	mlxsw_sp_rif_subport_op(rif, false);
6936 }
6937 
6938 static struct mlxsw_sp_fid *
6939 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
6940 			     struct netlink_ext_ack *extack)
6941 {
6942 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6943 }
6944 
6945 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
6946 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
6947 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
6948 	.setup			= mlxsw_sp_rif_subport_setup,
6949 	.configure		= mlxsw_sp_rif_subport_configure,
6950 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
6951 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
6952 };
6953 
6954 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6955 				    enum mlxsw_reg_ritr_if_type type,
6956 				    u16 vid_fid, bool enable)
6957 {
6958 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6959 	char ritr_pl[MLXSW_REG_RITR_LEN];
6960 
6961 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6962 			    rif->dev->mtu);
6963 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6964 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6965 
6966 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6967 }
6968 
6969 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6970 {
6971 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6972 }
6973 
6974 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6975 {
6976 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6977 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6978 	int err;
6979 
6980 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6981 	if (err)
6982 		return err;
6983 
6984 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6985 				     mlxsw_sp_router_port(mlxsw_sp), true);
6986 	if (err)
6987 		goto err_fid_mc_flood_set;
6988 
6989 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6990 				     mlxsw_sp_router_port(mlxsw_sp), true);
6991 	if (err)
6992 		goto err_fid_bc_flood_set;
6993 
6994 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6995 				  mlxsw_sp_fid_index(rif->fid), true);
6996 	if (err)
6997 		goto err_rif_fdb_op;
6998 
6999 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7000 	return 0;
7001 
7002 err_rif_fdb_op:
7003 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7004 			       mlxsw_sp_router_port(mlxsw_sp), false);
7005 err_fid_bc_flood_set:
7006 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7007 			       mlxsw_sp_router_port(mlxsw_sp), false);
7008 err_fid_mc_flood_set:
7009 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7010 	return err;
7011 }
7012 
7013 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7014 {
7015 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7016 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7017 	struct mlxsw_sp_fid *fid = rif->fid;
7018 
7019 	mlxsw_sp_fid_rif_set(fid, NULL);
7020 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7021 			    mlxsw_sp_fid_index(fid), false);
7022 	mlxsw_sp_rif_macvlan_flush(rif);
7023 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7024 			       mlxsw_sp_router_port(mlxsw_sp), false);
7025 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7026 			       mlxsw_sp_router_port(mlxsw_sp), false);
7027 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7028 }
7029 
7030 static struct mlxsw_sp_fid *
7031 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7032 			  struct netlink_ext_ack *extack)
7033 {
7034 	u16 vid;
7035 	int err;
7036 
7037 	if (is_vlan_dev(rif->dev)) {
7038 		vid = vlan_dev_vlan_id(rif->dev);
7039 	} else {
7040 		err = br_vlan_get_pvid(rif->dev, &vid);
7041 		if (err < 0 || !vid) {
7042 			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7043 			return ERR_PTR(-EINVAL);
7044 		}
7045 	}
7046 
7047 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
7048 }
7049 
7050 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7051 {
7052 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7053 	struct switchdev_notifier_fdb_info info;
7054 	struct net_device *br_dev;
7055 	struct net_device *dev;
7056 
7057 	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7058 	dev = br_fdb_find_port(br_dev, mac, vid);
7059 	if (!dev)
7060 		return;
7061 
7062 	info.addr = mac;
7063 	info.vid = vid;
7064 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7065 }
7066 
7067 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7068 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7069 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7070 	.configure		= mlxsw_sp_rif_vlan_configure,
7071 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
7072 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7073 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7074 };
7075 
7076 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7077 {
7078 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7079 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7080 	int err;
7081 
7082 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7083 				       true);
7084 	if (err)
7085 		return err;
7086 
7087 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7088 				     mlxsw_sp_router_port(mlxsw_sp), true);
7089 	if (err)
7090 		goto err_fid_mc_flood_set;
7091 
7092 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7093 				     mlxsw_sp_router_port(mlxsw_sp), true);
7094 	if (err)
7095 		goto err_fid_bc_flood_set;
7096 
7097 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7098 				  mlxsw_sp_fid_index(rif->fid), true);
7099 	if (err)
7100 		goto err_rif_fdb_op;
7101 
7102 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7103 	return 0;
7104 
7105 err_rif_fdb_op:
7106 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7107 			       mlxsw_sp_router_port(mlxsw_sp), false);
7108 err_fid_bc_flood_set:
7109 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7110 			       mlxsw_sp_router_port(mlxsw_sp), false);
7111 err_fid_mc_flood_set:
7112 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7113 	return err;
7114 }
7115 
7116 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7117 {
7118 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7119 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7120 	struct mlxsw_sp_fid *fid = rif->fid;
7121 
7122 	mlxsw_sp_fid_rif_set(fid, NULL);
7123 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7124 			    mlxsw_sp_fid_index(fid), false);
7125 	mlxsw_sp_rif_macvlan_flush(rif);
7126 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7127 			       mlxsw_sp_router_port(mlxsw_sp), false);
7128 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7129 			       mlxsw_sp_router_port(mlxsw_sp), false);
7130 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7131 }
7132 
7133 static struct mlxsw_sp_fid *
7134 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7135 			 struct netlink_ext_ack *extack)
7136 {
7137 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
7138 }
7139 
7140 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7141 {
7142 	struct switchdev_notifier_fdb_info info;
7143 	struct net_device *dev;
7144 
7145 	dev = br_fdb_find_port(rif->dev, mac, 0);
7146 	if (!dev)
7147 		return;
7148 
7149 	info.addr = mac;
7150 	info.vid = 0;
7151 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7152 }
7153 
7154 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7155 	.type			= MLXSW_SP_RIF_TYPE_FID,
7156 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7157 	.configure		= mlxsw_sp_rif_fid_configure,
7158 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7159 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
7160 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
7161 };
7162 
7163 static struct mlxsw_sp_rif_ipip_lb *
7164 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7165 {
7166 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7167 }
7168 
7169 static void
7170 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7171 			   const struct mlxsw_sp_rif_params *params)
7172 {
7173 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7174 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
7175 
7176 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7177 				 common);
7178 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7179 	rif_lb->lb_config = params_lb->lb_config;
7180 }
7181 
7182 static int
7183 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7184 {
7185 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7186 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7187 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7188 	struct mlxsw_sp_vr *ul_vr;
7189 	int err;
7190 
7191 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7192 	if (IS_ERR(ul_vr))
7193 		return PTR_ERR(ul_vr);
7194 
7195 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
7196 	if (err)
7197 		goto err_loopback_op;
7198 
7199 	lb_rif->ul_vr_id = ul_vr->id;
7200 	++ul_vr->rif_count;
7201 	return 0;
7202 
7203 err_loopback_op:
7204 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7205 	return err;
7206 }
7207 
7208 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7209 {
7210 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7211 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7212 	struct mlxsw_sp_vr *ul_vr;
7213 
7214 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7215 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
7216 
7217 	--ul_vr->rif_count;
7218 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7219 }
7220 
7221 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
7222 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7223 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7224 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7225 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
7226 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
7227 };
7228 
7229 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
7230 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7231 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
7232 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7233 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
7234 };
7235 
7236 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7237 {
7238 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7239 
7240 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
7241 					 sizeof(struct mlxsw_sp_rif *),
7242 					 GFP_KERNEL);
7243 	if (!mlxsw_sp->router->rifs)
7244 		return -ENOMEM;
7245 
7246 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7247 
7248 	return 0;
7249 }
7250 
7251 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7252 {
7253 	int i;
7254 
7255 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7256 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7257 
7258 	kfree(mlxsw_sp->router->rifs);
7259 }
7260 
7261 static int
7262 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7263 {
7264 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7265 
7266 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7267 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7268 }
7269 
7270 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7271 {
7272 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7273 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7274 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7275 }
7276 
7277 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7278 {
7279 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7280 }
7281 
7282 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7283 {
7284 	struct mlxsw_sp_router *router;
7285 
7286 	/* Flush pending FIB notifications and then flush the device's
7287 	 * table before requesting another dump. The FIB notification
7288 	 * block is unregistered, so no need to take RTNL.
7289 	 */
7290 	mlxsw_core_flush_owq();
7291 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7292 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7293 }
7294 
7295 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7296 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7297 {
7298 	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7299 }
7300 
7301 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7302 {
7303 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7304 }
7305 
7306 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7307 {
7308 	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7309 
7310 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7311 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7312 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7313 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7314 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7315 	if (only_l3)
7316 		return;
7317 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7318 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7319 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7320 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7321 }
7322 
7323 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7324 {
7325 	bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7326 
7327 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7328 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7329 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7330 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7331 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7332 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7333 	if (only_l3) {
7334 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7335 					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7336 	} else {
7337 		mlxsw_sp_mp_hash_header_set(recr2_pl,
7338 					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7339 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7340 					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
7341 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7342 					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
7343 	}
7344 }
7345 
7346 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7347 {
7348 	char recr2_pl[MLXSW_REG_RECR2_LEN];
7349 	u32 seed;
7350 
7351 	get_random_bytes(&seed, sizeof(seed));
7352 	mlxsw_reg_recr2_pack(recr2_pl, seed);
7353 	mlxsw_sp_mp4_hash_init(recr2_pl);
7354 	mlxsw_sp_mp6_hash_init(recr2_pl);
7355 
7356 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7357 }
7358 #else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to program,
 * always succeeds.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	const int err = 0;

	return err;
}
7363 #endif
7364 
7365 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7366 {
7367 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
7368 	unsigned int i;
7369 
7370 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
7371 
7372 	/* HW is determining switch priority based on DSCP-bits, but the
7373 	 * kernel is still doing that based on the ToS. Since there's a
7374 	 * mismatch in bits we need to make sure to translate the right
7375 	 * value ToS would observe, skipping the 2 least-significant ECN bits.
7376 	 */
7377 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7378 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7379 
7380 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7381 }
7382 
7383 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7384 {
7385 	bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7386 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7387 	u64 max_rifs;
7388 	int err;
7389 
7390 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7391 		return -EIO;
7392 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7393 
7394 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7395 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7396 	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7397 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7398 	if (err)
7399 		return err;
7400 	return 0;
7401 }
7402 
7403 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7404 {
7405 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7406 
7407 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7408 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7409 }
7410 
7411 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7412 {
7413 	struct mlxsw_sp_router *router;
7414 	int err;
7415 
7416 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7417 	if (!router)
7418 		return -ENOMEM;
7419 	mlxsw_sp->router = router;
7420 	router->mlxsw_sp = mlxsw_sp;
7421 
7422 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7423 	err = __mlxsw_sp_router_init(mlxsw_sp);
7424 	if (err)
7425 		goto err_router_init;
7426 
7427 	err = mlxsw_sp_rifs_init(mlxsw_sp);
7428 	if (err)
7429 		goto err_rifs_init;
7430 
7431 	err = mlxsw_sp_ipips_init(mlxsw_sp);
7432 	if (err)
7433 		goto err_ipips_init;
7434 
7435 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7436 			      &mlxsw_sp_nexthop_ht_params);
7437 	if (err)
7438 		goto err_nexthop_ht_init;
7439 
7440 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7441 			      &mlxsw_sp_nexthop_group_ht_params);
7442 	if (err)
7443 		goto err_nexthop_group_ht_init;
7444 
7445 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7446 	err = mlxsw_sp_lpm_init(mlxsw_sp);
7447 	if (err)
7448 		goto err_lpm_init;
7449 
7450 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7451 	if (err)
7452 		goto err_mr_init;
7453 
7454 	err = mlxsw_sp_vrs_init(mlxsw_sp);
7455 	if (err)
7456 		goto err_vrs_init;
7457 
7458 	err = mlxsw_sp_neigh_init(mlxsw_sp);
7459 	if (err)
7460 		goto err_neigh_init;
7461 
7462 	mlxsw_sp->router->netevent_nb.notifier_call =
7463 		mlxsw_sp_router_netevent_event;
7464 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7465 	if (err)
7466 		goto err_register_netevent_notifier;
7467 
7468 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7469 	if (err)
7470 		goto err_mp_hash_init;
7471 
7472 	err = mlxsw_sp_dscp_init(mlxsw_sp);
7473 	if (err)
7474 		goto err_dscp_init;
7475 
7476 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7477 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7478 				    mlxsw_sp_router_fib_dump_flush);
7479 	if (err)
7480 		goto err_register_fib_notifier;
7481 
7482 	return 0;
7483 
7484 err_register_fib_notifier:
7485 err_dscp_init:
7486 err_mp_hash_init:
7487 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7488 err_register_netevent_notifier:
7489 	mlxsw_sp_neigh_fini(mlxsw_sp);
7490 err_neigh_init:
7491 	mlxsw_sp_vrs_fini(mlxsw_sp);
7492 err_vrs_init:
7493 	mlxsw_sp_mr_fini(mlxsw_sp);
7494 err_mr_init:
7495 	mlxsw_sp_lpm_fini(mlxsw_sp);
7496 err_lpm_init:
7497 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7498 err_nexthop_group_ht_init:
7499 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7500 err_nexthop_ht_init:
7501 	mlxsw_sp_ipips_fini(mlxsw_sp);
7502 err_ipips_init:
7503 	mlxsw_sp_rifs_fini(mlxsw_sp);
7504 err_rifs_init:
7505 	__mlxsw_sp_router_fini(mlxsw_sp);
7506 err_router_init:
7507 	kfree(mlxsw_sp->router);
7508 	return err;
7509 }
7510 
7511 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7512 {
7513 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7514 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7515 	mlxsw_sp_neigh_fini(mlxsw_sp);
7516 	mlxsw_sp_vrs_fini(mlxsw_sp);
7517 	mlxsw_sp_mr_fini(mlxsw_sp);
7518 	mlxsw_sp_lpm_fini(mlxsw_sp);
7519 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7520 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7521 	mlxsw_sp_ipips_fini(mlxsw_sp);
7522 	mlxsw_sp_rifs_fini(mlxsw_sp);
7523 	__mlxsw_sp_router_fini(mlxsw_sp);
7524 	kfree(mlxsw_sp->router);
7525 }
7526