1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the names of the copyright holders nor the names of its
18  *    contributors may be used to endorse or promote products derived from
19  *    this software without specific prior written permission.
20  *
21  * Alternatively, this software may be distributed under the terms of the
22  * GNU General Public License ("GPL") version 2 as published by the Free
23  * Software Foundation.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <net/netevent.h>
50 #include <net/neighbour.h>
51 #include <net/arp.h>
52 #include <net/ip_fib.h>
53 #include <net/ip6_fib.h>
54 #include <net/fib_rules.h>
55 #include <net/ip_tunnels.h>
56 #include <net/l3mdev.h>
57 #include <net/addrconf.h>
58 #include <net/ndisc.h>
59 #include <net/ipv6.h>
60 #include <net/fib_notifier.h>
61 
62 #include "spectrum.h"
63 #include "core.h"
64 #include "reg.h"
65 #include "spectrum_cnt.h"
66 #include "spectrum_dpipe.h"
67 #include "spectrum_ipip.h"
68 #include "spectrum_mr.h"
69 #include "spectrum_mr_tcam.h"
70 #include "spectrum_router.h"
71 
72 struct mlxsw_sp_vr;
73 struct mlxsw_sp_lpm_tree;
74 struct mlxsw_sp_rif_ops;
75 
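/* Per-instance router state: the RIF and virtual router arrays, the
 * neighbour/nexthop hash tables and lists, the LPM tree pool, and the
 * delayed works that keep kernel neighbour state in sync with the
 * device.
 */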
76 struct mlxsw_sp_router {
77 	struct mlxsw_sp *mlxsw_sp;
78 	struct mlxsw_sp_rif **rifs;
79 	struct mlxsw_sp_vr *vrs;
80 	struct rhashtable neigh_ht;
81 	struct rhashtable nexthop_group_ht;
82 	struct rhashtable nexthop_ht;
83 	struct list_head nexthop_list;
84 	struct {
85 		struct mlxsw_sp_lpm_tree *trees;
86 		unsigned int tree_count;
87 	} lpm;
88 	struct {
89 		struct delayed_work dw;
90 		unsigned long interval;	/* ms */
91 	} neighs_update;
92 	struct delayed_work nexthop_probe_dw;
93 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
94 	struct list_head nexthop_neighs_list;
95 	struct list_head ipip_list;
96 	bool aborted;
97 	struct notifier_block fib_nb;
98 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
99 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
100 };
101 
102 struct mlxsw_sp_rif {
103 	struct list_head nexthop_list;
104 	struct list_head neigh_list;
105 	struct net_device *dev;
106 	struct mlxsw_sp_fid *fid;
107 	unsigned char addr[ETH_ALEN];
108 	int mtu;
109 	u16 rif_index;
110 	u16 vr_id;
111 	const struct mlxsw_sp_rif_ops *ops;
112 	struct mlxsw_sp *mlxsw_sp;
113 
114 	unsigned int counter_ingress;
115 	bool counter_ingress_valid;
116 	unsigned int counter_egress;
117 	bool counter_egress_valid;
118 };
119 
120 struct mlxsw_sp_rif_params {
121 	struct net_device *dev;
122 	union {
123 		u16 system_port;
124 		u16 lag_id;
125 	};
126 	u16 vid;
127 	bool lag;
128 };
129 
130 struct mlxsw_sp_rif_subport {
131 	struct mlxsw_sp_rif common;
132 	union {
133 		u16 system_port;
134 		u16 lag_id;
135 	};
136 	u16 vid;
137 	bool lag;
138 };
139 
140 struct mlxsw_sp_rif_ipip_lb {
141 	struct mlxsw_sp_rif common;
142 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
143 	u16 ul_vr_id; /* Reserved for Spectrum-2. */
144 };
145 
146 struct mlxsw_sp_rif_params_ipip_lb {
147 	struct mlxsw_sp_rif_params common;
148 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
149 };
150 
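/* Per-type RIF operations: setup() copies type-specific parameters
 * into the RIF, configure() and deconfigure() add the RIF to and
 * remove it from the device, and fid_get() returns the FID backing
 * the RIF, for RIF types that use one.
 */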
151 struct mlxsw_sp_rif_ops {
152 	enum mlxsw_sp_rif_type type;
153 	size_t rif_size;
154 
155 	void (*setup)(struct mlxsw_sp_rif *rif,
156 		      const struct mlxsw_sp_rif_params *params);
157 	int (*configure)(struct mlxsw_sp_rif *rif);
158 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
159 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
160 };
161 
162 static unsigned int *
163 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
164 			   enum mlxsw_sp_rif_counter_dir dir)
165 {
166 	switch (dir) {
167 	case MLXSW_SP_RIF_COUNTER_EGRESS:
168 		return &rif->counter_egress;
169 	case MLXSW_SP_RIF_COUNTER_INGRESS:
170 		return &rif->counter_ingress;
171 	}
172 	return NULL;
173 }
174 
175 static bool
176 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
177 			       enum mlxsw_sp_rif_counter_dir dir)
178 {
179 	switch (dir) {
180 	case MLXSW_SP_RIF_COUNTER_EGRESS:
181 		return rif->counter_egress_valid;
182 	case MLXSW_SP_RIF_COUNTER_INGRESS:
183 		return rif->counter_ingress_valid;
184 	}
185 	return false;
186 }
187 
188 static void
189 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
190 			       enum mlxsw_sp_rif_counter_dir dir,
191 			       bool valid)
192 {
193 	switch (dir) {
194 	case MLXSW_SP_RIF_COUNTER_EGRESS:
195 		rif->counter_egress_valid = valid;
196 		break;
197 	case MLXSW_SP_RIF_COUNTER_INGRESS:
198 		rif->counter_ingress_valid = valid;
199 		break;
200 	}
201 }
202 
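/* RITR is edited read-modify-write: the current RIF configuration is
 * queried first, so that enabling or disabling a counter does not
 * clobber the rest of the RIF's settings.
 */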
203 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
204 				     unsigned int counter_index, bool enable,
205 				     enum mlxsw_sp_rif_counter_dir dir)
206 {
207 	char ritr_pl[MLXSW_REG_RITR_LEN];
208 	bool is_egress = false;
209 	int err;
210 
211 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
212 		is_egress = true;
213 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
214 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
215 	if (err)
216 		return err;
217 
218 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
219 				    is_egress);
220 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
221 }
222 
223 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
224 				   struct mlxsw_sp_rif *rif,
225 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
226 {
227 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
228 	unsigned int *p_counter_index;
229 	bool valid;
230 	int err;
231 
232 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
233 	if (!valid)
234 		return -EINVAL;
235 
236 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
237 	if (!p_counter_index)
238 		return -EINVAL;
239 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
240 			     MLXSW_REG_RICNT_OPCODE_NOP);
241 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
242 	if (err)
243 		return err;
244 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
245 	return 0;
246 }
247 
248 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
249 				      unsigned int counter_index)
250 {
251 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
252 
253 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
254 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
255 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
256 }
257 
258 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
259 			       struct mlxsw_sp_rif *rif,
260 			       enum mlxsw_sp_rif_counter_dir dir)
261 {
262 	unsigned int *p_counter_index;
263 	int err;
264 
265 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
266 	if (!p_counter_index)
267 		return -EINVAL;
268 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
269 				     p_counter_index);
270 	if (err)
271 		return err;
272 
273 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
274 	if (err)
275 		goto err_counter_clear;
276 
277 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
278 					*p_counter_index, true, dir);
279 	if (err)
280 		goto err_counter_edit;
281 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
282 	return 0;
283 
284 err_counter_edit:
285 err_counter_clear:
286 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
287 			      *p_counter_index);
288 	return err;
289 }
290 
291 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
292 			       struct mlxsw_sp_rif *rif,
293 			       enum mlxsw_sp_rif_counter_dir dir)
294 {
295 	unsigned int *p_counter_index;
296 
297 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
298 		return;
299 
300 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
301 	if (WARN_ON(!p_counter_index))
302 		return;
303 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
304 				  *p_counter_index, false, dir);
305 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
306 			      *p_counter_index);
307 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
308 }
309 
310 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
311 {
312 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
313 	struct devlink *devlink;
314 
315 	devlink = priv_to_devlink(mlxsw_sp->core);
316 	if (!devlink_dpipe_table_counter_enabled(devlink,
317 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
318 		return;
319 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
320 }
321 
322 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
323 {
324 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
325 
326 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
327 }
328 
329 static struct mlxsw_sp_rif *
330 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
331 			 const struct net_device *dev);
332 
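/* One bit for every possible prefix length, /0 through /128: an IPv6
 * address is 16 bytes, giving 128 bits, plus one for the zero-length
 * prefix. IPv4 uses only the low 33 bits.
 */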
333 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
334 
335 struct mlxsw_sp_prefix_usage {
336 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
337 };
338 
339 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
340 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
341 
342 static bool
343 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
344 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
345 {
346 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
347 }
348 
349 static bool
350 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
351 {
352 	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
353 
354 	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
355 }
356 
357 static void
358 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
359 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
360 {
361 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
362 }
363 
364 static void
365 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
366 			  unsigned char prefix_len)
367 {
368 	set_bit(prefix_len, prefix_usage->b);
369 }
370 
371 static void
372 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
373 			    unsigned char prefix_len)
374 {
375 	clear_bit(prefix_len, prefix_usage->b);
376 }
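
/* A minimal usage sketch of the helpers above, with hypothetical
 * prefix lengths:
 *
 *	struct mlxsw_sp_prefix_usage usage = {{ 0 } };
 *	unsigned char prefix;
 *
 *	mlxsw_sp_prefix_usage_set(&usage, 24);
 *	mlxsw_sp_prefix_usage_set(&usage, 32);
 *	mlxsw_sp_prefix_usage_for_each(prefix, &usage)
 *		pr_debug("prefix length /%u in use\n", prefix);
 *
 * This visits 24 and then 32; for_each_set_bit() guarantees ascending
 * order, which mlxsw_sp_lpm_tree_left_struct_set() below relies on.
 */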
377 
378 struct mlxsw_sp_fib_key {
379 	unsigned char addr[sizeof(struct in6_addr)];
380 	unsigned char prefix_len;
381 };
382 
383 enum mlxsw_sp_fib_entry_type {
384 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
385 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
386 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
387 
388 	/* This is a special case of local delivery, where a packet should be
389 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
390 	 * because that's a type of next hop, not of FIB entry. (There can be
391 	 * several next hops in a REMOTE entry, and some of them may be
392 	 * encapsulating entries.)
393 	 */
394 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
395 };
396 
397 struct mlxsw_sp_nexthop_group;
398 struct mlxsw_sp_fib;
399 
400 struct mlxsw_sp_fib_node {
401 	struct list_head entry_list;
402 	struct list_head list;
403 	struct rhash_head ht_node;
404 	struct mlxsw_sp_fib *fib;
405 	struct mlxsw_sp_fib_key key;
406 };
407 
408 struct mlxsw_sp_fib_entry_decap {
409 	struct mlxsw_sp_ipip_entry *ipip_entry;
410 	u32 tunnel_index;
411 };
412 
413 struct mlxsw_sp_fib_entry {
414 	struct list_head list;
415 	struct mlxsw_sp_fib_node *fib_node;
416 	enum mlxsw_sp_fib_entry_type type;
417 	struct list_head nexthop_group_node;
418 	struct mlxsw_sp_nexthop_group *nh_group;
419 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
420 };
421 
422 struct mlxsw_sp_fib4_entry {
423 	struct mlxsw_sp_fib_entry common;
424 	u32 tb_id;
425 	u32 prio;
426 	u8 tos;
427 	u8 type;
428 };
429 
430 struct mlxsw_sp_fib6_entry {
431 	struct mlxsw_sp_fib_entry common;
432 	struct list_head rt6_list;
433 	unsigned int nrt6;
434 };
435 
436 struct mlxsw_sp_rt6 {
437 	struct list_head list;
438 	struct rt6_info *rt;
439 };
440 
441 struct mlxsw_sp_lpm_tree {
442 	u8 id; /* tree ID */
443 	unsigned int ref_count;
444 	enum mlxsw_sp_l3proto proto;
445 	struct mlxsw_sp_prefix_usage prefix_usage;
446 };
447 
448 struct mlxsw_sp_fib {
449 	struct rhashtable ht;
450 	struct list_head node_list;
451 	struct mlxsw_sp_vr *vr;
452 	struct mlxsw_sp_lpm_tree *lpm_tree;
453 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
454 	struct mlxsw_sp_prefix_usage prefix_usage;
455 	enum mlxsw_sp_l3proto proto;
456 };
457 
458 struct mlxsw_sp_vr {
459 	u16 id; /* virtual router ID */
460 	u32 tb_id; /* kernel fib table id */
461 	unsigned int rif_count;
462 	struct mlxsw_sp_fib *fib4;
463 	struct mlxsw_sp_fib *fib6;
464 	struct mlxsw_sp_mr_table *mr4_table;
465 };
466 
467 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
468 
469 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
470 						enum mlxsw_sp_l3proto proto)
471 {
472 	struct mlxsw_sp_fib *fib;
473 	int err;
474 
475 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
476 	if (!fib)
477 		return ERR_PTR(-ENOMEM);
478 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
479 	if (err)
480 		goto err_rhashtable_init;
481 	INIT_LIST_HEAD(&fib->node_list);
482 	fib->proto = proto;
483 	fib->vr = vr;
484 	return fib;
485 
486 err_rhashtable_init:
487 	kfree(fib);
488 	return ERR_PTR(err);
489 }
490 
491 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
492 {
493 	WARN_ON(!list_empty(&fib->node_list));
494 	WARN_ON(fib->lpm_tree);
495 	rhashtable_destroy(&fib->ht);
496 	kfree(fib);
497 }
498 
499 static struct mlxsw_sp_lpm_tree *
500 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
501 {
502 	struct mlxsw_sp_lpm_tree *lpm_tree;
503 	int i;
504 
505 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
506 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
507 		if (lpm_tree->ref_count == 0)
508 			return lpm_tree;
509 	}
510 	return NULL;
511 }
512 
513 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
514 				   struct mlxsw_sp_lpm_tree *lpm_tree)
515 {
516 	char ralta_pl[MLXSW_REG_RALTA_LEN];
517 
518 	mlxsw_reg_ralta_pack(ralta_pl, true,
519 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
520 			     lpm_tree->id);
521 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
522 }
523 
524 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
525 				   struct mlxsw_sp_lpm_tree *lpm_tree)
526 {
527 	char ralta_pl[MLXSW_REG_RALTA_LEN];
528 
529 	mlxsw_reg_ralta_pack(ralta_pl, false,
530 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
531 			     lpm_tree->id);
532 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
533 }
534 
535 static int
536 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
537 				  struct mlxsw_sp_prefix_usage *prefix_usage,
538 				  struct mlxsw_sp_lpm_tree *lpm_tree)
539 {
540 	char ralst_pl[MLXSW_REG_RALST_LEN];
541 	u8 root_bin = 0;
542 	u8 prefix;
543 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
544 
545 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
546 		root_bin = prefix;
547 
548 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
549 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
550 		if (prefix == 0)
551 			continue;
552 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
553 					 MLXSW_REG_RALST_BIN_NO_CHILD);
554 		last_prefix = prefix;
555 	}
556 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
557 }
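
/* A worked example for the function above, with hypothetical prefix
 * usage {0, 24, 32}: the first loop leaves root_bin at 32, the most
 * specific prefix length in use. The second loop then packs bin 24
 * with no children, followed by bin 32 chained to bin 24, so a lookup
 * tries /32 first and falls back to /24. Prefix length 0 gets no bin
 * of its own.
 */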
558 
559 static struct mlxsw_sp_lpm_tree *
560 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
561 			 struct mlxsw_sp_prefix_usage *prefix_usage,
562 			 enum mlxsw_sp_l3proto proto)
563 {
564 	struct mlxsw_sp_lpm_tree *lpm_tree;
565 	int err;
566 
567 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
568 	if (!lpm_tree)
569 		return ERR_PTR(-EBUSY);
570 	lpm_tree->proto = proto;
571 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
572 	if (err)
573 		return ERR_PTR(err);
574 
575 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
576 						lpm_tree);
577 	if (err)
578 		goto err_left_struct_set;
579 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
580 	       sizeof(lpm_tree->prefix_usage));
581 	return lpm_tree;
582 
583 err_left_struct_set:
584 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
585 	return ERR_PTR(err);
586 }
587 
588 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
589 				      struct mlxsw_sp_lpm_tree *lpm_tree)
590 {
591 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
592 }
593 
594 static struct mlxsw_sp_lpm_tree *
595 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
596 		      struct mlxsw_sp_prefix_usage *prefix_usage,
597 		      enum mlxsw_sp_l3proto proto)
598 {
599 	struct mlxsw_sp_lpm_tree *lpm_tree;
600 	int i;
601 
602 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
603 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
604 		if (lpm_tree->ref_count != 0 &&
605 		    lpm_tree->proto == proto &&
606 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
607 					     prefix_usage))
608 			return lpm_tree;
609 	}
610 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
611 }
612 
613 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
614 {
615 	lpm_tree->ref_count++;
616 }
617 
618 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
619 				  struct mlxsw_sp_lpm_tree *lpm_tree)
620 {
621 	if (--lpm_tree->ref_count == 0)
622 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
623 }
624 
625 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
626 
627 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
628 {
629 	struct mlxsw_sp_lpm_tree *lpm_tree;
630 	u64 max_trees;
631 	int i;
632 
633 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
634 		return -EIO;
635 
636 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
637 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
638 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
639 					     sizeof(struct mlxsw_sp_lpm_tree),
640 					     GFP_KERNEL);
641 	if (!mlxsw_sp->router->lpm.trees)
642 		return -ENOMEM;
643 
644 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
645 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
646 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
647 	}
648 
649 	return 0;
650 }
651 
652 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
653 {
654 	kfree(mlxsw_sp->router->lpm.trees);
655 }
656 
657 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
658 {
659 	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
660 }
661 
662 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
663 {
664 	struct mlxsw_sp_vr *vr;
665 	int i;
666 
667 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
668 		vr = &mlxsw_sp->router->vrs[i];
669 		if (!mlxsw_sp_vr_is_used(vr))
670 			return vr;
671 	}
672 	return NULL;
673 }
674 
675 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
676 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
677 {
678 	char raltb_pl[MLXSW_REG_RALTB_LEN];
679 
680 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
681 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
682 			     tree_id);
683 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
684 }
685 
686 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
687 				       const struct mlxsw_sp_fib *fib)
688 {
689 	char raltb_pl[MLXSW_REG_RALTB_LEN];
690 
691 	/* Bind to tree 0, which is the default. */
692 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
693 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
694 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
695 }
696 
697 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
698 {
699 	/* For our purposes, squash the main, default and local tables into one */
700 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
701 		tb_id = RT_TABLE_MAIN;
702 	return tb_id;
703 }
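
/* For example, a route in RT_TABLE_LOCAL (255) or RT_TABLE_DEFAULT
 * (253) is handled in the same virtual router as one in RT_TABLE_MAIN
 * (254).
 */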
704 
705 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
706 					    u32 tb_id)
707 {
708 	struct mlxsw_sp_vr *vr;
709 	int i;
710 
711 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
712 
713 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
714 		vr = &mlxsw_sp->router->vrs[i];
715 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
716 			return vr;
717 	}
718 	return NULL;
719 }
720 
721 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
722 					    enum mlxsw_sp_l3proto proto)
723 {
724 	switch (proto) {
725 	case MLXSW_SP_L3_PROTO_IPV4:
726 		return vr->fib4;
727 	case MLXSW_SP_L3_PROTO_IPV6:
728 		return vr->fib6;
729 	}
730 	return NULL;
731 }
732 
733 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
734 					      u32 tb_id)
735 {
736 	struct mlxsw_sp_vr *vr;
737 	int err;
738 
739 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
740 	if (!vr)
741 		return ERR_PTR(-EBUSY);
742 	vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
743 	if (IS_ERR(vr->fib4))
744 		return ERR_CAST(vr->fib4);
745 	vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
746 	if (IS_ERR(vr->fib6)) {
747 		err = PTR_ERR(vr->fib6);
748 		goto err_fib6_create;
749 	}
750 	vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
751 						 MLXSW_SP_L3_PROTO_IPV4);
752 	if (IS_ERR(vr->mr4_table)) {
753 		err = PTR_ERR(vr->mr4_table);
754 		goto err_mr_table_create;
755 	}
756 	vr->tb_id = tb_id;
757 	return vr;
758 
759 err_mr_table_create:
760 	mlxsw_sp_fib_destroy(vr->fib6);
761 	vr->fib6 = NULL;
762 err_fib6_create:
763 	mlxsw_sp_fib_destroy(vr->fib4);
764 	vr->fib4 = NULL;
765 	return ERR_PTR(err);
766 }
767 
768 static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
769 {
770 	mlxsw_sp_mr_table_destroy(vr->mr4_table);
771 	vr->mr4_table = NULL;
772 	mlxsw_sp_fib_destroy(vr->fib6);
773 	vr->fib6 = NULL;
774 	mlxsw_sp_fib_destroy(vr->fib4);
775 	vr->fib4 = NULL;
776 }
777 
778 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
779 {
780 	struct mlxsw_sp_vr *vr;
781 
782 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
783 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
784 	if (!vr)
785 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id);
786 	return vr;
787 }
788 
789 static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
790 {
791 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
792 	    list_empty(&vr->fib6->node_list) &&
793 	    mlxsw_sp_mr_table_empty(vr->mr4_table))
794 		mlxsw_sp_vr_destroy(vr);
795 }
796 
797 static bool
798 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
799 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
800 {
801 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
802 
803 	if (!mlxsw_sp_vr_is_used(vr))
804 		return false;
805 	if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
806 		return true;
807 	return false;
808 }
809 
810 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
811 					struct mlxsw_sp_fib *fib,
812 					struct mlxsw_sp_lpm_tree *new_tree)
813 {
814 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
815 	int err;
816 
817 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
818 	if (err)
819 		return err;
820 	fib->lpm_tree = new_tree;
821 	mlxsw_sp_lpm_tree_hold(new_tree);
822 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
823 	return 0;
824 }
825 
826 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
827 					 struct mlxsw_sp_fib *fib,
828 					 struct mlxsw_sp_lpm_tree *new_tree)
829 {
830 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
831 	enum mlxsw_sp_l3proto proto = fib->proto;
832 	u8 old_id, new_id = new_tree->id;
833 	struct mlxsw_sp_vr *vr;
834 	int i, err;
835 
836 	if (!old_tree)
837 		goto no_replace;
838 	old_id = old_tree->id;
839 
840 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
841 		vr = &mlxsw_sp->router->vrs[i];
842 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
843 			continue;
844 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
845 						   mlxsw_sp_vr_fib(vr, proto),
846 						   new_tree);
847 		if (err)
848 			goto err_tree_replace;
849 	}
850 
851 	return 0;
852 
853 err_tree_replace:
854 	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
855 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
856 			continue;
857 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
858 					     mlxsw_sp_vr_fib(vr, proto),
859 					     old_tree);
860 	}
861 	return err;
862 
863 no_replace:
864 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
865 	if (err)
866 		return err;
867 	fib->lpm_tree = new_tree;
868 	mlxsw_sp_lpm_tree_hold(new_tree);
869 	return 0;
870 }
871 
872 static void
873 mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
874 		      enum mlxsw_sp_l3proto proto,
875 		      struct mlxsw_sp_prefix_usage *req_prefix_usage)
876 {
877 	int i;
878 
879 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
880 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
881 		struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
882 		unsigned char prefix;
883 
884 		if (!mlxsw_sp_vr_is_used(vr))
885 			continue;
886 		mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
887 			mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
888 	}
889 }
890 
891 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
892 {
893 	struct mlxsw_sp_vr *vr;
894 	u64 max_vrs;
895 	int i;
896 
897 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
898 		return -EIO;
899 
900 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
901 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
902 					GFP_KERNEL);
903 	if (!mlxsw_sp->router->vrs)
904 		return -ENOMEM;
905 
906 	for (i = 0; i < max_vrs; i++) {
907 		vr = &mlxsw_sp->router->vrs[i];
908 		vr->id = i;
909 	}
910 
911 	return 0;
912 }
913 
914 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
915 
916 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
917 {
918 	/* At this stage we're guaranteed not to have new incoming
919 	 * FIB notifications, and the work queue holds no more work
920 	 * items for FIBs sitting on top of mlxsw netdevs. However,
921 	 * work items for other FIBs can still be queued. Flush the
922 	 * queue before flushing the device's tables. No need for
923 	 * locks, as we're the only writer.
924 	 */
925 	mlxsw_core_flush_owq();
926 	mlxsw_sp_router_fib_flush(mlxsw_sp);
927 	kfree(mlxsw_sp->router->vrs);
928 }
929 
930 static struct net_device *
931 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
932 {
933 	struct ip_tunnel *tun = netdev_priv(ol_dev);
934 	struct net *net = dev_net(ol_dev);
935 
936 	return __dev_get_by_index(net, tun->parms.link);
937 }
938 
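/* The underlay table of a tunnel is dictated by the device the tunnel
 * is bound to (tun->parms.link), if any: the table of that device's
 * L3 master when it is enslaved to a VRF, RT_TABLE_MAIN otherwise. An
 * unbound tunnel is resolved the same way via the tunnel device
 * itself.
 */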
939 static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
940 {
941 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
942 
943 	if (d)
944 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
945 	else
946 		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
947 }
948 
949 static struct mlxsw_sp_rif *
950 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
951 		    const struct mlxsw_sp_rif_params *params);
952 
953 static struct mlxsw_sp_rif_ipip_lb *
954 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
955 				enum mlxsw_sp_ipip_type ipipt,
956 				struct net_device *ol_dev)
957 {
958 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
959 	const struct mlxsw_sp_ipip_ops *ipip_ops;
960 	struct mlxsw_sp_rif *rif;
961 
962 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
963 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
964 		.common.dev = ol_dev,
965 		.common.lag = false,
966 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
967 	};
968 
969 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common);
970 	if (IS_ERR(rif))
971 		return ERR_CAST(rif);
972 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
973 }
974 
975 static struct mlxsw_sp_ipip_entry *
976 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
977 			  enum mlxsw_sp_ipip_type ipipt,
978 			  struct net_device *ol_dev)
979 {
980 	struct mlxsw_sp_ipip_entry *ipip_entry;
981 	struct mlxsw_sp_ipip_entry *ret = NULL;
982 
983 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
984 	if (!ipip_entry)
985 		return ERR_PTR(-ENOMEM);
986 
987 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
988 							    ol_dev);
989 	if (IS_ERR(ipip_entry->ol_lb)) {
990 		ret = ERR_CAST(ipip_entry->ol_lb);
991 		goto err_ol_ipip_lb_create;
992 	}
993 
994 	ipip_entry->ipipt = ipipt;
995 	ipip_entry->ol_dev = ol_dev;
996 
997 	return ipip_entry;
998 
999 err_ol_ipip_lb_create:
1000 	kfree(ipip_entry);
1001 	return ret;
1002 }
1003 
1004 static void
1005 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1006 {
1007 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1008 	kfree(ipip_entry);
1009 }
1010 
1011 static __be32
1012 mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev)
1013 {
1014 	struct ip_tunnel *tun = netdev_priv(ol_dev);
1015 
1016 	return tun->parms.iph.saddr;
1017 }
1018 
1019 union mlxsw_sp_l3addr
1020 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
1021 			   const struct net_device *ol_dev)
1022 {
1023 	switch (proto) {
1024 	case MLXSW_SP_L3_PROTO_IPV4:
1025 		return (union mlxsw_sp_l3addr) {
1026 			.addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev),
1027 		};
1028 	case MLXSW_SP_L3_PROTO_IPV6:
1029 		break;
1030 	}
1031 
1032 	WARN_ON(1);
1033 	return (union mlxsw_sp_l3addr) {
1034 		.addr4 = 0,
1035 	};
1036 }
1037 
1038 __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
1039 {
1040 	struct ip_tunnel *tun = netdev_priv(ol_dev);
1041 
1042 	return tun->parms.iph.daddr;
1043 }
1044 
1045 union mlxsw_sp_l3addr
1046 mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
1047 			   const struct net_device *ol_dev)
1048 {
1049 	switch (proto) {
1050 	case MLXSW_SP_L3_PROTO_IPV4:
1051 		return (union mlxsw_sp_l3addr) {
1052 			.addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev),
1053 		};
1054 	case MLXSW_SP_L3_PROTO_IPV6:
1055 		break;
1056 	}
1057 
1058 	WARN_ON(1);
1059 	return (union mlxsw_sp_l3addr) {
1060 		.addr4 = 0,
1061 	};
1062 }
1063 
1064 static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
1065 			       const union mlxsw_sp_l3addr *addr2)
1066 {
1067 	return !memcmp(addr1, addr2, sizeof(*addr1));
1068 }
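
/* Note that the memcmp() above spans the whole union, i.e. all 16
 * bytes of an IPv6 address. Comparing IPv4 addresses this way is safe
 * only because producers such as mlxsw_sp_ipip_netdev_saddr() build
 * the union from a compound literal, which zero-fills the bytes
 * beyond .addr4.
 */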
1069 
1070 static bool
1071 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1072 				  const enum mlxsw_sp_l3proto ul_proto,
1073 				  union mlxsw_sp_l3addr saddr,
1074 				  u32 ul_tb_id,
1075 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1076 {
1077 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1078 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1079 	union mlxsw_sp_l3addr tun_saddr;
1080 
1081 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1082 		return false;
1083 
1084 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1085 	return tun_ul_tb_id == ul_tb_id &&
1086 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1087 }
1088 
1089 static int
1090 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1091 			      struct mlxsw_sp_fib_entry *fib_entry,
1092 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1093 {
1094 	u32 tunnel_index;
1095 	int err;
1096 
1097 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
1098 	if (err)
1099 		return err;
1100 
1101 	ipip_entry->decap_fib_entry = fib_entry;
1102 	fib_entry->decap.ipip_entry = ipip_entry;
1103 	fib_entry->decap.tunnel_index = tunnel_index;
1104 	return 0;
1105 }
1106 
1107 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1108 					  struct mlxsw_sp_fib_entry *fib_entry)
1109 {
1110 	/* Unlink this FIB entry from the IPIP entry it is the decap route of. */
1111 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1112 	fib_entry->decap.ipip_entry = NULL;
1113 	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
1114 }
1115 
1116 static struct mlxsw_sp_fib_node *
1117 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1118 			 size_t addr_len, unsigned char prefix_len);
1119 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1120 				     struct mlxsw_sp_fib_entry *fib_entry);
1121 
1122 static void
1123 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1124 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1125 {
1126 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1127 
1128 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1129 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1130 
1131 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1132 }
1133 
1134 static void
1135 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1136 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1137 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1138 {
1139 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1140 					  ipip_entry))
1141 		return;
1142 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1143 
1144 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1145 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1146 }
1147 
1148 /* Given an IPIP entry, find the corresponding decap route. */
1149 static struct mlxsw_sp_fib_entry *
1150 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1151 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1152 {
1153 	struct mlxsw_sp_fib_node *fib_node;
1154 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1155 	struct mlxsw_sp_fib_entry *fib_entry;
1156 	unsigned char saddr_prefix_len;
1157 	union mlxsw_sp_l3addr saddr;
1158 	struct mlxsw_sp_fib *ul_fib;
1159 	struct mlxsw_sp_vr *ul_vr;
1160 	const void *saddrp;
1161 	size_t saddr_len;
1162 	u32 ul_tb_id;
1163 	u32 saddr4;
1164 
1165 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1166 
1167 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1168 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1169 	if (!ul_vr)
1170 		return NULL;
1171 
1172 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1173 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1174 					   ipip_entry->ol_dev);
1175 
1176 	switch (ipip_ops->ul_proto) {
1177 	case MLXSW_SP_L3_PROTO_IPV4:
1178 		saddr4 = be32_to_cpu(saddr.addr4);
1179 		saddrp = &saddr4;
1180 		saddr_len = 4;
1181 		saddr_prefix_len = 32;
1182 		break;
1183 	case MLXSW_SP_L3_PROTO_IPV6:
1184 		WARN_ON(1);
1185 		return NULL;
1186 	}
1187 
1188 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1189 					    saddr_prefix_len);
1190 	if (!fib_node || list_empty(&fib_node->entry_list))
1191 		return NULL;
1192 
1193 	fib_entry = list_first_entry(&fib_node->entry_list,
1194 				     struct mlxsw_sp_fib_entry, list);
1195 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1196 		return NULL;
1197 
1198 	return fib_entry;
1199 }
1200 
1201 static struct mlxsw_sp_ipip_entry *
1202 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1203 			   enum mlxsw_sp_ipip_type ipipt,
1204 			   struct net_device *ol_dev)
1205 {
1206 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1207 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1208 	struct mlxsw_sp_ipip_entry *ipip_entry;
1209 	enum mlxsw_sp_l3proto ul_proto;
1210 	union mlxsw_sp_l3addr saddr;
1211 
1212 	/* The configuration where several tunnels have the same local address
1213 	 * in the same underlay table needs special treatment in the HW. That is
1214 	 * currently not implemented in the driver.
1215 	 */
1216 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1217 			    ipip_list_node) {
1218 		ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1219 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1220 		if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1221 						      ul_tb_id, ipip_entry))
1222 			return ERR_PTR(-EEXIST);
1223 	}
1224 
1225 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1226 	if (IS_ERR(ipip_entry))
1227 		return ipip_entry;
1228 
1229 	list_add_tail(&ipip_entry->ipip_list_node,
1230 		      &mlxsw_sp->router->ipip_list);
1231 
1232 	return ipip_entry;
1233 }
1234 
1235 static void
1236 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1237 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1238 {
1239 	list_del(&ipip_entry->ipip_list_node);
1240 	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1241 }
1242 
1243 static bool
1244 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1245 				  const struct net_device *ul_dev,
1246 				  enum mlxsw_sp_l3proto ul_proto,
1247 				  union mlxsw_sp_l3addr ul_dip,
1248 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1249 {
1250 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1251 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1252 	struct net_device *ipip_ul_dev;
1253 
1254 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1255 		return false;
1256 
1257 	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1258 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1259 						 ul_tb_id, ipip_entry) &&
1260 	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1261 }
1262 
1263 /* Given decap parameters, find the corresponding IPIP entry. */
1264 static struct mlxsw_sp_ipip_entry *
1265 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1266 				  const struct net_device *ul_dev,
1267 				  enum mlxsw_sp_l3proto ul_proto,
1268 				  union mlxsw_sp_l3addr ul_dip)
1269 {
1270 	struct mlxsw_sp_ipip_entry *ipip_entry;
1271 
1272 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1273 			    ipip_list_node)
1274 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1275 						      ul_proto, ul_dip,
1276 						      ipip_entry))
1277 			return ipip_entry;
1278 
1279 	return NULL;
1280 }
1281 
1282 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1283 				      const struct net_device *dev,
1284 				      enum mlxsw_sp_ipip_type *p_type)
1285 {
1286 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1287 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1288 	enum mlxsw_sp_ipip_type ipipt;
1289 
1290 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1291 		ipip_ops = router->ipip_ops_arr[ipipt];
1292 		if (dev->type == ipip_ops->dev_type) {
1293 			if (p_type)
1294 				*p_type = ipipt;
1295 			return true;
1296 		}
1297 	}
1298 	return false;
1299 }
1300 
1301 bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp,
1302 			     const struct net_device *dev)
1303 {
1304 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1305 }
1306 
1307 static struct mlxsw_sp_ipip_entry *
1308 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1309 				   const struct net_device *ol_dev)
1310 {
1311 	struct mlxsw_sp_ipip_entry *ipip_entry;
1312 
1313 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1314 			    ipip_list_node)
1315 		if (ipip_entry->ol_dev == ol_dev)
1316 			return ipip_entry;
1317 
1318 	return NULL;
1319 }
1320 
1321 static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp,
1322 					     struct net_device *ol_dev)
1323 {
1324 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1325 	struct mlxsw_sp_ipip_entry *ipip_entry;
1326 	enum mlxsw_sp_ipip_type ipipt;
1327 
1328 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1329 	if (router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev,
1330 						     MLXSW_SP_L3_PROTO_IPV4) ||
1331 	    router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev,
1332 						     MLXSW_SP_L3_PROTO_IPV6)) {
1333 		ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1334 							ol_dev);
1335 		if (IS_ERR(ipip_entry))
1336 			return PTR_ERR(ipip_entry);
1337 	}
1338 
1339 	return 0;
1340 }
1341 
1342 static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp,
1343 						struct net_device *ol_dev)
1344 {
1345 	struct mlxsw_sp_ipip_entry *ipip_entry;
1346 
1347 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1348 	if (ipip_entry)
1349 		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1350 }
1351 
1352 static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp,
1353 					    struct net_device *ol_dev)
1354 {
1355 	struct mlxsw_sp_fib_entry *decap_fib_entry;
1356 	struct mlxsw_sp_ipip_entry *ipip_entry;
1357 
1358 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1359 	if (ipip_entry) {
1360 		decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp,
1361 								 ipip_entry);
1362 		if (decap_fib_entry)
1363 			mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1364 							  decap_fib_entry);
1365 	}
1366 
1367 	return 0;
1368 }
1369 
1370 static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp,
1371 					       struct net_device *ol_dev)
1372 {
1373 	struct mlxsw_sp_ipip_entry *ipip_entry;
1374 
1375 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1376 	if (ipip_entry && ipip_entry->decap_fib_entry)
1377 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1378 }
1379 
1380 static int mlxsw_sp_netdevice_ipip_vrf_event(struct mlxsw_sp *mlxsw_sp,
1381 					     struct net_device *ol_dev)
1382 {
1383 	struct mlxsw_sp_fib_entry *decap_fib_entry;
1384 	struct mlxsw_sp_ipip_entry *ipip_entry;
1385 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1386 
1387 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1388 	if (!ipip_entry)
1389 		return 0;
1390 
1391 	/* When a tunneling device is moved to a different VRF, we need to
1392 	 * update the backing loopback. Since RIFs can't be edited, we need to
1393 	 * destroy and recreate it. That might create a window of opportunity
1394 	 * where RALUE and RATR registers end up referencing a RIF that's
1395 	 * already gone. RATRs are handled by the RIF destroy, and to take care
1396 	 * of RALUE, demote the decap route back.
1397 	 */
1398 	if (ipip_entry->decap_fib_entry)
1399 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1400 
1401 	lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipip_entry->ipipt,
1402 						 ol_dev);
1403 	if (IS_ERR(lb_rif))
1404 		return PTR_ERR(lb_rif);
1405 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1406 	ipip_entry->ol_lb = lb_rif;
1407 
1408 	if (ol_dev->flags & IFF_UP) {
1409 		decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp,
1410 								 ipip_entry);
1411 		if (decap_fib_entry)
1412 			mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1413 							  decap_fib_entry);
1414 	}
1415 
1416 	return 0;
1417 }
1418 
1419 int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp,
1420 				  struct net_device *ol_dev,
1421 				  unsigned long event,
1422 				  struct netdev_notifier_changeupper_info *info)
1423 {
1424 	switch (event) {
1425 	case NETDEV_REGISTER:
1426 		return mlxsw_sp_netdevice_ipip_reg_event(mlxsw_sp, ol_dev);
1427 	case NETDEV_UNREGISTER:
1428 		mlxsw_sp_netdevice_ipip_unreg_event(mlxsw_sp, ol_dev);
1429 		return 0;
1430 	case NETDEV_UP:
1431 		return mlxsw_sp_netdevice_ipip_up_event(mlxsw_sp, ol_dev);
1432 	case NETDEV_DOWN:
1433 		mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev);
1434 		return 0;
1435 	case NETDEV_CHANGEUPPER:
1436 		if (netif_is_l3_master(info->upper_dev))
1437 			return mlxsw_sp_netdevice_ipip_vrf_event(mlxsw_sp,
1438 								 ol_dev);
1439 		return 0;
1440 	}
1441 	return 0;
1442 }
1443 
1444 struct mlxsw_sp_neigh_key {
1445 	struct neighbour *n;
1446 };
1447 
1448 struct mlxsw_sp_neigh_entry {
1449 	struct list_head rif_list_node;
1450 	struct rhash_head ht_node;
1451 	struct mlxsw_sp_neigh_key key;
1452 	u16 rif;
1453 	bool connected;
1454 	unsigned char ha[ETH_ALEN];
1455 	struct list_head nexthop_list; /* list of nexthops using
1456 					* this neigh entry
1457 					*/
1458 	struct list_head nexthop_neighs_list_node;
1459 	unsigned int counter_index;
1460 	bool counter_valid;
1461 };
1462 
1463 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1464 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1465 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1466 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1467 };
1468 
1469 struct mlxsw_sp_neigh_entry *
1470 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1471 			struct mlxsw_sp_neigh_entry *neigh_entry)
1472 {
1473 	if (!neigh_entry) {
1474 		if (list_empty(&rif->neigh_list))
1475 			return NULL;
1476 		else
1477 			return list_first_entry(&rif->neigh_list,
1478 						typeof(*neigh_entry),
1479 						rif_list_node);
1480 	}
1481 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1482 		return NULL;
1483 	return list_next_entry(neigh_entry, rif_list_node);
1484 }
1485 
1486 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1487 {
1488 	return neigh_entry->key.n->tbl->family;
1489 }
1490 
1491 unsigned char *
1492 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1493 {
1494 	return neigh_entry->ha;
1495 }
1496 
1497 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1498 {
1499 	struct neighbour *n;
1500 
1501 	n = neigh_entry->key.n;
1502 	return ntohl(*((__be32 *) n->primary_key));
1503 }
1504 
1505 struct in6_addr *
1506 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1507 {
1508 	struct neighbour *n;
1509 
1510 	n = neigh_entry->key.n;
1511 	return (struct in6_addr *) &n->primary_key;
1512 }
1513 
1514 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1515 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1516 			       u64 *p_counter)
1517 {
1518 	if (!neigh_entry->counter_valid)
1519 		return -EINVAL;
1520 
1521 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1522 					 p_counter, NULL);
1523 }
1524 
1525 static struct mlxsw_sp_neigh_entry *
1526 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1527 			   u16 rif)
1528 {
1529 	struct mlxsw_sp_neigh_entry *neigh_entry;
1530 
1531 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1532 	if (!neigh_entry)
1533 		return NULL;
1534 
1535 	neigh_entry->key.n = n;
1536 	neigh_entry->rif = rif;
1537 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1538 
1539 	return neigh_entry;
1540 }
1541 
1542 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
1543 {
1544 	kfree(neigh_entry);
1545 }
1546 
1547 static int
1548 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1549 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1550 {
1551 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1552 				      &neigh_entry->ht_node,
1553 				      mlxsw_sp_neigh_ht_params);
1554 }
1555 
1556 static void
1557 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1558 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1559 {
1560 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1561 			       &neigh_entry->ht_node,
1562 			       mlxsw_sp_neigh_ht_params);
1563 }
1564 
1565 static bool
1566 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1567 				    struct mlxsw_sp_neigh_entry *neigh_entry)
1568 {
1569 	struct devlink *devlink;
1570 	const char *table_name;
1571 
1572 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1573 	case AF_INET:
1574 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1575 		break;
1576 	case AF_INET6:
1577 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1578 		break;
1579 	default:
1580 		WARN_ON(1);
1581 		return false;
1582 	}
1583 
1584 	devlink = priv_to_devlink(mlxsw_sp->core);
1585 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
1586 }
1587 
1588 static void
1589 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1590 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1591 {
1592 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1593 		return;
1594 
1595 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1596 		return;
1597 
1598 	neigh_entry->counter_valid = true;
1599 }
1600 
1601 static void
1602 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1603 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1604 {
1605 	if (!neigh_entry->counter_valid)
1606 		return;
1607 	mlxsw_sp_flow_counter_free(mlxsw_sp,
1608 				   neigh_entry->counter_index);
1609 	neigh_entry->counter_valid = false;
1610 }
1611 
1612 static struct mlxsw_sp_neigh_entry *
1613 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1614 {
1615 	struct mlxsw_sp_neigh_entry *neigh_entry;
1616 	struct mlxsw_sp_rif *rif;
1617 	int err;
1618 
1619 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1620 	if (!rif)
1621 		return ERR_PTR(-EINVAL);
1622 
1623 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1624 	if (!neigh_entry)
1625 		return ERR_PTR(-ENOMEM);
1626 
1627 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1628 	if (err)
1629 		goto err_neigh_entry_insert;
1630 
1631 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1632 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1633 
1634 	return neigh_entry;
1635 
1636 err_neigh_entry_insert:
1637 	mlxsw_sp_neigh_entry_free(neigh_entry);
1638 	return ERR_PTR(err);
1639 }
1640 
1641 static void
1642 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1643 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1644 {
1645 	list_del(&neigh_entry->rif_list_node);
1646 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
1647 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
1648 	mlxsw_sp_neigh_entry_free(neigh_entry);
1649 }
1650 
1651 static struct mlxsw_sp_neigh_entry *
1652 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1653 {
1654 	struct mlxsw_sp_neigh_key key;
1655 
1656 	key.n = n;
1657 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1658 				      &key, mlxsw_sp_neigh_ht_params);
1659 }
1660 
1661 static void
1662 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
1663 {
1664 	unsigned long interval;
1665 
1666 #if IS_ENABLED(CONFIG_IPV6)
1667 	interval = min_t(unsigned long,
1668 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
1669 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
1670 #else
1671 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
1672 #endif
1673 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
1674 }
1675 
1676 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1677 						   char *rauhtd_pl,
1678 						   int ent_index)
1679 {
1680 	struct net_device *dev;
1681 	struct neighbour *n;
1682 	__be32 dipn;
1683 	u32 dip;
1684 	u16 rif;
1685 
1686 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
1687 
1688 	if (!mlxsw_sp->router->rifs[rif]) {
1689 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1690 		return;
1691 	}
1692 
1693 	dipn = htonl(dip);
1694 	dev = mlxsw_sp->router->rifs[rif]->dev;
1695 	n = neigh_lookup(&arp_tbl, &dipn, dev);
1696 	if (!n) {
1697 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
1698 			   &dip);
1699 		return;
1700 	}
1701 
1702 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
1703 	neigh_event_send(n, NULL);
1704 	neigh_release(n);
1705 }
1706 
1707 #if IS_ENABLED(CONFIG_IPV6)
1708 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1709 						   char *rauhtd_pl,
1710 						   int rec_index)
1711 {
1712 	struct net_device *dev;
1713 	struct neighbour *n;
1714 	struct in6_addr dip;
1715 	u16 rif;
1716 
1717 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
1718 					 (char *) &dip);
1719 
1720 	if (!mlxsw_sp->router->rifs[rif]) {
1721 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1722 		return;
1723 	}
1724 
1725 	dev = mlxsw_sp->router->rifs[rif]->dev;
1726 	n = neigh_lookup(&nd_tbl, &dip, dev);
1727 	if (!n) {
1728 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
1729 			   &dip);
1730 		return;
1731 	}
1732 
1733 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
1734 	neigh_event_send(n, NULL);
1735 	neigh_release(n);
1736 }
1737 #else
1738 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1739 						   char *rauhtd_pl,
1740 						   int rec_index)
1741 {
1742 }
1743 #endif
1744 
1745 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1746 						   char *rauhtd_pl,
1747 						   int rec_index)
1748 {
1749 	u8 num_entries;
1750 	int i;
1751 
1752 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
1753 								rec_index);
1754 	/* Hardware starts counting at 0, so add 1. */
1755 	num_entries++;
1756 
1757 	/* Each record consists of several neighbour entries. */
1758 	for (i = 0; i < num_entries; i++) {
1759 		int ent_index;
1760 
1761 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
1762 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
1763 						       ent_index);
1764 	}
1766 }
1767 
1768 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1769 						   char *rauhtd_pl,
1770 						   int rec_index)
1771 {
1772 	/* One record contains one entry. */
1773 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
1774 					       rec_index);
1775 }
1776 
1777 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
1778 					      char *rauhtd_pl, int rec_index)
1779 {
1780 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
1781 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
1782 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
1783 						       rec_index);
1784 		break;
1785 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
1786 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
1787 						       rec_index);
1788 		break;
1789 	}
1790 }
1791 
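/* Returns true if the RAUHTD response is completely full: the maximum
 * number of records was returned and the last record has no spare
 * entry slots (an IPv6 record always carries exactly one entry). A
 * full response may mean more activity records are pending, so the
 * caller should dump again.
 */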
1792 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
1793 {
1794 	u8 num_rec, last_rec_index, num_entries;
1795 
1796 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
1797 	last_rec_index = num_rec - 1;
1798 
1799 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
1800 		return false;
1801 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
1802 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
1803 		return true;
1804 
1805 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
1806 								last_rec_index);
1807 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
1808 		return true;
1809 	return false;
1810 }
1811 
1812 static int
1813 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
1814 				       char *rauhtd_pl,
1815 				       enum mlxsw_reg_rauhtd_type type)
1816 {
1817 	int i, num_rec;
1818 	int err;
1819 
1820 	/* Make sure the neighbour's netdev isn't removed in the
1821 	 * process.
1822 	 */
1823 	rtnl_lock();
1824 	do {
1825 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
1826 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
1827 				      rauhtd_pl);
1828 		if (err) {
1829 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
1830 			break;
1831 		}
1832 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
1833 		for (i = 0; i < num_rec; i++)
1834 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
1835 							  i);
1836 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
1837 	rtnl_unlock();
1838 
1839 	return err;
1840 }
1841 
1842 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
1843 {
1844 	enum mlxsw_reg_rauhtd_type type;
1845 	char *rauhtd_pl;
1846 	int err;
1847 
1848 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
1849 	if (!rauhtd_pl)
1850 		return -ENOMEM;
1851 
1852 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
1853 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
1854 	if (err)
1855 		goto out;
1856 
1857 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
1858 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
1859 out:
1860 	kfree(rauhtd_pl);
1861 	return err;
1862 }
1863 
1864 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
1865 {
1866 	struct mlxsw_sp_neigh_entry *neigh_entry;
1867 
1868 	/* Take the RTNL mutex here to prevent the lists from changing */
1869 	rtnl_lock();
1870 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
1871 			    nexthop_neighs_list_node)
1872 		/* If this neigh has nexthops, make the kernel think it
1873 		 * is active regardless of the actual traffic.
1874 		 */
1875 		neigh_event_send(neigh_entry->key.n, NULL);
1876 	rtnl_unlock();
1877 }
1878 
1879 static void
1880 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
1881 {
1882 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
1883 
1884 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
1885 			       msecs_to_jiffies(interval));
1886 }
1887 
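/* Periodic worker: dump neighbour activity from the device so the
 * kernel can refresh its NUD state, keep nexthop neighbours alive, and
 * then re-arm using the interval that tracks the neighbour tables'
 * DELAY_PROBE_TIME (see the netevent handler below).
 */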
1888 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
1889 {
1890 	struct mlxsw_sp_router *router;
1891 	int err;
1892 
1893 	router = container_of(work, struct mlxsw_sp_router,
1894 			      neighs_update.dw.work);
1895 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
1896 	if (err)
1897 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
1898 
1899 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
1900 
1901 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
1902 }
1903 
1904 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
1905 {
1906 	struct mlxsw_sp_neigh_entry *neigh_entry;
1907 	struct mlxsw_sp_router *router;
1908 
1909 	router = container_of(work, struct mlxsw_sp_router,
1910 			      nexthop_probe_dw.work);
1911 	/* Iterate over the nexthop neighbours, find those that are
1912 	 * unresolved and send ARP on them. This solves the chicken-and-egg
1913 	 * problem in which a nexthop wouldn't be offloaded until its
1914 	 * neighbour is resolved, but the neighbour would never be resolved
1915 	 * as long as traffic is flowing in HW using a different nexthop.
1916 	 *
1917 	 * Take the RTNL mutex here to prevent the lists from changing.
1918 	 */
1919 	rtnl_lock();
1920 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
1921 			    nexthop_neighs_list_node)
1922 		if (!neigh_entry->connected)
1923 			neigh_event_send(neigh_entry->key.n, NULL);
1924 	rtnl_unlock();
1925 
1926 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
1927 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
1928 }
1929 
1930 static void
1931 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1932 			      struct mlxsw_sp_neigh_entry *neigh_entry,
1933 			      bool removing);
1934 
1935 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
1936 {
1937 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
1938 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
1939 }
1940 
1941 static void
1942 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
1943 				struct mlxsw_sp_neigh_entry *neigh_entry,
1944 				enum mlxsw_reg_rauht_op op)
1945 {
1946 	struct neighbour *n = neigh_entry->key.n;
1947 	u32 dip = ntohl(*((__be32 *) n->primary_key));
1948 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
1949 
1950 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
1951 			      dip);
1952 	if (neigh_entry->counter_valid)
1953 		mlxsw_reg_rauht_pack_counter(rauht_pl,
1954 					     neigh_entry->counter_index);
1955 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1956 }
1957 
1958 static void
1959 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
1960 				struct mlxsw_sp_neigh_entry *neigh_entry,
1961 				enum mlxsw_reg_rauht_op op)
1962 {
1963 	struct neighbour *n = neigh_entry->key.n;
1964 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
1965 	const char *dip = n->primary_key;
1966 
1967 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
1968 			      dip);
1969 	if (neigh_entry->counter_valid)
1970 		mlxsw_reg_rauht_pack_counter(rauht_pl,
1971 					     neigh_entry->counter_index);
1972 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1973 }
1974 
1975 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
1976 {
1977 	struct neighbour *n = neigh_entry->key.n;
1978 
1979 	/* Packets with a link-local destination address are trapped
1980 	 * after LPM lookup and never reach the neighbour table, so
1981 	 * there is no need to program such neighbours to the device.
1982 	 */
1983 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
1984 	    IPV6_ADDR_LINKLOCAL)
1985 		return true;
1986 	return false;
1987 }
1988 
1989 static void
1990 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
1991 			    struct mlxsw_sp_neigh_entry *neigh_entry,
1992 			    bool adding)
1993 {
1994 	if (!adding && !neigh_entry->connected)
1995 		return;
1996 	neigh_entry->connected = adding;
1997 	if (neigh_entry->key.n->tbl->family == AF_INET) {
1998 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
1999 						mlxsw_sp_rauht_op(adding));
2000 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2001 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2002 			return;
2003 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2004 						mlxsw_sp_rauht_op(adding));
2005 	} else {
2006 		WARN_ON_ONCE(1);
2007 	}
2008 }
2009 
2010 void
2011 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2012 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2013 				    bool adding)
2014 {
2015 	if (adding)
2016 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2017 	else
2018 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2019 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2020 }
2021 
2022 struct mlxsw_sp_neigh_event_work {
2023 	struct work_struct work;
2024 	struct mlxsw_sp *mlxsw_sp;
2025 	struct neighbour *n;
2026 };
2027 
2028 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2029 {
2030 	struct mlxsw_sp_neigh_event_work *neigh_work =
2031 		container_of(work, struct mlxsw_sp_neigh_event_work, work);
2032 	struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
2033 	struct mlxsw_sp_neigh_entry *neigh_entry;
2034 	struct neighbour *n = neigh_work->n;
2035 	unsigned char ha[ETH_ALEN];
2036 	bool entry_connected;
2037 	u8 nud_state, dead;
2038 
2039 	/* If these parameters are changed after we release the lock,
2040 	 * then we are guaranteed to receive another event letting us
2041 	 * know about it.
2042 	 */
2043 	read_lock_bh(&n->lock);
2044 	memcpy(ha, n->ha, ETH_ALEN);
2045 	nud_state = n->nud_state;
2046 	dead = n->dead;
2047 	read_unlock_bh(&n->lock);
2048 
2049 	rtnl_lock();
2050 	entry_connected = nud_state & NUD_VALID && !dead;
2051 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2052 	if (!entry_connected && !neigh_entry)
2053 		goto out;
2054 	if (!neigh_entry) {
2055 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2056 		if (IS_ERR(neigh_entry))
2057 			goto out;
2058 	}
2059 
2060 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2061 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2062 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2063 
2064 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2065 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2066 
2067 out:
2068 	rtnl_unlock();
2069 	neigh_release(n);
2070 	kfree(neigh_work);
2071 }
2072 
2073 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
2074 				   unsigned long event, void *ptr)
2075 {
2076 	struct mlxsw_sp_neigh_event_work *neigh_work;
2077 	struct mlxsw_sp_port *mlxsw_sp_port;
2078 	struct mlxsw_sp *mlxsw_sp;
2079 	unsigned long interval;
2080 	struct neigh_parms *p;
2081 	struct neighbour *n;
2082 
2083 	switch (event) {
2084 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2085 		p = ptr;
2086 
2087 		/* We don't care about changes in the default table. */
2088 		/* Ignore changes in the default table and in non-IP tables. */
2089 				p->tbl->family != AF_INET6))
2090 			return NOTIFY_DONE;
2091 
2092 		/* We are in atomic context and can't take RTNL mutex,
2093 		 * so use RCU variant to walk the device chain.
2094 		 */
2095 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2096 		if (!mlxsw_sp_port)
2097 			return NOTIFY_DONE;
2098 
2099 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2100 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2101 		mlxsw_sp->router->neighs_update.interval = interval;
2102 
2103 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2104 		break;
2105 	case NETEVENT_NEIGH_UPDATE:
2106 		n = ptr;
2107 
2108 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2109 			return NOTIFY_DONE;
2110 
2111 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2112 		if (!mlxsw_sp_port)
2113 			return NOTIFY_DONE;
2114 
2115 		neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
2116 		if (!neigh_work) {
2117 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2118 			return NOTIFY_BAD;
2119 		}
2120 
2121 		INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
2122 		neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2123 		neigh_work->n = n;
2124 
2125 		/* Take a reference to ensure the neighbour won't be
2126 		 * destroyed until we drop the reference from the
2127 		 * work item.
2128 		 */
2129 		neigh_clone(n);
2130 		mlxsw_core_schedule_work(&neigh_work->work);
2131 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2132 		break;
2133 	}
2134 
2135 	return NOTIFY_DONE;
2136 }
2137 
2138 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2139 {
2140 	int err;
2141 
2142 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2143 			      &mlxsw_sp_neigh_ht_params);
2144 	if (err)
2145 		return err;
2146 
2147 	/* Initialize the polling interval according to the default
2148 	 * table.
2149 	 */
2150 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2151 
2152 	/* Create the delayed works for activity update and nexthop probing */
2153 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2154 			  mlxsw_sp_router_neighs_update_work);
2155 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2156 			  mlxsw_sp_router_probe_unresolved_nexthops);
2157 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2158 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2159 	return 0;
2160 }
2161 
2162 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2163 {
2164 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2165 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2166 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2167 }
2168 
2169 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2170 					 struct mlxsw_sp_rif *rif)
2171 {
2172 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2173 
2174 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2175 				 rif_list_node) {
2176 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2177 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2178 	}
2179 }
2180 
2181 enum mlxsw_sp_nexthop_type {
2182 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2183 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2184 };
2185 
2186 struct mlxsw_sp_nexthop_key {
2187 	struct fib_nh *fib_nh;
2188 };
2189 
2190 struct mlxsw_sp_nexthop {
2191 	struct list_head neigh_list_node; /* member of neigh entry list */
2192 	struct list_head rif_list_node;
2193 	struct list_head router_list_node;
2194 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2195 						* this belongs to
2196 						*/
2197 	struct rhash_head ht_node;
2198 	struct mlxsw_sp_nexthop_key key;
2199 	unsigned char gw_addr[sizeof(struct in6_addr)];
2200 	int ifindex;
2201 	struct mlxsw_sp_rif *rif;
2202 	u8 should_offload:1, /* set indicates this neigh is connected and
2203 			      * should be put into the KVD linear area of this group.
2204 			      */
2205 	   offloaded:1, /* set in case the neigh is actually put into the
2206 			 * KVD linear area of this group.
2207 			 */
2208 	   update:1; /* set indicates that the MAC of this neigh should
2209 		      * be updated in HW.
2210 		      */
2211 	enum mlxsw_sp_nexthop_type type;
2212 	union {
2213 		struct mlxsw_sp_neigh_entry *neigh_entry;
2214 		struct mlxsw_sp_ipip_entry *ipip_entry;
2215 	};
2216 	unsigned int counter_index;
2217 	bool counter_valid;
2218 };
2219 
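/* A nexthop group is allocated with its nexthops as a trailing flexible
 * array. For gateway groups, adj_index points at a block of ecmp_size
 * consecutive KVD linear (adjacency) entries, one per offloaded
 * nexthop. The nh_rif shortcut below aliases the RIF of the first
 * nexthop, which is what action-local fib entries are programmed with.
 */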
2220 struct mlxsw_sp_nexthop_group {
2221 	void *priv;
2222 	struct rhash_head ht_node;
2223 	struct list_head fib_list; /* list of fib entries that use this group */
2224 	struct neigh_table *neigh_tbl;
2225 	u8 adj_index_valid:1,
2226 	   gateway:1; /* routes using the group use a gateway */
2227 	u32 adj_index;
2228 	u16 ecmp_size;
2229 	u16 count;
2230 	struct mlxsw_sp_nexthop nexthops[0];
2231 #define nh_rif	nexthops[0].rif
2232 };
2233 
2234 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2235 				    struct mlxsw_sp_nexthop *nh)
2236 {
2237 	struct devlink *devlink;
2238 
2239 	devlink = priv_to_devlink(mlxsw_sp->core);
2240 	if (!devlink_dpipe_table_counter_enabled(devlink,
2241 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2242 		return;
2243 
2244 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2245 		return;
2246 
2247 	nh->counter_valid = true;
2248 }
2249 
2250 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2251 				   struct mlxsw_sp_nexthop *nh)
2252 {
2253 	if (!nh->counter_valid)
2254 		return;
2255 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2256 	nh->counter_valid = false;
2257 }
2258 
2259 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2260 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2261 {
2262 	if (!nh->counter_valid)
2263 		return -EINVAL;
2264 
2265 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2266 					 p_counter, NULL);
2267 }
2268 
2269 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2270 					       struct mlxsw_sp_nexthop *nh)
2271 {
2272 	if (!nh) {
2273 		if (list_empty(&router->nexthop_list))
2274 			return NULL;
2275 		else
2276 			return list_first_entry(&router->nexthop_list,
2277 						typeof(*nh), router_list_node);
2278 	}
2279 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2280 		return NULL;
2281 	return list_next_entry(nh, router_list_node);
2282 }
2283 
2284 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2285 {
2286 	return nh->offloaded;
2287 }
2288 
2289 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2290 {
2291 	if (!nh->offloaded)
2292 		return NULL;
2293 	return nh->neigh_entry->ha;
2294 }
2295 
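/* Report the position of a nexthop inside its group's adjacency block.
 * The hash index is the number of offloaded nexthops preceding nh in
 * the group, mirroring how mlxsw_sp_nexthop_group_update() hands out
 * consecutive adjacency entries to offloaded nexthops only. E.g. in a
 * group of {offloaded, not offloaded, offloaded}, the third nexthop
 * resides at adj_index + 1.
 */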
2296 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2297 			     u32 *p_adj_hash_index)
2298 {
2299 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2300 	u32 adj_hash_index = 0;
2301 	int i;
2302 
2303 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2304 		return -EINVAL;
2305 
2306 	*p_adj_index = nh_grp->adj_index;
2307 
2308 	for (i = 0; i < nh_grp->count; i++) {
2309 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2310 
2311 		if (nh_iter == nh)
2312 			break;
2313 		if (nh_iter->offloaded)
2314 			adj_hash_index++;
2315 	}
2316 
2317 	*p_adj_hash_index = adj_hash_index;
2318 	return 0;
2319 }
2320 
2321 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2322 {
2323 	return nh->rif;
2324 }
2325 
2326 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2327 {
2328 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2329 	int i;
2330 
2331 	for (i = 0; i < nh_grp->count; i++) {
2332 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2333 
2334 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2335 			return true;
2336 	}
2337 	return false;
2338 }
2339 
2340 static struct fib_info *
2341 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2342 {
2343 	return nh_grp->priv;
2344 }
2345 
2346 struct mlxsw_sp_nexthop_group_cmp_arg {
2347 	enum mlxsw_sp_l3proto proto;
2348 	union {
2349 		struct fib_info *fi;
2350 		struct mlxsw_sp_fib6_entry *fib6_entry;
2351 	};
2352 };
2353 
2354 static bool
2355 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2356 				    const struct in6_addr *gw, int ifindex)
2357 {
2358 	int i;
2359 
2360 	for (i = 0; i < nh_grp->count; i++) {
2361 		const struct mlxsw_sp_nexthop *nh;
2362 
2363 		nh = &nh_grp->nexthops[i];
2364 		if (nh->ifindex == ifindex &&
2365 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2366 			return true;
2367 	}
2368 
2369 	return false;
2370 }
2371 
2372 static bool
2373 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2374 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2375 {
2376 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2377 
2378 	if (nh_grp->count != fib6_entry->nrt6)
2379 		return false;
2380 
2381 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2382 		struct in6_addr *gw;
2383 		int ifindex;
2384 
2385 		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
2386 		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
2387 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex))
2388 			return false;
2389 	}
2390 
2391 	return true;
2392 }
2393 
2394 static int
2395 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2396 {
2397 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2398 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2399 
2400 	switch (cmp_arg->proto) {
2401 	case MLXSW_SP_L3_PROTO_IPV4:
2402 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2403 	case MLXSW_SP_L3_PROTO_IPV6:
2404 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2405 						    cmp_arg->fib6_entry);
2406 	default:
2407 		WARN_ON(1);
2408 		return 1;
2409 	}
2410 }
2411 
2412 static int
2413 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2414 {
2415 	return nh_grp->neigh_tbl->family;
2416 }
2417 
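/* The nexthop group hash table is keyed differently per protocol: IPv4
 * groups hash and compare the fib_info pointer itself, so one group is
 * shared by all routes referencing the same fib_info, while IPv6 groups
 * hash the nexthop count XORed with the ifindexes and compare gateway /
 * ifindex pairs, as IPv6 routes have no analogous shared object. The
 * object hash function must agree with the key hash function for a
 * lookup to find a matching group.
 */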
2418 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2419 {
2420 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2421 	const struct mlxsw_sp_nexthop *nh;
2422 	struct fib_info *fi;
2423 	unsigned int val;
2424 	int i;
2425 
2426 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2427 	case AF_INET:
2428 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2429 		return jhash(&fi, sizeof(fi), seed);
2430 	case AF_INET6:
2431 		val = nh_grp->count;
2432 		for (i = 0; i < nh_grp->count; i++) {
2433 			nh = &nh_grp->nexthops[i];
2434 			val ^= nh->ifindex;
2435 		}
2436 		return jhash(&val, sizeof(val), seed);
2437 	default:
2438 		WARN_ON(1);
2439 		return 0;
2440 	}
2441 }
2442 
2443 static u32
2444 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2445 {
2446 	unsigned int val = fib6_entry->nrt6;
2447 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2448 	struct net_device *dev;
2449 
2450 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2451 		dev = mlxsw_sp_rt6->rt->dst.dev;
2452 		val ^= dev->ifindex;
2453 	}
2454 
2455 	return jhash(&val, sizeof(val), seed);
2456 }
2457 
2458 static u32
2459 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2460 {
2461 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2462 
2463 	switch (cmp_arg->proto) {
2464 	case MLXSW_SP_L3_PROTO_IPV4:
2465 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2466 	case MLXSW_SP_L3_PROTO_IPV6:
2467 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2468 	default:
2469 		WARN_ON(1);
2470 		return 0;
2471 	}
2472 }
2473 
2474 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2475 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2476 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
2477 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2478 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2479 };
2480 
2481 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2482 					 struct mlxsw_sp_nexthop_group *nh_grp)
2483 {
2484 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2485 	    !nh_grp->gateway)
2486 		return 0;
2487 
2488 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2489 				      &nh_grp->ht_node,
2490 				      mlxsw_sp_nexthop_group_ht_params);
2491 }
2492 
2493 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2494 					  struct mlxsw_sp_nexthop_group *nh_grp)
2495 {
2496 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2497 	    !nh_grp->gateway)
2498 		return;
2499 
2500 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2501 			       &nh_grp->ht_node,
2502 			       mlxsw_sp_nexthop_group_ht_params);
2503 }
2504 
2505 static struct mlxsw_sp_nexthop_group *
2506 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2507 			       struct fib_info *fi)
2508 {
2509 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2510 
2511 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2512 	cmp_arg.fi = fi;
2513 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2514 				      &cmp_arg,
2515 				      mlxsw_sp_nexthop_group_ht_params);
2516 }
2517 
2518 static struct mlxsw_sp_nexthop_group *
2519 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2520 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2521 {
2522 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2523 
2524 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2525 	cmp_arg.fib6_entry = fib6_entry;
2526 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2527 				      &cmp_arg,
2528 				      mlxsw_sp_nexthop_group_ht_params);
2529 }
2530 
2531 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2532 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2533 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2534 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
2535 };
2536 
2537 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2538 				   struct mlxsw_sp_nexthop *nh)
2539 {
2540 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2541 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2542 }
2543 
2544 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2545 				    struct mlxsw_sp_nexthop *nh)
2546 {
2547 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2548 			       mlxsw_sp_nexthop_ht_params);
2549 }
2550 
2551 static struct mlxsw_sp_nexthop *
2552 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2553 			struct mlxsw_sp_nexthop_key key)
2554 {
2555 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2556 				      mlxsw_sp_nexthop_ht_params);
2557 }
2558 
2559 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2560 					     const struct mlxsw_sp_fib *fib,
2561 					     u32 adj_index, u16 ecmp_size,
2562 					     u32 new_adj_index,
2563 					     u16 new_ecmp_size)
2564 {
2565 	char raleu_pl[MLXSW_REG_RALEU_LEN];
2566 
2567 	mlxsw_reg_raleu_pack(raleu_pl,
2568 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
2569 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
2570 			     new_ecmp_size);
2571 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2572 }
2573 
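/* When a group's adjacency block is reallocated (new base index or ECMP
 * size), every route bound to the group must be repointed. The RALEU
 * register updates, per virtual router, all routes that reference the
 * old <adj_index, ecmp_size> pair, so it is issued once per distinct
 * FIB on the group's fib_list rather than once per route.
 */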
2574 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2575 					  struct mlxsw_sp_nexthop_group *nh_grp,
2576 					  u32 old_adj_index, u16 old_ecmp_size)
2577 {
2578 	struct mlxsw_sp_fib_entry *fib_entry;
2579 	struct mlxsw_sp_fib *fib = NULL;
2580 	int err;
2581 
2582 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2583 		if (fib == fib_entry->fib_node->fib)
2584 			continue;
2585 		fib = fib_entry->fib_node->fib;
2586 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2587 							old_adj_index,
2588 							old_ecmp_size,
2589 							nh_grp->adj_index,
2590 							nh_grp->ecmp_size);
2591 		if (err)
2592 			return err;
2593 	}
2594 	return 0;
2595 }
2596 
2597 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2598 			    struct mlxsw_sp_nexthop *nh)
2599 {
2600 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2601 	char ratr_pl[MLXSW_REG_RATR_LEN];
2602 
2603 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2604 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
2605 			    adj_index, neigh_entry->rif);
2606 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2607 	if (nh->counter_valid)
2608 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2609 	else
2610 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2611 
2612 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2613 }
2614 
2615 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2616 					u32 adj_index,
2617 					struct mlxsw_sp_nexthop *nh)
2618 {
2619 	const struct mlxsw_sp_ipip_ops *ipip_ops;
2620 
2621 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
2622 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
2623 }
2624 
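/* Write the adjacency entries of a group into its KVD linear block.
 * Only nexthops marked should_offload consume an adjacency slot; an
 * entry is (re)written when its neighbour changed (nh->update) or when
 * the whole block was just reallocated. Ethernet nexthops are
 * programmed via RATR with the neighbour's MAC, tunnel nexthops via the
 * IP-in-IP ops.
 */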
2625 static int
2626 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
2627 			      struct mlxsw_sp_nexthop_group *nh_grp,
2628 			      bool reallocate)
2629 {
2630 	u32 adj_index = nh_grp->adj_index; /* base */
2631 	struct mlxsw_sp_nexthop *nh;
2632 	int i;
2633 	int err;
2634 
2635 	for (i = 0; i < nh_grp->count; i++) {
2636 		nh = &nh_grp->nexthops[i];
2637 
2638 		if (!nh->should_offload) {
2639 			nh->offloaded = 0;
2640 			continue;
2641 		}
2642 
2643 		if (nh->update || reallocate) {
2644 			switch (nh->type) {
2645 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
2646 				err = mlxsw_sp_nexthop_update
2647 					    (mlxsw_sp, adj_index, nh);
2648 				break;
2649 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
2650 				err = mlxsw_sp_nexthop_ipip_update
2651 					    (mlxsw_sp, adj_index, nh);
2652 				break;
2653 			}
2654 			if (err)
2655 				return err;
2656 			nh->update = 0;
2657 			nh->offloaded = 1;
2658 		}
2659 		adj_index++;
2660 	}
2661 	return 0;
2662 }
2663 
2664 static bool
2665 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2666 				 const struct mlxsw_sp_fib_entry *fib_entry);
2667 
2668 static int
2669 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
2670 				    struct mlxsw_sp_nexthop_group *nh_grp)
2671 {
2672 	struct mlxsw_sp_fib_entry *fib_entry;
2673 	int err;
2674 
2675 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2676 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
2677 						      fib_entry))
2678 			continue;
2679 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2680 		if (err)
2681 			return err;
2682 	}
2683 	return 0;
2684 }
2685 
2686 static void
2687 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
2688 				   enum mlxsw_reg_ralue_op op, int err);
2689 
2690 static void
2691 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
2692 {
2693 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
2694 	struct mlxsw_sp_fib_entry *fib_entry;
2695 
2696 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2697 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
2698 						      fib_entry))
2699 			continue;
2700 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
2701 	}
2702 }
2703 
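/* Re-evaluate a nexthop group after a nexthop was resolved or went
 * away. Assuming the group actually uses a gateway, the rough sequence
 * is:
 *
 * 1. Count the offloadable nexthops to get the new ECMP size.
 * 2. Allocate a fresh KVD linear block and write the entries there.
 * 3. Repoint the routes: a group that was not offloaded before gets a
 *    fib entry update, an already offloaded one a RALEU mass update.
 * 4. Free the old block.
 *
 * On any failure the group falls back to trapping traffic to the CPU,
 * preserving correctness at the cost of forwarding performance.
 */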
2704 static void
2705 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
2706 			       struct mlxsw_sp_nexthop_group *nh_grp)
2707 {
2708 	struct mlxsw_sp_nexthop *nh;
2709 	bool offload_change = false;
2710 	u32 adj_index;
2711 	u16 ecmp_size = 0;
2712 	bool old_adj_index_valid;
2713 	u32 old_adj_index;
2714 	u16 old_ecmp_size;
2715 	int i;
2716 	int err;
2717 
2718 	if (!nh_grp->gateway) {
2719 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
2720 		return;
2721 	}
2722 
2723 	for (i = 0; i < nh_grp->count; i++) {
2724 		nh = &nh_grp->nexthops[i];
2725 
2726 		if (nh->should_offload != nh->offloaded) {
2727 			offload_change = true;
2728 			if (nh->should_offload)
2729 				nh->update = 1;
2730 		}
2731 		if (nh->should_offload)
2732 			ecmp_size++;
2733 	}
2734 	if (!offload_change) {
2735 		/* Nothing was added or removed, so no need to reallocate. Just
2736 		 * update MAC on existing adjacency indexes.
2737 		 */
2738 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
2739 		if (err) {
2740 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
2741 			goto set_trap;
2742 		}
2743 		return;
2744 	}
2745 	if (!ecmp_size)
2746 		/* No neigh in this group is connected, so just set the
2747 		 * trap and let everything flow through the kernel.
2748 		 */
2749 		goto set_trap;
2750 
2751 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
2752 	if (err) {
2753 		/* We ran out of KVD linear space, just set the
2754 		 * trap and let everything flow through the kernel.
2755 		 */
2756 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
2757 		goto set_trap;
2758 	}
2759 	old_adj_index_valid = nh_grp->adj_index_valid;
2760 	old_adj_index = nh_grp->adj_index;
2761 	old_ecmp_size = nh_grp->ecmp_size;
2762 	nh_grp->adj_index_valid = 1;
2763 	nh_grp->adj_index = adj_index;
2764 	nh_grp->ecmp_size = ecmp_size;
2765 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
2766 	if (err) {
2767 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
2768 		goto set_trap;
2769 	}
2770 
2771 	if (!old_adj_index_valid) {
2772 		/* The trap was set for the fib entries, so we have to call
2773 		 * fib entry update to unset it and use the adjacency index.
2774 		 */
2775 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
2776 		if (err) {
2777 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
2778 			goto set_trap;
2779 		}
2780 		return;
2781 	}
2782 
2783 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
2784 					     old_adj_index, old_ecmp_size);
2785 	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
2786 	if (err) {
2787 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
2788 		goto set_trap;
2789 	}
2790 
2791 	/* Offload state within the group changed, so update the flags. */
2792 	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
2793 
2794 	return;
2795 
2796 set_trap:
2797 	old_adj_index_valid = nh_grp->adj_index_valid;
2798 	nh_grp->adj_index_valid = 0;
2799 	for (i = 0; i < nh_grp->count; i++) {
2800 		nh = &nh_grp->nexthops[i];
2801 		nh->offloaded = 0;
2802 	}
2803 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
2804 	if (err)
2805 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
2806 	if (old_adj_index_valid)
2807 		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
2808 }
2809 
2810 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
2811 					    bool removing)
2812 {
2813 	if (!removing)
2814 		nh->should_offload = 1;
2815 	else if (nh->offloaded)
2816 		nh->should_offload = 0;
2817 	nh->update = 1;
2818 }
2819 
2820 static void
2821 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2822 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2823 			      bool removing)
2824 {
2825 	struct mlxsw_sp_nexthop *nh;
2826 
2827 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
2828 			    neigh_list_node) {
2829 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
2830 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
2831 	}
2832 }
2833 
2834 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
2835 				      struct mlxsw_sp_rif *rif)
2836 {
2837 	if (nh->rif)
2838 		return;
2839 
2840 	nh->rif = rif;
2841 	list_add(&nh->rif_list_node, &rif->nexthop_list);
2842 }
2843 
2844 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
2845 {
2846 	if (!nh->rif)
2847 		return;
2848 
2849 	list_del(&nh->rif_list_node);
2850 	nh->rif = NULL;
2851 }
2852 
2853 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
2854 				       struct mlxsw_sp_nexthop *nh)
2855 {
2856 	struct mlxsw_sp_neigh_entry *neigh_entry;
2857 	struct neighbour *n;
2858 	u8 nud_state, dead;
2859 	int err;
2860 
2861 	if (!nh->nh_grp->gateway || nh->neigh_entry)
2862 		return 0;
2863 
2864 	/* Take a reference to the neigh here, ensuring that it will
2865 	 * not be destroyed before the nexthop entry is finished with
2866 	 * it. The reference is taken either in neigh_lookup() or in
2867 	 * neigh_create() in case n is not found.
2868 	 */
2869 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
2870 	if (!n) {
2871 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
2872 				 nh->rif->dev);
2873 		if (IS_ERR(n))
2874 			return PTR_ERR(n);
2875 		neigh_event_send(n, NULL);
2876 	}
2877 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2878 	if (!neigh_entry) {
2879 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2880 		if (IS_ERR(neigh_entry)) {
2881 			err = PTR_ERR(neigh_entry);
2882 			goto err_neigh_entry_create;
2883 		}
2884 	}
2885 
2886 	/* If that is the first nexthop connected to that neigh, add to
2887 	 * nexthop_neighs_list
2888 	 */
2889 	if (list_empty(&neigh_entry->nexthop_list))
2890 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
2891 			      &mlxsw_sp->router->nexthop_neighs_list);
2892 
2893 	nh->neigh_entry = neigh_entry;
2894 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
2895 	read_lock_bh(&n->lock);
2896 	nud_state = n->nud_state;
2897 	dead = n->dead;
2898 	read_unlock_bh(&n->lock);
2899 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
2900 
2901 	return 0;
2902 
2903 err_neigh_entry_create:
2904 	neigh_release(n);
2905 	return err;
2906 }
2907 
2908 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
2909 					struct mlxsw_sp_nexthop *nh)
2910 {
2911 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2912 	struct neighbour *n;
2913 
2914 	if (!neigh_entry)
2915 		return;
2916 	n = neigh_entry->key.n;
2917 
2918 	__mlxsw_sp_nexthop_neigh_update(nh, true);
2919 	list_del(&nh->neigh_list_node);
2920 	nh->neigh_entry = NULL;
2921 
2922 	/* If that is the last nexthop connected to that neigh, remove from
2923 	 * nexthop_neighs_list
2924 	 */
2925 	if (list_empty(&neigh_entry->nexthop_list))
2926 		list_del(&neigh_entry->nexthop_neighs_list_node);
2927 
2928 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2929 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2930 
2931 	neigh_release(n);
2932 }
2933 
2934 static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
2935 				      struct mlxsw_sp_nexthop *nh,
2936 				      struct net_device *ol_dev)
2937 {
2938 	if (!nh->nh_grp->gateway || nh->ipip_entry)
2939 		return 0;
2940 
2941 	nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
2942 	if (!nh->ipip_entry)
2943 		return -ENOENT;
2944 
2945 	__mlxsw_sp_nexthop_neigh_update(nh, false);
2946 	return 0;
2947 }
2948 
2949 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
2950 				       struct mlxsw_sp_nexthop *nh)
2951 {
2952 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
2953 
2954 	if (!ipip_entry)
2955 		return;
2956 
2957 	__mlxsw_sp_nexthop_neigh_update(nh, true);
2958 	nh->ipip_entry = NULL;
2959 }
2960 
2961 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
2962 					const struct fib_nh *fib_nh,
2963 					enum mlxsw_sp_ipip_type *p_ipipt)
2964 {
2965 	struct net_device *dev = fib_nh->nh_dev;
2966 
2967 	return dev &&
2968 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
2969 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
2970 }
2971 
2972 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
2973 				       struct mlxsw_sp_nexthop *nh)
2974 {
2975 	switch (nh->type) {
2976 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
2977 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
2978 		mlxsw_sp_nexthop_rif_fini(nh);
2979 		break;
2980 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
2981 		mlxsw_sp_nexthop_rif_fini(nh);
2982 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
2983 		break;
2984 	}
2985 }
2986 
2987 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
2988 				       struct mlxsw_sp_nexthop *nh,
2989 				       struct fib_nh *fib_nh)
2990 {
2991 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2992 	struct net_device *dev = fib_nh->nh_dev;
2993 	enum mlxsw_sp_ipip_type ipipt;
2994 	struct mlxsw_sp_rif *rif;
2995 	int err;
2996 
2997 	if (mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fib_nh, &ipipt) &&
2998 	    router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
2999 						     MLXSW_SP_L3_PROTO_IPV4)) {
3000 		nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3001 		err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
3002 		if (err)
3003 			return err;
3004 		mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
3005 		return 0;
3006 	}
3007 
3008 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3009 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3010 	if (!rif)
3011 		return 0;
3012 
3013 	mlxsw_sp_nexthop_rif_init(nh, rif);
3014 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3015 	if (err)
3016 		goto err_neigh_init;
3017 
3018 	return 0;
3019 
3020 err_neigh_init:
3021 	mlxsw_sp_nexthop_rif_fini(nh);
3022 	return err;
3023 }
3024 
3025 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3026 					struct mlxsw_sp_nexthop *nh)
3027 {
3028 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3029 }
3030 
3031 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3032 				  struct mlxsw_sp_nexthop_group *nh_grp,
3033 				  struct mlxsw_sp_nexthop *nh,
3034 				  struct fib_nh *fib_nh)
3035 {
3036 	struct net_device *dev = fib_nh->nh_dev;
3037 	struct in_device *in_dev;
3038 	int err;
3039 
3040 	nh->nh_grp = nh_grp;
3041 	nh->key.fib_nh = fib_nh;
3042 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3043 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3044 	if (err)
3045 		return err;
3046 
3047 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3048 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3049 
3050 	if (!dev)
3051 		return 0;
3052 
3053 	in_dev = __in_dev_get_rtnl(dev);
3054 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3055 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3056 		return 0;
3057 
3058 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3059 	if (err)
3060 		goto err_nexthop4_type_init;
3061 
3062 	return 0;
3063 
3064 err_nexthop4_type_init:
	/* Undo the counter allocation and list addition done above. */
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3065 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3066 	return err;
3067 }
3068 
3069 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3070 				   struct mlxsw_sp_nexthop *nh)
3071 {
3072 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3073 	list_del(&nh->router_list_node);
3074 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3075 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3076 }
3077 
3078 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3079 				    unsigned long event, struct fib_nh *fib_nh)
3080 {
3081 	struct mlxsw_sp_nexthop_key key;
3082 	struct mlxsw_sp_nexthop *nh;
3083 
3084 	if (mlxsw_sp->router->aborted)
3085 		return;
3086 
3087 	key.fib_nh = fib_nh;
3088 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3089 	if (WARN_ON_ONCE(!nh))
3090 		return;
3091 
3092 	switch (event) {
3093 	case FIB_EVENT_NH_ADD:
3094 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3095 		break;
3096 	case FIB_EVENT_NH_DEL:
3097 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3098 		break;
3099 	}
3100 
3101 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3102 }
3103 
3104 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3105 					   struct mlxsw_sp_rif *rif)
3106 {
3107 	struct mlxsw_sp_nexthop *nh, *tmp;
3108 
3109 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3110 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3111 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3112 	}
3113 }
3114 
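/* A fib_info is considered a gateway when its first nexthop has link
 * scope, i.e. the route forwards via a gateway address, or when it
 * egresses through an offloadable IP-in-IP tunnel. Non-gateway routes
 * are programmed with a local (RIF) action rather than an adjacency
 * entry.
 */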
3115 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3116 				   const struct fib_info *fi)
3117 {
3118 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3119 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3120 }
3121 
3122 static struct mlxsw_sp_nexthop_group *
3123 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3124 {
3125 	struct mlxsw_sp_nexthop_group *nh_grp;
3126 	struct mlxsw_sp_nexthop *nh;
3127 	struct fib_nh *fib_nh;
3128 	size_t alloc_size;
3129 	int i;
3130 	int err;
3131 
3132 	alloc_size = sizeof(*nh_grp) +
3133 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3134 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3135 	if (!nh_grp)
3136 		return ERR_PTR(-ENOMEM);
3137 	nh_grp->priv = fi;
3138 	INIT_LIST_HEAD(&nh_grp->fib_list);
3139 	nh_grp->neigh_tbl = &arp_tbl;
3140 
3141 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3142 	nh_grp->count = fi->fib_nhs;
3143 	fib_info_hold(fi);
3144 	for (i = 0; i < nh_grp->count; i++) {
3145 		nh = &nh_grp->nexthops[i];
3146 		fib_nh = &fi->fib_nh[i];
3147 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3148 		if (err)
3149 			goto err_nexthop4_init;
3150 	}
3151 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3152 	if (err)
3153 		goto err_nexthop_group_insert;
3154 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3155 	return nh_grp;
3156 
3157 err_nexthop_group_insert:
3158 err_nexthop4_init:
3159 	for (i--; i >= 0; i--) {
3160 		nh = &nh_grp->nexthops[i];
3161 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3162 	}
3163 	fib_info_put(fi);
3164 	kfree(nh_grp);
3165 	return ERR_PTR(err);
3166 }
3167 
3168 static void
3169 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3170 				struct mlxsw_sp_nexthop_group *nh_grp)
3171 {
3172 	struct mlxsw_sp_nexthop *nh;
3173 	int i;
3174 
3175 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3176 	for (i = 0; i < nh_grp->count; i++) {
3177 		nh = &nh_grp->nexthops[i];
3178 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3179 	}
3180 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3181 	WARN_ON_ONCE(nh_grp->adj_index_valid);
3182 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3183 	kfree(nh_grp);
3184 }
3185 
3186 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3187 				       struct mlxsw_sp_fib_entry *fib_entry,
3188 				       struct fib_info *fi)
3189 {
3190 	struct mlxsw_sp_nexthop_group *nh_grp;
3191 
3192 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3193 	if (!nh_grp) {
3194 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3195 		if (IS_ERR(nh_grp))
3196 			return PTR_ERR(nh_grp);
3197 	}
3198 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3199 	fib_entry->nh_group = nh_grp;
3200 	return 0;
3201 }
3202 
3203 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3204 					struct mlxsw_sp_fib_entry *fib_entry)
3205 {
3206 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3207 
3208 	list_del(&fib_entry->nexthop_group_node);
3209 	if (!list_empty(&nh_grp->fib_list))
3210 		return;
3211 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3212 }
3213 
3214 static bool
3215 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3216 {
3217 	struct mlxsw_sp_fib4_entry *fib4_entry;
3218 
3219 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3220 				  common);
3221 	return !fib4_entry->tos;
3222 }
3223 
3224 static bool
3225 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3226 {
3227 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3228 
3229 	switch (fib_entry->fib_node->fib->proto) {
3230 	case MLXSW_SP_L3_PROTO_IPV4:
3231 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3232 			return false;
3233 		break;
3234 	case MLXSW_SP_L3_PROTO_IPV6:
3235 		break;
3236 	}
3237 
3238 	switch (fib_entry->type) {
3239 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3240 		return !!nh_group->adj_index_valid;
3241 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3242 		return !!nh_group->nh_rif;
3243 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3244 		return true;
3245 	default:
3246 		return false;
3247 	}
3248 }
3249 
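/* Find the group nexthop backing a given IPv6 route by matching both
 * the egress RIF's netdevice and the gateway address. The result feeds
 * the RTNH_F_OFFLOAD reporting below, so routes whose nexthop is not
 * currently offloaded are reported to the kernel as such.
 */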
3250 static struct mlxsw_sp_nexthop *
3251 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3252 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3253 {
3254 	int i;
3255 
3256 	for (i = 0; i < nh_grp->count; i++) {
3257 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3258 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3259 
3260 		if (nh->rif && nh->rif->dev == rt->dst.dev &&
3261 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3262 				    &rt->rt6i_gateway))
3263 			return nh;
3265 	}
3266 
3267 	return NULL;
3268 }
3269 
3270 static void
3271 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3272 {
3273 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3274 	int i;
3275 
3276 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3277 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3278 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3279 		return;
3280 	}
3281 
3282 	for (i = 0; i < nh_grp->count; i++) {
3283 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3284 
3285 		if (nh->offloaded)
3286 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3287 		else
3288 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3289 	}
3290 }
3291 
3292 static void
3293 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3294 {
3295 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3296 	int i;
3297 
3298 	for (i = 0; i < nh_grp->count; i++) {
3299 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3300 
3301 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3302 	}
3303 }
3304 
3305 static void
3306 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3307 {
3308 	struct mlxsw_sp_fib6_entry *fib6_entry;
3309 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3310 
3311 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3312 				  common);
3313 
3314 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3315 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3316 				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3317 		return;
3318 	}
3319 
3320 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3321 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3322 		struct mlxsw_sp_nexthop *nh;
3323 
3324 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3325 		if (nh && nh->offloaded)
3326 			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3327 		else
3328 			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3329 	}
3330 }
3331 
3332 static void
3333 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3334 {
3335 	struct mlxsw_sp_fib6_entry *fib6_entry;
3336 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3337 
3338 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3339 				  common);
3340 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3341 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3342 
3343 		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3344 	}
3345 }
3346 
3347 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3348 {
3349 	switch (fib_entry->fib_node->fib->proto) {
3350 	case MLXSW_SP_L3_PROTO_IPV4:
3351 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
3352 		break;
3353 	case MLXSW_SP_L3_PROTO_IPV6:
3354 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
3355 		break;
3356 	}
3357 }
3358 
3359 static void
3360 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3361 {
3362 	switch (fib_entry->fib_node->fib->proto) {
3363 	case MLXSW_SP_L3_PROTO_IPV4:
3364 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3365 		break;
3366 	case MLXSW_SP_L3_PROTO_IPV6:
3367 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3368 		break;
3369 	}
3370 }
3371 
3372 static void
3373 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3374 				   enum mlxsw_reg_ralue_op op, int err)
3375 {
3376 	switch (op) {
3377 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3378 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3379 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3380 		if (err)
3381 			return;
3382 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3383 			mlxsw_sp_fib_entry_offload_set(fib_entry);
3384 		else
3385 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
3386 		return;
3387 	default:
3388 		return;
3389 	}
3390 }
3391 
3392 static void
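/* All fib entry operations go through the RALUE register. The key is
 * <virtual router, prefix, prefix length>; the action part is filled
 * in by the op_* helpers below: remote (adjacency/ECMP), local (RIF),
 * ip2me trap, or IP-in-IP decap.
 */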
3393 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3394 			      const struct mlxsw_sp_fib_entry *fib_entry,
3395 			      enum mlxsw_reg_ralue_op op)
3396 {
3397 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3398 	enum mlxsw_reg_ralxx_protocol proto;
3399 	u32 *p_dip;
3400 
3401 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3402 
3403 	switch (fib->proto) {
3404 	case MLXSW_SP_L3_PROTO_IPV4:
3405 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
3406 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3407 				      fib_entry->fib_node->key.prefix_len,
3408 				      *p_dip);
3409 		break;
3410 	case MLXSW_SP_L3_PROTO_IPV6:
3411 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3412 				      fib_entry->fib_node->key.prefix_len,
3413 				      fib_entry->fib_node->key.addr);
3414 		break;
3415 	}
3416 }
3417 
3418 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
3419 					struct mlxsw_sp_fib_entry *fib_entry,
3420 					enum mlxsw_reg_ralue_op op)
3421 {
3422 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3423 	enum mlxsw_reg_ralue_trap_action trap_action;
3424 	u16 trap_id = 0;
3425 	u32 adjacency_index = 0;
3426 	u16 ecmp_size = 0;
3427 
3428 	/* In case the nexthop group adjacency index is valid, use it
3429 	 * with the provided ECMP size. Otherwise, set up a trap and
3430 	 * pass traffic to the kernel.
3431 	 */
3432 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3433 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3434 		adjacency_index = fib_entry->nh_group->adj_index;
3435 		ecmp_size = fib_entry->nh_group->ecmp_size;
3436 	} else {
3437 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3438 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3439 	}
3440 
3441 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3442 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
3443 					adjacency_index, ecmp_size);
3444 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3445 }
3446 
3447 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
3448 				       struct mlxsw_sp_fib_entry *fib_entry,
3449 				       enum mlxsw_reg_ralue_op op)
3450 {
3451 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
3452 	enum mlxsw_reg_ralue_trap_action trap_action;
3453 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3454 	u16 trap_id = 0;
3455 	u16 rif_index = 0;
3456 
3457 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3458 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3459 		rif_index = rif->rif_index;
3460 	} else {
3461 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3462 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3463 	}
3464 
3465 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3466 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
3467 				       rif_index);
3468 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3469 }
3470 
3471 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
3472 				      struct mlxsw_sp_fib_entry *fib_entry,
3473 				      enum mlxsw_reg_ralue_op op)
3474 {
3475 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3476 
3477 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3478 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
3479 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3480 }
3481 
3482 static int
3483 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
3484 				 struct mlxsw_sp_fib_entry *fib_entry,
3485 				 enum mlxsw_reg_ralue_op op)
3486 {
3487 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
3488 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3489 
3490 	if (WARN_ON(!ipip_entry))
3491 		return -EINVAL;
3492 
3493 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3494 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
3495 				      fib_entry->decap.tunnel_index);
3496 }
3497 
3498 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3499 				   struct mlxsw_sp_fib_entry *fib_entry,
3500 				   enum mlxsw_reg_ralue_op op)
3501 {
3502 	switch (fib_entry->type) {
3503 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3504 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
3505 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3506 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
3507 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
3508 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
3509 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3510 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
3511 							fib_entry, op);
3512 	}
3513 	return -EINVAL;
3514 }
3515 
3516 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3517 				 struct mlxsw_sp_fib_entry *fib_entry,
3518 				 enum mlxsw_reg_ralue_op op)
3519 {
3520 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
3521 
3522 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
3523 
3524 	return err;
3525 }
3526 
3527 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
3528 				     struct mlxsw_sp_fib_entry *fib_entry)
3529 {
3530 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
3531 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
3532 }
3533 
3534 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
3535 				  struct mlxsw_sp_fib_entry *fib_entry)
3536 {
3537 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
3538 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
3539 }
3540 
3541 static int
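/* Map the kernel route type onto one of the driver's fib entry types:
 * local routes that terminate an offloaded tunnel become decap entries,
 * other local and broadcast routes are trapped to the CPU, blackhole-
 * like routes reuse the (lower priority) local action, and unicast
 * routes become remote or local depending on whether they use a
 * gateway.
 */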
3542 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
3543 			     const struct fib_entry_notifier_info *fen_info,
3544 			     struct mlxsw_sp_fib_entry *fib_entry)
3545 {
3546 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
3547 	struct net_device *dev = fen_info->fi->fib_dev;
3548 	struct mlxsw_sp_ipip_entry *ipip_entry;
3549 	struct fib_info *fi = fen_info->fi;
3550 
3551 	switch (fen_info->type) {
3552 	case RTN_LOCAL:
3553 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
3554 						 MLXSW_SP_L3_PROTO_IPV4, dip);
3555 		if (ipip_entry) {
3556 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
3557 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
3558 							     fib_entry,
3559 							     ipip_entry);
3560 		}
3561 		/* fall through */
3562 	case RTN_BROADCAST:
3563 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
3564 		return 0;
3565 	case RTN_UNREACHABLE: /* fall through */
3566 	case RTN_BLACKHOLE: /* fall through */
3567 	case RTN_PROHIBIT:
3568 		/* Packets hitting these routes need to be trapped, but
3569 		 * can do so with a lower priority than packets directed
3570 		 * at the host, so use action type local instead of trap.
3571 		 */
3572 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
3573 		return 0;
3574 	case RTN_UNICAST:
3575 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
3576 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
3577 		else
3578 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
3579 		return 0;
3580 	default:
3581 		return -EINVAL;
3582 	}
3583 }
3584 
3585 static struct mlxsw_sp_fib4_entry *
3586 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
3587 			   struct mlxsw_sp_fib_node *fib_node,
3588 			   const struct fib_entry_notifier_info *fen_info)
3589 {
3590 	struct mlxsw_sp_fib4_entry *fib4_entry;
3591 	struct mlxsw_sp_fib_entry *fib_entry;
3592 	int err;
3593 
3594 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
3595 	if (!fib4_entry)
3596 		return ERR_PTR(-ENOMEM);
3597 	fib_entry = &fib4_entry->common;
3598 
3599 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
3600 	if (err)
3601 		goto err_fib4_entry_type_set;
3602 
3603 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
3604 	if (err)
3605 		goto err_nexthop4_group_get;
3606 
3607 	fib4_entry->prio = fen_info->fi->fib_priority;
3608 	fib4_entry->tb_id = fen_info->tb_id;
3609 	fib4_entry->type = fen_info->type;
3610 	fib4_entry->tos = fen_info->tos;
3611 
3612 	fib_entry->fib_node = fib_node;
3613 
3614 	return fib4_entry;
3615 
3616 err_nexthop4_group_get:
3617 err_fib4_entry_type_set:
3618 	kfree(fib4_entry);
3619 	return ERR_PTR(err);
3620 }
3621 
3622 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
3623 					struct mlxsw_sp_fib4_entry *fib4_entry)
3624 {
3625 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
3626 	kfree(fib4_entry);
3627 }
3628 
3629 static struct mlxsw_sp_fib4_entry *
3630 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
3631 			   const struct fib_entry_notifier_info *fen_info)
3632 {
3633 	struct mlxsw_sp_fib4_entry *fib4_entry;
3634 	struct mlxsw_sp_fib_node *fib_node;
3635 	struct mlxsw_sp_fib *fib;
3636 	struct mlxsw_sp_vr *vr;
3637 
3638 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
3639 	if (!vr)
3640 		return NULL;
3641 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
3642 
3643 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
3644 					    sizeof(fen_info->dst),
3645 					    fen_info->dst_len);
3646 	if (!fib_node)
3647 		return NULL;
3648 
3649 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
3650 		if (fib4_entry->tb_id == fen_info->tb_id &&
3651 		    fib4_entry->tos == fen_info->tos &&
3652 		    fib4_entry->type == fen_info->type &&
3653 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
3654 		    fen_info->fi) {
3655 			return fib4_entry;
3656 		}
3657 	}
3658 
3659 	return NULL;
3660 }
3661 
3662 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
3663 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
3664 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
3665 	.key_len = sizeof(struct mlxsw_sp_fib_key),
3666 	.automatic_shrinking = true,
3667 };
3668 
3669 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
3670 				    struct mlxsw_sp_fib_node *fib_node)
3671 {
3672 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
3673 				      mlxsw_sp_fib_ht_params);
3674 }
3675 
3676 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
3677 				     struct mlxsw_sp_fib_node *fib_node)
3678 {
3679 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
3680 			       mlxsw_sp_fib_ht_params);
3681 }
3682 
3683 static struct mlxsw_sp_fib_node *
3684 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
3685 			 size_t addr_len, unsigned char prefix_len)
3686 {
3687 	struct mlxsw_sp_fib_key key;
3688 
3689 	memset(&key, 0, sizeof(key));
3690 	memcpy(key.addr, addr, addr_len);
3691 	key.prefix_len = prefix_len;
3692 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
3693 }
3694 
3695 static struct mlxsw_sp_fib_node *
3696 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
3697 			 size_t addr_len, unsigned char prefix_len)
3698 {
3699 	struct mlxsw_sp_fib_node *fib_node;
3700 
3701 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
3702 	if (!fib_node)
3703 		return NULL;
3704 
3705 	INIT_LIST_HEAD(&fib_node->entry_list);
3706 	list_add(&fib_node->list, &fib->node_list);
3707 	memcpy(fib_node->key.addr, addr, addr_len);
3708 	fib_node->key.prefix_len = prefix_len;
3709 
3710 	return fib_node;
3711 }
3712 
3713 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
3714 {
3715 	list_del(&fib_node->list);
3716 	WARN_ON(!list_empty(&fib_node->entry_list));
3717 	kfree(fib_node);
3718 }
3719 
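/* Entries in a FIB node's list are kept sorted, and only the first
 * entry is actually reflected in the device. The following helpers rely
 * on this property when adding and deleting entries.
 */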
3720 static bool
3721 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3722 				 const struct mlxsw_sp_fib_entry *fib_entry)
3723 {
3724 	return list_first_entry(&fib_node->entry_list,
3725 				struct mlxsw_sp_fib_entry, list) == fib_entry;
3726 }
3727 
3728 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
3729 				      struct mlxsw_sp_fib *fib,
3730 				      struct mlxsw_sp_fib_node *fib_node)
3731 {
3732 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 }};
3733 	struct mlxsw_sp_lpm_tree *lpm_tree;
3734 	int err;
3735 
3736 	/* Since the tree is shared between all virtual routers we must
3737 	 * make sure it contains all the required prefix lengths. This
3738 	 * can be computed by either adding the new prefix length to the
3739 	 * existing prefix usage of a bound tree, or by aggregating the
3740 	 * prefix lengths across all virtual routers and adding the new
3741 	 * one as well.
3742 	 */
3743 	if (fib->lpm_tree)
3744 		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
3745 					  &fib->lpm_tree->prefix_usage);
3746 	else
3747 		mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
3748 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
3749 
3750 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
3751 					 fib->proto);
3752 	if (IS_ERR(lpm_tree))
3753 		return PTR_ERR(lpm_tree);
3754 
3755 	if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id)
3756 		return 0;
3757 
3758 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
3759 	if (err)
3760 		return err;
3761 
3762 	return 0;
3763 }
3764 
3765 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
3766 					 struct mlxsw_sp_fib *fib)
3767 {
3768 	if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
3769 		return;
3770 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
3771 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
3772 	fib->lpm_tree = NULL;
3773 }
3774 
3775 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
3776 {
3777 	unsigned char prefix_len = fib_node->key.prefix_len;
3778 	struct mlxsw_sp_fib *fib = fib_node->fib;
3779 
3780 	if (fib->prefix_ref_count[prefix_len]++ == 0)
3781 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
3782 }
3783 
3784 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
3785 {
3786 	unsigned char prefix_len = fib_node->key.prefix_len;
3787 	struct mlxsw_sp_fib *fib = fib_node->fib;
3788 
3789 	if (--fib->prefix_ref_count[prefix_len] == 0)
3790 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
3791 }
3792 
3793 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
3794 				  struct mlxsw_sp_fib_node *fib_node,
3795 				  struct mlxsw_sp_fib *fib)
3796 {
3797 	int err;
3798 
3799 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
3800 	if (err)
3801 		return err;
3802 	fib_node->fib = fib;
3803 
3804 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
3805 	if (err)
3806 		goto err_fib_lpm_tree_link;
3807 
3808 	mlxsw_sp_fib_node_prefix_inc(fib_node);
3809 
3810 	return 0;
3811 
3812 err_fib_lpm_tree_link:
3813 	fib_node->fib = NULL;
3814 	mlxsw_sp_fib_node_remove(fib, fib_node);
3815 	return err;
3816 }
3817 
3818 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
3819 				   struct mlxsw_sp_fib_node *fib_node)
3820 {
3821 	struct mlxsw_sp_fib *fib = fib_node->fib;
3822 
3823 	mlxsw_sp_fib_node_prefix_dec(fib_node);
3824 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
3825 	fib_node->fib = NULL;
3826 	mlxsw_sp_fib_node_remove(fib, fib_node);
3827 }
3828 
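/* Look up the FIB node for the given prefix, creating it - along with
 * its virtual router and LPM tree linkage - if it does not exist yet.
 */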
3829 static struct mlxsw_sp_fib_node *
3830 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
3831 		      size_t addr_len, unsigned char prefix_len,
3832 		      enum mlxsw_sp_l3proto proto)
3833 {
3834 	struct mlxsw_sp_fib_node *fib_node;
3835 	struct mlxsw_sp_fib *fib;
3836 	struct mlxsw_sp_vr *vr;
3837 	int err;
3838 
3839 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id);
3840 	if (IS_ERR(vr))
3841 		return ERR_CAST(vr);
3842 	fib = mlxsw_sp_vr_fib(vr, proto);
3843 
3844 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
3845 	if (fib_node)
3846 		return fib_node;
3847 
3848 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
3849 	if (!fib_node) {
3850 		err = -ENOMEM;
3851 		goto err_fib_node_create;
3852 	}
3853 
3854 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
3855 	if (err)
3856 		goto err_fib_node_init;
3857 
3858 	return fib_node;
3859 
3860 err_fib_node_init:
3861 	mlxsw_sp_fib_node_destroy(fib_node);
3862 err_fib_node_create:
3863 	mlxsw_sp_vr_put(vr);
3864 	return ERR_PTR(err);
3865 }
3866 
3867 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
3868 				  struct mlxsw_sp_fib_node *fib_node)
3869 {
3870 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
3871 
3872 	if (!list_empty(&fib_node->entry_list))
3873 		return;
3874 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
3875 	mlxsw_sp_fib_node_destroy(fib_node);
3876 	mlxsw_sp_vr_put(vr);
3877 }
3878 
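/* Find the entry before which the new entry should be inserted.
 * Entries are sorted by table ID and TOS in descending order and by
 * priority in ascending order. Returns NULL when no suitable position
 * was found.
 */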
3879 static struct mlxsw_sp_fib4_entry *
3880 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
3881 			      const struct mlxsw_sp_fib4_entry *new4_entry)
3882 {
3883 	struct mlxsw_sp_fib4_entry *fib4_entry;
3884 
3885 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
3886 		if (fib4_entry->tb_id > new4_entry->tb_id)
3887 			continue;
3888 		if (fib4_entry->tb_id != new4_entry->tb_id)
3889 			break;
3890 		if (fib4_entry->tos > new4_entry->tos)
3891 			continue;
3892 		if (fib4_entry->prio >= new4_entry->prio ||
3893 		    fib4_entry->tos < new4_entry->tos)
3894 			return fib4_entry;
3895 	}
3896 
3897 	return NULL;
3898 }
3899 
3900 static int
3901 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
3902 			       struct mlxsw_sp_fib4_entry *new4_entry)
3903 {
3904 	struct mlxsw_sp_fib_node *fib_node;
3905 
3906 	if (WARN_ON(!fib4_entry))
3907 		return -EINVAL;
3908 
3909 	fib_node = fib4_entry->common.fib_node;
3910 	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
3911 				 common.list) {
3912 		if (fib4_entry->tb_id != new4_entry->tb_id ||
3913 		    fib4_entry->tos != new4_entry->tos ||
3914 		    fib4_entry->prio != new4_entry->prio)
3915 			break;
3916 	}
3917 
3918 	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
3919 	return 0;
3920 }
3921 
3922 static int
3923 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
3924 			       bool replace, bool append)
3925 {
3926 	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
3927 	struct mlxsw_sp_fib4_entry *fib4_entry;
3928 
3929 	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
3930 
3931 	if (append)
3932 		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
3933 	if (replace && WARN_ON(!fib4_entry))
3934 		return -EINVAL;
3935 
3936 	/* Insert the new entry before the replaced one, so that we can
3937 	 * later remove the replaced entry.
3938 	 */
3939 	if (fib4_entry) {
3940 		list_add_tail(&new4_entry->common.list,
3941 			      &fib4_entry->common.list);
3942 	} else {
3943 		struct mlxsw_sp_fib4_entry *last;
3944 
3945 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
3946 			if (new4_entry->tb_id > last->tb_id)
3947 				break;
3948 			fib4_entry = last;
3949 		}
3950 
3951 		if (fib4_entry)
3952 			list_add(&new4_entry->common.list,
3953 				 &fib4_entry->common.list);
3954 		else
3955 			list_add(&new4_entry->common.list,
3956 				 &fib_node->entry_list);
3957 	}
3958 
3959 	return 0;
3960 }
3961 
3962 static void
3963 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
3964 {
3965 	list_del(&fib4_entry->common.list);
3966 }
3967 
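/* Write the entry to the device, but only if it is the first - and
 * therefore the best - entry in its node. Non-first entries are kept
 * in the list as backup and are not offloaded.
 */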
3968 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
3969 				       struct mlxsw_sp_fib_entry *fib_entry)
3970 {
3971 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
3972 
3973 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
3974 		return 0;
3975 
3976 	/* To prevent packet loss, overwrite the previously offloaded
3977 	 * entry.
3978 	 */
3979 	if (!list_is_singular(&fib_node->entry_list)) {
3980 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
3981 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
3982 
3983 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
3984 	}
3985 
3986 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3987 }
3988 
3989 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
3990 					struct mlxsw_sp_fib_entry *fib_entry)
3991 {
3992 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
3993 
3994 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
3995 		return;
3996 
3997 	/* Promote the next entry by overwriting the deleted entry */
3998 	if (!list_is_singular(&fib_node->entry_list)) {
3999 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4000 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4001 
4002 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4003 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4004 		return;
4005 	}
4006 
4007 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4008 }
4009 
4010 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4011 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4012 					 bool replace, bool append)
4013 {
4014 	int err;
4015 
4016 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4017 	if (err)
4018 		return err;
4019 
4020 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4021 	if (err)
4022 		goto err_fib_node_entry_add;
4023 
4024 	return 0;
4025 
4026 err_fib_node_entry_add:
4027 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4028 	return err;
4029 }
4030 
4031 static void
4032 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4033 				struct mlxsw_sp_fib4_entry *fib4_entry)
4034 {
4035 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4036 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4037 
4038 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4039 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4040 }
4041 
4042 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4043 					struct mlxsw_sp_fib4_entry *fib4_entry,
4044 					bool replace)
4045 {
4046 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4047 	struct mlxsw_sp_fib4_entry *replaced;
4048 
4049 	if (!replace)
4050 		return;
4051 
4052 	/* We inserted the new entry before the replaced one */
4053 	replaced = list_next_entry(fib4_entry, common.list);
4054 
4055 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4056 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4057 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4058 }
4059 
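/* Process an IPv4 route addition: get (or create) the FIB node for the
 * prefix, create the entry, link it into the node's list and, in case
 * of replace, dispose of the entry that was replaced.
 */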
4060 static int
4061 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4062 			 const struct fib_entry_notifier_info *fen_info,
4063 			 bool replace, bool append)
4064 {
4065 	struct mlxsw_sp_fib4_entry *fib4_entry;
4066 	struct mlxsw_sp_fib_node *fib_node;
4067 	int err;
4068 
4069 	if (mlxsw_sp->router->aborted)
4070 		return 0;
4071 
4072 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4073 					 &fen_info->dst, sizeof(fen_info->dst),
4074 					 fen_info->dst_len,
4075 					 MLXSW_SP_L3_PROTO_IPV4);
4076 	if (IS_ERR(fib_node)) {
4077 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4078 		return PTR_ERR(fib_node);
4079 	}
4080 
4081 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4082 	if (IS_ERR(fib4_entry)) {
4083 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4084 		err = PTR_ERR(fib4_entry);
4085 		goto err_fib4_entry_create;
4086 	}
4087 
4088 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4089 					    append);
4090 	if (err) {
4091 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4092 		goto err_fib4_node_entry_link;
4093 	}
4094 
4095 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4096 
4097 	return 0;
4098 
4099 err_fib4_node_entry_link:
4100 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4101 err_fib4_entry_create:
4102 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4103 	return err;
4104 }
4105 
4106 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4107 				     struct fib_entry_notifier_info *fen_info)
4108 {
4109 	struct mlxsw_sp_fib4_entry *fib4_entry;
4110 	struct mlxsw_sp_fib_node *fib_node;
4111 
4112 	if (mlxsw_sp->router->aborted)
4113 		return;
4114 
4115 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4116 	if (WARN_ON(!fib4_entry))
4117 		return;
4118 	fib_node = fib4_entry->common.fib_node;
4119 
4120 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4121 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4122 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4123 }
4124 
4125 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4126 {
4127 	/* Packets with a link-local destination IP arriving at the router
4128 	 * are trapped to the CPU, so there is no need to program specific
4129 	 * routes for them.
4130 	 */
4131 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
4132 		return true;
4133 
4134 	/* Multicast routes aren't supported, so ignore them. Neighbour
4135 	 * Discovery packets are specifically trapped.
4136 	 */
4137 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
4138 		return true;
4139 
4140 	/* Cloned routes are irrelevant in the forwarding path. */
4141 	if (rt->rt6i_flags & RTF_CACHE)
4142 		return true;
4143 
4144 	return false;
4145 }
4146 
4147 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
4148 {
4149 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4150 
4151 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4152 	if (!mlxsw_sp_rt6)
4153 		return ERR_PTR(-ENOMEM);
4154 
4155 	/* In case of route replacement, the replaced route is deleted
4156 	 * without notification. Take a reference to prevent accessing
4157 	 * freed memory.
4158 	 */
4159 	mlxsw_sp_rt6->rt = rt;
4160 	rt6_hold(rt);
4161 
4162 	return mlxsw_sp_rt6;
4163 }
4164 
4165 #if IS_ENABLED(CONFIG_IPV6)
4166 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4167 {
4168 	rt6_release(rt);
4169 }
4170 #else
4171 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4172 {
4173 }
4174 #endif
4175 
4176 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4177 {
4178 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4179 	kfree(mlxsw_sp_rt6);
4180 }
4181 
4182 static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
4183 {
4184 	/* RTF_CACHE routes are ignored */
4185 	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4186 }
4187 
4188 static struct rt6_info *
4189 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4190 {
4191 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4192 				list)->rt;
4193 }
4194 
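/* Look for a multipath-capable entry with the same table ID and metric
 * that the new route can be appended to as another nexthop. A NULL
 * return means the route must be inserted as a new entry.
 */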
4195 static struct mlxsw_sp_fib6_entry *
4196 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4197 				 const struct rt6_info *nrt, bool replace)
4198 {
4199 	struct mlxsw_sp_fib6_entry *fib6_entry;
4200 
4201 	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4202 		return NULL;
4203 
4204 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4205 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4206 
4207 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4208 		 * virtual router.
4209 		 */
4210 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
4211 			continue;
4212 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
4213 			break;
4214 		if (rt->rt6i_metric < nrt->rt6i_metric)
4215 			continue;
4216 		if (rt->rt6i_metric == nrt->rt6i_metric &&
4217 		    mlxsw_sp_fib6_rt_can_mp(rt))
4218 			return fib6_entry;
4219 		if (rt->rt6i_metric > nrt->rt6i_metric)
4220 			break;
4221 	}
4222 
4223 	return NULL;
4224 }
4225 
4226 static struct mlxsw_sp_rt6 *
4227 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4228 			    const struct rt6_info *rt)
4229 {
4230 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4231 
4232 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4233 		if (mlxsw_sp_rt6->rt == rt)
4234 			return mlxsw_sp_rt6;
4235 	}
4236 
4237 	return NULL;
4238 }
4239 
4240 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4241 					const struct rt6_info *rt,
4242 					enum mlxsw_sp_ipip_type *ret)
4243 {
4244 	return rt->dst.dev &&
4245 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
4246 }
4247 
4248 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4249 				       struct mlxsw_sp_nexthop_group *nh_grp,
4250 				       struct mlxsw_sp_nexthop *nh,
4251 				       const struct rt6_info *rt)
4252 {
4253 	struct mlxsw_sp_router *router = mlxsw_sp->router;
4254 	struct net_device *dev = rt->dst.dev;
4255 	enum mlxsw_sp_ipip_type ipipt;
4256 	struct mlxsw_sp_rif *rif;
4257 	int err;
4258 
4259 	if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) &&
4260 	    router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
4261 						     MLXSW_SP_L3_PROTO_IPV6)) {
4262 		nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4263 		err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
4264 		if (err)
4265 			return err;
4266 		mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
4267 		return 0;
4268 	}
4269 
4270 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4271 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4272 	if (!rif)
4273 		return 0;
4274 	mlxsw_sp_nexthop_rif_init(nh, rif);
4275 
4276 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4277 	if (err)
4278 		goto err_nexthop_neigh_init;
4279 
4280 	return 0;
4281 
4282 err_nexthop_neigh_init:
4283 	mlxsw_sp_nexthop_rif_fini(nh);
4284 	return err;
4285 }
4286 
4287 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
4288 					struct mlxsw_sp_nexthop *nh)
4289 {
4290 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4291 }
4292 
4293 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4294 				  struct mlxsw_sp_nexthop_group *nh_grp,
4295 				  struct mlxsw_sp_nexthop *nh,
4296 				  const struct rt6_info *rt)
4297 {
4298 	struct net_device *dev = rt->dst.dev;
4299 
4300 	nh->nh_grp = nh_grp;
4301 	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
4302 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4303 
4304 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4305 
4306 	if (!dev)
4307 		return 0;
4308 	nh->ifindex = dev->ifindex;
4309 
4310 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4311 }
4312 
4313 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4314 				   struct mlxsw_sp_nexthop *nh)
4315 {
4316 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4317 	list_del(&nh->router_list_node);
4318 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4319 }
4320 
4321 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4322 				    const struct rt6_info *rt)
4323 {
4324 	return rt->rt6i_flags & RTF_GATEWAY ||
4325 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4326 }
4327 
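/* Create a nexthop group for an IPv6 entry. Unlike IPv4 groups, which
 * are keyed by the kernel's fib_info, IPv6 groups are built from the
 * entry's list of rt6 routes, with one nexthop per route.
 */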
4328 static struct mlxsw_sp_nexthop_group *
4329 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4330 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4331 {
4332 	struct mlxsw_sp_nexthop_group *nh_grp;
4333 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4334 	struct mlxsw_sp_nexthop *nh;
4335 	size_t alloc_size;
4336 	int i = 0;
4337 	int err;
4338 
4339 	alloc_size = sizeof(*nh_grp) +
4340 		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4341 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4342 	if (!nh_grp)
4343 		return ERR_PTR(-ENOMEM);
4344 	INIT_LIST_HEAD(&nh_grp->fib_list);
4345 #if IS_ENABLED(CONFIG_IPV6)
4346 	nh_grp->neigh_tbl = &nd_tbl;
4347 #endif
4348 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4349 					struct mlxsw_sp_rt6, list);
4350 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4351 	nh_grp->count = fib6_entry->nrt6;
4352 	for (i = 0; i < nh_grp->count; i++) {
4353 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
4354 
4355 		nh = &nh_grp->nexthops[i];
4356 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4357 		if (err)
4358 			goto err_nexthop6_init;
4359 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4360 	}
4361 
4362 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4363 	if (err)
4364 		goto err_nexthop_group_insert;
4365 
4366 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4367 	return nh_grp;
4368 
4369 err_nexthop_group_insert:
4370 err_nexthop6_init:
4371 	for (i--; i >= 0; i--) {
4372 		nh = &nh_grp->nexthops[i];
4373 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4374 	}
4375 	kfree(nh_grp);
4376 	return ERR_PTR(err);
4377 }
4378 
4379 static void
4380 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4381 				struct mlxsw_sp_nexthop_group *nh_grp)
4382 {
4383 	struct mlxsw_sp_nexthop *nh;
4384 	int i = nh_grp->count;
4385 
4386 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4387 	for (i--; i >= 0; i--) {
4388 		nh = &nh_grp->nexthops[i];
4389 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4390 	}
4391 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4392 	WARN_ON(nh_grp->adj_index_valid);
4393 	kfree(nh_grp);
4394 }
4395 
4396 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4397 				       struct mlxsw_sp_fib6_entry *fib6_entry)
4398 {
4399 	struct mlxsw_sp_nexthop_group *nh_grp;
4400 
4401 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4402 	if (!nh_grp) {
4403 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4404 		if (IS_ERR(nh_grp))
4405 			return PTR_ERR(nh_grp);
4406 	}
4407 
4408 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4409 		      &nh_grp->fib_list);
4410 	fib6_entry->common.nh_group = nh_grp;
4411 
4412 	return 0;
4413 }
4414 
4415 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4416 					struct mlxsw_sp_fib_entry *fib_entry)
4417 {
4418 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4419 
4420 	list_del(&fib_entry->nexthop_group_node);
4421 	if (!list_empty(&nh_grp->fib_list))
4422 		return;
4423 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4424 }
4425 
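/* Replace the entry's nexthop group after its route list was modified.
 * A group matching the updated list is looked up or created before the
 * old one is released, so the entry always has a valid group.
 */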
4426 static int
4427 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
4428 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4429 {
4430 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
4431 	int err;
4432 
4433 	fib6_entry->common.nh_group = NULL;
4434 	list_del(&fib6_entry->common.nexthop_group_node);
4435 
4436 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
4437 	if (err)
4438 		goto err_nexthop6_group_get;
4439 
4440 	/* If this entry is offloaded, then the adjacency index currently
4441 	 * associated with it in the device's table is that of the old
4442 	 * group. Start using the new one instead.
4443 	 */
4444 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
4445 	if (err)
4446 		goto err_fib_node_entry_add;
4447 
4448 	if (list_empty(&old_nh_grp->fib_list))
4449 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
4450 
4451 	return 0;
4452 
4453 err_fib_node_entry_add:
4454 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
4455 err_nexthop6_group_get:
4456 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4457 		      &old_nh_grp->fib_list);
4458 	fib6_entry->common.nh_group = old_nh_grp;
4459 	return err;
4460 }
4461 
4462 static int
4463 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
4464 				struct mlxsw_sp_fib6_entry *fib6_entry,
4465 				struct rt6_info *rt)
4466 {
4467 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4468 	int err;
4469 
4470 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
4471 	if (IS_ERR(mlxsw_sp_rt6))
4472 		return PTR_ERR(mlxsw_sp_rt6);
4473 
4474 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
4475 	fib6_entry->nrt6++;
4476 
4477 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4478 	if (err)
4479 		goto err_nexthop6_group_update;
4480 
4481 	return 0;
4482 
4483 err_nexthop6_group_update:
4484 	fib6_entry->nrt6--;
4485 	list_del(&mlxsw_sp_rt6->list);
4486 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4487 	return err;
4488 }
4489 
4490 static void
4491 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
4492 				struct mlxsw_sp_fib6_entry *fib6_entry,
4493 				struct rt6_info *rt)
4494 {
4495 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4496 
4497 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
4498 	if (WARN_ON(!mlxsw_sp_rt6))
4499 		return;
4500 
4501 	fib6_entry->nrt6--;
4502 	list_del(&mlxsw_sp_rt6->list);
4503 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4504 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4505 }
4506 
4507 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4508 					 struct mlxsw_sp_fib_entry *fib_entry,
4509 					 const struct rt6_info *rt)
4510 {
4511 	/* Packets hitting RTF_REJECT routes need to be discarded by the
4512 	 * stack. We can rely on their destination device not having a
4513 	 * RIF (it's the loopback device) and can thus use action type
4514 	 * local, which will cause them to be trapped with a lower
4515 	 * priority than packets that need to be locally received.
4516 	 */
4517 	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
4518 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4519 	else if (rt->rt6i_flags & RTF_REJECT)
4520 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4521 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
4522 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4523 	else
4524 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4525 }
4526 
4527 static void
4528 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
4529 {
4530 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
4531 
4532 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
4533 				 list) {
4534 		fib6_entry->nrt6--;
4535 		list_del(&mlxsw_sp_rt6->list);
4536 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4537 	}
4538 }
4539 
4540 static struct mlxsw_sp_fib6_entry *
4541 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
4542 			   struct mlxsw_sp_fib_node *fib_node,
4543 			   struct rt6_info *rt)
4544 {
4545 	struct mlxsw_sp_fib6_entry *fib6_entry;
4546 	struct mlxsw_sp_fib_entry *fib_entry;
4547 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4548 	int err;
4549 
4550 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
4551 	if (!fib6_entry)
4552 		return ERR_PTR(-ENOMEM);
4553 	fib_entry = &fib6_entry->common;
4554 
4555 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
4556 	if (IS_ERR(mlxsw_sp_rt6)) {
4557 		err = PTR_ERR(mlxsw_sp_rt6);
4558 		goto err_rt6_create;
4559 	}
4560 
4561 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
4562 
4563 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
4564 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
4565 	fib6_entry->nrt6 = 1;
4566 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
4567 	if (err)
4568 		goto err_nexthop6_group_get;
4569 
4570 	fib_entry->fib_node = fib_node;
4571 
4572 	return fib6_entry;
4573 
4574 err_nexthop6_group_get:
4575 	list_del(&mlxsw_sp_rt6->list);
4576 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4577 err_rt6_create:
4578 	kfree(fib6_entry);
4579 	return ERR_PTR(err);
4580 }
4581 
4582 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4583 					struct mlxsw_sp_fib6_entry *fib6_entry)
4584 {
4585 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
4586 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
4587 	WARN_ON(fib6_entry->nrt6);
4588 	kfree(fib6_entry);
4589 }
4590 
4591 static struct mlxsw_sp_fib6_entry *
4592 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4593 			      const struct rt6_info *nrt, bool replace)
4594 {
4595 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
4596 
4597 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4598 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4599 
4600 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
4601 			continue;
4602 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
4603 			break;
4604 		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
4605 			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
4606 			    mlxsw_sp_fib6_rt_can_mp(nrt))
4607 				return fib6_entry;
4608 			if (mlxsw_sp_fib6_rt_can_mp(nrt))
4609 				fallback = fallback ?: fib6_entry;
4610 		}
4611 		if (rt->rt6i_metric > nrt->rt6i_metric)
4612 			return fallback ?: fib6_entry;
4613 	}
4614 
4615 	return fallback;
4616 }
4617 
4618 static int
4619 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
4620 			       bool replace)
4621 {
4622 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
4623 	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
4624 	struct mlxsw_sp_fib6_entry *fib6_entry;
4625 
4626 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
4627 
4628 	if (replace && WARN_ON(!fib6_entry))
4629 		return -EINVAL;
4630 
4631 	if (fib6_entry) {
4632 		list_add_tail(&new6_entry->common.list,
4633 			      &fib6_entry->common.list);
4634 	} else {
4635 		struct mlxsw_sp_fib6_entry *last;
4636 
4637 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4638 			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
4639 
4640 			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
4641 				break;
4642 			fib6_entry = last;
4643 		}
4644 
4645 		if (fib6_entry)
4646 			list_add(&new6_entry->common.list,
4647 				 &fib6_entry->common.list);
4648 		else
4649 			list_add(&new6_entry->common.list,
4650 				 &fib_node->entry_list);
4651 	}
4652 
4653 	return 0;
4654 }
4655 
4656 static void
4657 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
4658 {
4659 	list_del(&fib6_entry->common.list);
4660 }
4661 
4662 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4663 					 struct mlxsw_sp_fib6_entry *fib6_entry,
4664 					 bool replace)
4665 {
4666 	int err;
4667 
4668 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
4669 	if (err)
4670 		return err;
4671 
4672 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
4673 	if (err)
4674 		goto err_fib_node_entry_add;
4675 
4676 	return 0;
4677 
4678 err_fib_node_entry_add:
4679 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
4680 	return err;
4681 }
4682 
4683 static void
4684 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4685 				struct mlxsw_sp_fib6_entry *fib6_entry)
4686 {
4687 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
4688 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
4689 }
4690 
4691 static struct mlxsw_sp_fib6_entry *
4692 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4693 			   const struct rt6_info *rt)
4694 {
4695 	struct mlxsw_sp_fib6_entry *fib6_entry;
4696 	struct mlxsw_sp_fib_node *fib_node;
4697 	struct mlxsw_sp_fib *fib;
4698 	struct mlxsw_sp_vr *vr;
4699 
4700 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
4701 	if (!vr)
4702 		return NULL;
4703 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
4704 
4705 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
4706 					    sizeof(rt->rt6i_dst.addr),
4707 					    rt->rt6i_dst.plen);
4708 	if (!fib_node)
4709 		return NULL;
4710 
4711 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4712 		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4713 
4714 		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
4715 		    rt->rt6i_metric == iter_rt->rt6i_metric &&
4716 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
4717 			return fib6_entry;
4718 	}
4719 
4720 	return NULL;
4721 }
4722 
4723 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
4724 					struct mlxsw_sp_fib6_entry *fib6_entry,
4725 					bool replace)
4726 {
4727 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
4728 	struct mlxsw_sp_fib6_entry *replaced;
4729 
4730 	if (!replace)
4731 		return;
4732 
4733 	replaced = list_next_entry(fib6_entry, common.list);
4734 
4735 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
4736 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
4737 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4738 }
4739 
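/* Process an IPv6 route addition: routes that can share a multipath
 * entry are appended to it as an extra nexthop; otherwise a new entry
 * is created and linked, replacing an old one if requested.
 */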
4740 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
4741 				    struct rt6_info *rt, bool replace)
4742 {
4743 	struct mlxsw_sp_fib6_entry *fib6_entry;
4744 	struct mlxsw_sp_fib_node *fib_node;
4745 	int err;
4746 
4747 	if (mlxsw_sp->router->aborted)
4748 		return 0;
4749 
4750 	if (rt->rt6i_src.plen)
4751 		return -EINVAL;
4752 
4753 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
4754 		return 0;
4755 
4756 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
4757 					 &rt->rt6i_dst.addr,
4758 					 sizeof(rt->rt6i_dst.addr),
4759 					 rt->rt6i_dst.plen,
4760 					 MLXSW_SP_L3_PROTO_IPV6);
4761 	if (IS_ERR(fib_node))
4762 		return PTR_ERR(fib_node);
4763 
4764 	/* Before creating a new entry, try to append the route to an
4765 	 * existing multipath entry.
4766 	 */
4767 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
4768 	if (fib6_entry) {
4769 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
4770 		if (err)
4771 			goto err_fib6_entry_nexthop_add;
4772 		return 0;
4773 	}
4774 
4775 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
4776 	if (IS_ERR(fib6_entry)) {
4777 		err = PTR_ERR(fib6_entry);
4778 		goto err_fib6_entry_create;
4779 	}
4780 
4781 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
4782 	if (err)
4783 		goto err_fib6_node_entry_link;
4784 
4785 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
4786 
4787 	return 0;
4788 
4789 err_fib6_node_entry_link:
4790 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
4791 err_fib6_entry_create:
4792 err_fib6_entry_nexthop_add:
4793 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4794 	return err;
4795 }
4796 
4797 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
4798 				     struct rt6_info *rt)
4799 {
4800 	struct mlxsw_sp_fib6_entry *fib6_entry;
4801 	struct mlxsw_sp_fib_node *fib_node;
4802 
4803 	if (mlxsw_sp->router->aborted)
4804 		return;
4805 
4806 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
4807 		return;
4808 
4809 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
4810 	if (WARN_ON(!fib6_entry))
4811 		return;
4812 
4813 	/* If the route is part of a multipath entry, but not the last one
4814 	 * removed, then only remove it from the entry's nexthop group.
4815 	 */
4816 	if (!list_is_singular(&fib6_entry->rt6_list)) {
4817 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
4818 		return;
4819 	}
4820 
4821 	fib_node = fib6_entry->common.fib_node;
4822 
4823 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
4824 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
4825 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4826 }
4827 
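/* Bind all virtual routers to the given LPM tree and program in each
 * of them a default route whose action is to trap packets to the CPU,
 * so that the kernel can resume forwarding after a FIB abort.
 */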
4828 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
4829 					    enum mlxsw_reg_ralxx_protocol proto,
4830 					    u8 tree_id)
4831 {
4832 	char ralta_pl[MLXSW_REG_RALTA_LEN];
4833 	char ralst_pl[MLXSW_REG_RALST_LEN];
4834 	int i, err;
4835 
4836 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
4837 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
4838 	if (err)
4839 		return err;
4840 
4841 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
4842 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
4843 	if (err)
4844 		return err;
4845 
4846 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
4847 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
4848 		char raltb_pl[MLXSW_REG_RALTB_LEN];
4849 		char ralue_pl[MLXSW_REG_RALUE_LEN];
4850 
4851 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
4852 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
4853 				      raltb_pl);
4854 		if (err)
4855 			return err;
4856 
4857 		mlxsw_reg_ralue_pack(ralue_pl, proto,
4858 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
4859 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4860 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
4861 				      ralue_pl);
4862 		if (err)
4863 			return err;
4864 	}
4865 
4866 	return 0;
4867 }
4868 
4869 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
4870 				     struct mfc_entry_notifier_info *men_info,
4871 				     bool replace)
4872 {
4873 	struct mlxsw_sp_vr *vr;
4874 
4875 	if (mlxsw_sp->router->aborted)
4876 		return 0;
4877 
4878 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id);
4879 	if (IS_ERR(vr))
4880 		return PTR_ERR(vr);
4881 
4882 	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
4883 }
4884 
4885 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
4886 				      struct mfc_entry_notifier_info *men_info)
4887 {
4888 	struct mlxsw_sp_vr *vr;
4889 
4890 	if (mlxsw_sp->router->aborted)
4891 		return;
4892 
4893 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
4894 	if (WARN_ON(!vr))
4895 		return;
4896 
4897 	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
4898 	mlxsw_sp_vr_put(vr);
4899 }
4900 
4901 static int
4902 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
4903 			      struct vif_entry_notifier_info *ven_info)
4904 {
4905 	struct mlxsw_sp_rif *rif;
4906 	struct mlxsw_sp_vr *vr;
4907 
4908 	if (mlxsw_sp->router->aborted)
4909 		return 0;
4910 
4911 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id);
4912 	if (IS_ERR(vr))
4913 		return PTR_ERR(vr);
4914 
4915 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
4916 	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
4917 				   ven_info->vif_index,
4918 				   ven_info->vif_flags, rif);
4919 }
4920 
4921 static void
4922 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
4923 			      struct vif_entry_notifier_info *ven_info)
4924 {
4925 	struct mlxsw_sp_vr *vr;
4926 
4927 	if (mlxsw_sp->router->aborted)
4928 		return;
4929 
4930 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
4931 	if (WARN_ON(!vr))
4932 		return;
4933 
4934 	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
4935 	mlxsw_sp_vr_put(vr);
4936 }
4937 
4938 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
4939 {
4940 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
4941 	int err;
4942 
4943 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
4944 					       MLXSW_SP_LPM_TREE_MIN);
4945 	if (err)
4946 		return err;
4947 
4948 	/* The multicast router code does not need an abort trap, since by
4949 	 * default packets that don't match any routes are trapped to the CPU.
4950 	 */
4951 
4952 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
4953 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
4954 						MLXSW_SP_LPM_TREE_MIN + 1);
4955 }
4956 
4957 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
4958 				     struct mlxsw_sp_fib_node *fib_node)
4959 {
4960 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
4961 
4962 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
4963 				 common.list) {
4964 		bool do_break = &tmp->common.list == &fib_node->entry_list;
4965 
4966 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4967 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4968 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4969 		/* Break when the entry list is empty and the node was freed.
4970 		 * Otherwise, we'll access freed memory in the next
4971 		 * iteration.
4972 		 */
4973 		if (do_break)
4974 			break;
4975 	}
4976 }
4977 
4978 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
4979 				     struct mlxsw_sp_fib_node *fib_node)
4980 {
4981 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
4982 
4983 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
4984 				 common.list) {
4985 		bool do_break = &tmp->common.list == &fib_node->entry_list;
4986 
4987 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
4988 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
4989 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4990 		if (do_break)
4991 			break;
4992 	}
4993 }
4994 
4995 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
4996 				    struct mlxsw_sp_fib_node *fib_node)
4997 {
4998 	switch (fib_node->fib->proto) {
4999 	case MLXSW_SP_L3_PROTO_IPV4:
5000 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5001 		break;
5002 	case MLXSW_SP_L3_PROTO_IPV6:
5003 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5004 		break;
5005 	}
5006 }
5007 
5008 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5009 				  struct mlxsw_sp_vr *vr,
5010 				  enum mlxsw_sp_l3proto proto)
5011 {
5012 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5013 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5014 
5015 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5016 		bool do_break = &tmp->list == &fib->node_list;
5017 
5018 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5019 		if (do_break)
5020 			break;
5021 	}
5022 }
5023 
5024 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5025 {
5026 	int i;
5027 
5028 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5029 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5030 
5031 		if (!mlxsw_sp_vr_is_used(vr))
5032 			continue;
5033 
5034 		mlxsw_sp_mr_table_flush(vr->mr4_table);
5035 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5036 
5037 		/* If the virtual router was only used for IPv4, then it is
5038 		 * no longer in use.
5039 		 */
5040 		if (!mlxsw_sp_vr_is_used(vr))
5041 			continue;
5042 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5043 	}
5044 }
5045 
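/* Stop offloading routes altogether: flush all offloaded entries,
 * install the catch-all traps and mark the router as aborted, so that
 * subsequent FIB events are ignored and routing is handled solely by
 * the kernel.
 */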
5046 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5047 {
5048 	int err;
5049 
5050 	if (mlxsw_sp->router->aborted)
5051 		return;
5052 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5053 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5054 	mlxsw_sp->router->aborted = true;
5055 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5056 	if (err)
5057 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5058 }
5059 
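/* FIB notifications arrive in atomic context, so the notifier block
 * only takes the necessary references and copies the notifier info into
 * this work item. The actual processing is deferred to process context,
 * where RTNL can be taken.
 */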
5060 struct mlxsw_sp_fib_event_work {
5061 	struct work_struct work;
5062 	union {
5063 		struct fib6_entry_notifier_info fen6_info;
5064 		struct fib_entry_notifier_info fen_info;
5065 		struct fib_rule_notifier_info fr_info;
5066 		struct fib_nh_notifier_info fnh_info;
5067 		struct mfc_entry_notifier_info men_info;
5068 		struct vif_entry_notifier_info ven_info;
5069 	};
5070 	struct mlxsw_sp *mlxsw_sp;
5071 	unsigned long event;
5072 };
5073 
5074 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5075 {
5076 	struct mlxsw_sp_fib_event_work *fib_work =
5077 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5078 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5079 	struct fib_rule *rule;
5080 	bool replace, append;
5081 	int err;
5082 
5083 	/* Protect internal structures from changes */
5084 	rtnl_lock();
5085 	switch (fib_work->event) {
5086 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5087 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5088 	case FIB_EVENT_ENTRY_ADD:
5089 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5090 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5091 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5092 					       replace, append);
5093 		if (err)
5094 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5095 		fib_info_put(fib_work->fen_info.fi);
5096 		break;
5097 	case FIB_EVENT_ENTRY_DEL:
5098 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5099 		fib_info_put(fib_work->fen_info.fi);
5100 		break;
5101 	case FIB_EVENT_RULE_ADD: /* fall through */
5102 	case FIB_EVENT_RULE_DEL:
5103 		rule = fib_work->fr_info.rule;
5104 		if (!fib4_rule_default(rule) && !rule->l3mdev)
5105 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5106 		fib_rule_put(rule);
5107 		break;
5108 	case FIB_EVENT_NH_ADD: /* fall through */
5109 	case FIB_EVENT_NH_DEL:
5110 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5111 					fib_work->fnh_info.fib_nh);
5112 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5113 		break;
5114 	}
5115 	rtnl_unlock();
5116 	kfree(fib_work);
5117 }
5118 
5119 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5120 {
5121 	struct mlxsw_sp_fib_event_work *fib_work =
5122 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5123 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5124 	struct fib_rule *rule;
5125 	bool replace;
5126 	int err;
5127 
5128 	rtnl_lock();
5129 	switch (fib_work->event) {
5130 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5131 	case FIB_EVENT_ENTRY_ADD:
5132 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5133 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5134 					       fib_work->fen6_info.rt, replace);
5135 		if (err)
5136 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5137 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5138 		break;
5139 	case FIB_EVENT_ENTRY_DEL:
5140 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5141 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5142 		break;
5143 	case FIB_EVENT_RULE_ADD: /* fall through */
5144 	case FIB_EVENT_RULE_DEL:
5145 		rule = fib_work->fr_info.rule;
5146 		if (!fib6_rule_default(rule) && !rule->l3mdev)
5147 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5148 		fib_rule_put(rule);
5149 		break;
5150 	}
5151 	rtnl_unlock();
5152 	kfree(fib_work);
5153 }
5154 
5155 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5156 {
5157 	struct mlxsw_sp_fib_event_work *fib_work =
5158 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5159 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5160 	struct fib_rule *rule;
5161 	bool replace;
5162 	int err;
5163 
5164 	rtnl_lock();
5165 	switch (fib_work->event) {
5166 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5167 	case FIB_EVENT_ENTRY_ADD:
5168 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5169 
5170 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5171 						replace);
5172 		if (err)
5173 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5174 		ipmr_cache_put(fib_work->men_info.mfc);
5175 		break;
5176 	case FIB_EVENT_ENTRY_DEL:
5177 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5178 		ipmr_cache_put(fib_work->men_info.mfc);
5179 		break;
5180 	case FIB_EVENT_VIF_ADD:
5181 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5182 						    &fib_work->ven_info);
5183 		if (err)
5184 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5185 		dev_put(fib_work->ven_info.dev);
5186 		break;
5187 	case FIB_EVENT_VIF_DEL:
5188 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5189 					      &fib_work->ven_info);
5190 		dev_put(fib_work->ven_info.dev);
5191 		break;
5192 	case FIB_EVENT_RULE_ADD: /* fall through */
5193 	case FIB_EVENT_RULE_DEL:
5194 		rule = fib_work->fr_info.rule;
5195 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
5196 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5197 		fib_rule_put(rule);
5198 		break;
5199 	}
5200 	rtnl_unlock();
5201 	kfree(fib_work);
5202 }
5203 
5204 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5205 				       struct fib_notifier_info *info)
5206 {
5207 	switch (fib_work->event) {
5208 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5209 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5210 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5211 	case FIB_EVENT_ENTRY_DEL:
5212 		memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info));
5213 		/* Take a reference on fib_info to prevent it from being
5214 		 * freed while work is queued. Release it afterwards.
5215 		 */
5216 		fib_info_hold(fib_work->fen_info.fi);
5217 		break;
5218 	case FIB_EVENT_RULE_ADD: /* fall through */
5219 	case FIB_EVENT_RULE_DEL:
5220 		memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
5221 		fib_rule_get(fib_work->fr_info.rule);
5222 		break;
5223 	case FIB_EVENT_NH_ADD: /* fall through */
5224 	case FIB_EVENT_NH_DEL:
5225 		memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info));
5226 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5227 		break;
5228 	}
5229 }
5230 
5231 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5232 				       struct fib_notifier_info *info)
5233 {
5234 	switch (fib_work->event) {
5235 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5236 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5237 	case FIB_EVENT_ENTRY_DEL:
5238 		memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info));
5239 		rt6_hold(fib_work->fen6_info.rt);
5240 		break;
5241 	case FIB_EVENT_RULE_ADD: /* fall through */
5242 	case FIB_EVENT_RULE_DEL:
5243 		memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
5244 		fib_rule_get(fib_work->fr_info.rule);
5245 		break;
5246 	}
5247 }
5248 
5249 static void
5250 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5251 			    struct fib_notifier_info *info)
5252 {
5253 	switch (fib_work->event) {
5254 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5255 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5256 	case FIB_EVENT_ENTRY_DEL:
5257 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5258 		ipmr_cache_hold(fib_work->men_info.mfc);
5259 		break;
5260 	case FIB_EVENT_VIF_ADD: /* fall through */
5261 	case FIB_EVENT_VIF_DEL:
5262 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5263 		dev_hold(fib_work->ven_info.dev);
5264 		break;
5265 	case FIB_EVENT_RULE_ADD: /* fall through */
5266 	case FIB_EVENT_RULE_DEL:
5267 		memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
5268 		fib_rule_get(fib_work->fr_info.rule);
5269 		break;
5270 	}
5271 }
5272 
5273 /* Called with rcu_read_lock() */
5274 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5275 				     unsigned long event, void *ptr)
5276 {
5277 	struct mlxsw_sp_fib_event_work *fib_work;
5278 	struct fib_notifier_info *info = ptr;
5279 	struct mlxsw_sp_router *router;
5280 
5281 	if (!net_eq(info->net, &init_net) ||
5282 	    (info->family != AF_INET && info->family != AF_INET6 &&
5283 	     info->family != RTNL_FAMILY_IPMR))
5284 		return NOTIFY_DONE;
5285 
5286 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5287 	if (WARN_ON(!fib_work))
5288 		return NOTIFY_BAD;
5289 
5290 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5291 	fib_work->mlxsw_sp = router->mlxsw_sp;
5292 	fib_work->event = event;
5293 
5294 	switch (info->family) {
5295 	case AF_INET:
5296 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5297 		mlxsw_sp_router_fib4_event(fib_work, info);
5298 		break;
5299 	case AF_INET6:
5300 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5301 		mlxsw_sp_router_fib6_event(fib_work, info);
5302 		break;
5303 	case RTNL_FAMILY_IPMR:
5304 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5305 		mlxsw_sp_router_fibmr_event(fib_work, info);
5306 		break;
5307 	}
5308 
5309 	mlxsw_core_schedule_work(&fib_work->work);
5310 
5311 	return NOTIFY_DONE;
5312 }
5313 
5314 static struct mlxsw_sp_rif *
5315 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5316 			 const struct net_device *dev)
5317 {
5318 	int i;
5319 
5320 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5321 		if (mlxsw_sp->router->rifs[i] &&
5322 		    mlxsw_sp->router->rifs[i]->dev == dev)
5323 			return mlxsw_sp->router->rifs[i];
5324 
5325 	return NULL;
5326 }
5327 
5328 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5329 {
5330 	char ritr_pl[MLXSW_REG_RITR_LEN];
5331 	int err;
5332 
5333 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5334 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5335 	if (WARN_ON_ONCE(err))
5336 		return err;
5337 
5338 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
5339 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5340 }
5341 
5342 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
5343 					  struct mlxsw_sp_rif *rif)
5344 {
5345 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
5346 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
5347 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
5348 }
5349 
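/* Determine whether an address event on @dev requires a RIF update: a
 * RIF should be configured on NETDEV_UP if none exists, and removed on
 * NETDEV_DOWN once the device has neither IPv4 nor IPv6 addresses left.
 */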
5350 static bool
5351 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
5352 			   unsigned long event)
5353 {
5354 	struct inet6_dev *inet6_dev;
5355 	bool addr_list_empty = true;
5356 	struct in_device *idev;
5357 
5358 	switch (event) {
5359 	case NETDEV_UP:
5360 		return rif == NULL;
5361 	case NETDEV_DOWN:
5362 		idev = __in_dev_get_rtnl(dev);
5363 		if (idev && idev->ifa_list)
5364 			addr_list_empty = false;
5365 
5366 		inet6_dev = __in6_dev_get(dev);
5367 		if (addr_list_empty && inet6_dev &&
5368 		    !list_empty(&inet6_dev->addr_list))
5369 			addr_list_empty = false;
5370 
5371 		if (rif && addr_list_empty &&
5372 		    !netif_is_l3_slave(rif->dev))
5373 			return true;
5374 		/* It is possible we already removed the RIF ourselves
5375 		 * if it was assigned to a netdev that is now a bridge
5376 		 * or LAG slave.
5377 		 */
5378 		return false;
5379 	}
5380 
5381 	return false;
5382 }
5383 
5384 static enum mlxsw_sp_rif_type
5385 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
5386 		      const struct net_device *dev)
5387 {
5388 	enum mlxsw_sp_fid_type type;
5389 
5390 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
5391 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
5392 
5393 	/* Otherwise RIF type is derived from the type of the underlying FID. */
5394 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
5395 		type = MLXSW_SP_FID_TYPE_8021Q;
5396 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
5397 		type = MLXSW_SP_FID_TYPE_8021Q;
5398 	else if (netif_is_bridge_master(dev))
5399 		type = MLXSW_SP_FID_TYPE_8021D;
5400 	else
5401 		type = MLXSW_SP_FID_TYPE_RFID;
5402 
5403 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
5404 }
5405 
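/* Allocate a RIF index by finding the first free slot in the RIF array.
 * Returns -ENOBUFS when all MAX_RIFS entries are in use.
 */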
5406 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5407 {
5408 	int i;
5409 
5410 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5411 		if (!mlxsw_sp->router->rifs[i]) {
5412 			*p_rif_index = i;
5413 			return 0;
5414 		}
5415 	}
5416 
5417 	return -ENOBUFS;
5418 }
5419 
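/* Allocate a RIF of the type-specific size and initialize the fields
 * common to all RIF types from the backing netdev.
 */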
5420 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5421 					       u16 vr_id,
5422 					       struct net_device *l3_dev)
5423 {
5424 	struct mlxsw_sp_rif *rif;
5425 
5426 	rif = kzalloc(rif_size, GFP_KERNEL);
5427 	if (!rif)
5428 		return NULL;
5429 
5430 	INIT_LIST_HEAD(&rif->nexthop_list);
5431 	INIT_LIST_HEAD(&rif->neigh_list);
5432 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
5433 	rif->mtu = l3_dev->mtu;
5434 	rif->vr_id = vr_id;
5435 	rif->dev = l3_dev;
5436 	rif->rif_index = rif_index;
5437 
5438 	return rif;
5439 }
5440 
5441 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
5442 					   u16 rif_index)
5443 {
5444 	return mlxsw_sp->router->rifs[rif_index];
5445 }
5446 
5447 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
5448 {
5449 	return rif->rif_index;
5450 }
5451 
5452 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5453 {
5454 	return lb_rif->common.rif_index;
5455 }
5456 
5457 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5458 {
5459 	return lb_rif->ul_vr_id;
5460 }
5461 
5462 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
5463 {
5464 	return rif->dev->ifindex;
5465 }
5466 
5467 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
5468 {
5469 	return rif->dev;
5470 }
5471 
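/* Create a RIF for a netdev: bind it to the virtual router derived from
 * the netdev's FIB table, allocate a free RIF index and let the
 * type-specific operations set up the FID and program the device.
 */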
5472 static struct mlxsw_sp_rif *
5473 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
5474 		    const struct mlxsw_sp_rif_params *params)
5475 {
5476 	u32 tb_id = l3mdev_fib_table(params->dev);
5477 	const struct mlxsw_sp_rif_ops *ops;
5478 	struct mlxsw_sp_fid *fid = NULL;
5479 	enum mlxsw_sp_rif_type type;
5480 	struct mlxsw_sp_rif *rif;
5481 	struct mlxsw_sp_vr *vr;
5482 	u16 rif_index;
5483 	int err;
5484 
5485 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
5486 	ops = mlxsw_sp->router->rif_ops_arr[type];
5487 
5488 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
5489 	if (IS_ERR(vr))
5490 		return ERR_CAST(vr);
5491 	vr->rif_count++;
5492 
5493 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
5494 	if (err)
5495 		goto err_rif_index_alloc;
5496 
5497 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
5498 	if (!rif) {
5499 		err = -ENOMEM;
5500 		goto err_rif_alloc;
5501 	}
5502 	rif->mlxsw_sp = mlxsw_sp;
5503 	rif->ops = ops;
5504 
5505 	if (ops->fid_get) {
5506 		fid = ops->fid_get(rif);
5507 		if (IS_ERR(fid)) {
5508 			err = PTR_ERR(fid);
5509 			goto err_fid_get;
5510 		}
5511 		rif->fid = fid;
5512 	}
5513 
5514 	if (ops->setup)
5515 		ops->setup(rif, params);
5516 
5517 	err = ops->configure(rif);
5518 	if (err)
5519 		goto err_configure;
5520 
5521 	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
5522 	if (err)
5523 		goto err_mr_rif_add;
5524 
5525 	mlxsw_sp_rif_counters_alloc(rif);
5526 	mlxsw_sp->router->rifs[rif_index] = rif;
5527 
5528 	return rif;
5529 
5530 err_mr_rif_add:
5531 	ops->deconfigure(rif);
5532 err_configure:
5533 	if (fid)
5534 		mlxsw_sp_fid_put(fid);
5535 err_fid_get:
5536 	kfree(rif);
5537 err_rif_alloc:
5538 err_rif_index_alloc:
5539 	vr->rif_count--;
5540 	mlxsw_sp_vr_put(vr);
5541 	return ERR_PTR(err);
5542 }
5543 
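/* Tear down a RIF in the reverse order of mlxsw_sp_rif_create(), after
 * first syncing away the nexthops and neighbours that used it.
 */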
5544 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
5545 {
5546 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
5547 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
5548 	struct mlxsw_sp_fid *fid = rif->fid;
5549 	struct mlxsw_sp_vr *vr;
5550 
5551 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
5552 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
5553 
5554 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
5555 	mlxsw_sp_rif_counters_free(rif);
5556 	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
5557 	ops->deconfigure(rif);
	/* Loopback RIFs are not associated with a FID. */
	if (fid)
		mlxsw_sp_fid_put(fid);
5561 	kfree(rif);
5562 	vr->rif_count--;
5563 	mlxsw_sp_vr_put(vr);
5564 }
5565 
5566 static void
5567 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
5568 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
5569 {
5570 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
5571 
5572 	params->vid = mlxsw_sp_port_vlan->vid;
5573 	params->lag = mlxsw_sp_port->lagged;
5574 	if (params->lag)
5575 		params->lag_id = mlxsw_sp_port->lag_id;
5576 	else
5577 		params->system_port = mlxsw_sp_port->local_port;
5578 }
5579 
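/* Join a {port, VID} to the router: create a sub-port RIF for the L3
 * netdev if one does not already exist, map the {port, VID} to the RIF's
 * rFID and put the VID in a forwarding, non-learning state.
 */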
5580 static int
5581 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
5582 			       struct net_device *l3_dev)
5583 {
5584 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
5585 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
5586 	u16 vid = mlxsw_sp_port_vlan->vid;
5587 	struct mlxsw_sp_rif *rif;
5588 	struct mlxsw_sp_fid *fid;
5589 	int err;
5590 
5591 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
5592 	if (!rif) {
5593 		struct mlxsw_sp_rif_params params = {
5594 			.dev = l3_dev,
5595 		};
5596 
5597 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
5598 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params);
5599 		if (IS_ERR(rif))
5600 			return PTR_ERR(rif);
5601 	}
5602 
	/* The FID was already created during RIF creation, so just take
	 * another reference.
	 */
5604 	fid = rif->ops->fid_get(rif);
5605 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
5606 	if (err)
5607 		goto err_fid_port_vid_map;
5608 
5609 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
5610 	if (err)
5611 		goto err_port_vid_learning_set;
5612 
5613 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
5614 					BR_STATE_FORWARDING);
5615 	if (err)
5616 		goto err_port_vid_stp_set;
5617 
5618 	mlxsw_sp_port_vlan->fid = fid;
5619 
5620 	return 0;
5621 
5622 err_port_vid_stp_set:
5623 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
5624 err_port_vid_learning_set:
5625 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
5626 err_fid_port_vid_map:
5627 	mlxsw_sp_fid_put(fid);
5628 	return err;
5629 }
5630 
5631 void
5632 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
5633 {
5634 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
5635 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
5636 	u16 vid = mlxsw_sp_port_vlan->vid;
5637 
5638 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
5639 		return;
5640 
5641 	mlxsw_sp_port_vlan->fid = NULL;
5642 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
5643 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
5644 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
	/* If the router port holds the last reference on the rFID, then the
	 * associated Sub-port RIF will be destroyed.
	 */
5648 	mlxsw_sp_fid_put(fid);
5649 }
5650 
5651 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
5652 					     struct net_device *port_dev,
5653 					     unsigned long event, u16 vid)
5654 {
5655 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
5656 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
5657 
5658 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
5659 	if (WARN_ON(!mlxsw_sp_port_vlan))
5660 		return -EINVAL;
5661 
5662 	switch (event) {
5663 	case NETDEV_UP:
5664 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
5665 						      l3_dev);
5666 	case NETDEV_DOWN:
5667 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
5668 		break;
5669 	}
5670 
5671 	return 0;
5672 }
5673 
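/* Bridge, LAG and OVS slaves are not routed directly; their upper devices
 * are. Otherwise, handle the event on behalf of the port's default VLAN
 * (VID 1).
 */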
5674 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
5675 					unsigned long event)
5676 {
5677 	if (netif_is_bridge_port(port_dev) ||
5678 	    netif_is_lag_port(port_dev) ||
5679 	    netif_is_ovs_port(port_dev))
5680 		return 0;
5681 
5682 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1);
5683 }
5684 
5685 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
5686 					 struct net_device *lag_dev,
5687 					 unsigned long event, u16 vid)
5688 {
5689 	struct net_device *port_dev;
5690 	struct list_head *iter;
5691 	int err;
5692 
5693 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
5694 		if (mlxsw_sp_port_dev_check(port_dev)) {
5695 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
5696 								port_dev,
5697 								event, vid);
5698 			if (err)
5699 				return err;
5700 		}
5701 	}
5702 
5703 	return 0;
5704 }
5705 
5706 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
5707 				       unsigned long event)
5708 {
5709 	if (netif_is_bridge_port(lag_dev))
5710 		return 0;
5711 
5712 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
5713 }
5714 
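/* Create or destroy the RIF backing a bridge netdev. Callers are expected
 * to have validated the event via mlxsw_sp_rif_should_config(), so the
 * RIF lookup on NETDEV_DOWN should not fail.
 */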
5715 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
5716 					  unsigned long event)
5717 {
5718 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
5719 	struct mlxsw_sp_rif_params params = {
5720 		.dev = l3_dev,
5721 	};
5722 	struct mlxsw_sp_rif *rif;
5723 
5724 	switch (event) {
5725 	case NETDEV_UP:
5726 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params);
5727 		if (IS_ERR(rif))
5728 			return PTR_ERR(rif);
5729 		break;
5730 	case NETDEV_DOWN:
5731 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
5732 		mlxsw_sp_rif_destroy(rif);
5733 		break;
5734 	}
5735 
5736 	return 0;
5737 }
5738 
5739 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
5740 					unsigned long event)
5741 {
5742 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
5743 	u16 vid = vlan_dev_vlan_id(vlan_dev);
5744 
5745 	if (netif_is_bridge_port(vlan_dev))
5746 		return 0;
5747 
5748 	if (mlxsw_sp_port_dev_check(real_dev))
5749 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
5750 							 event, vid);
5751 	else if (netif_is_lag_master(real_dev))
5752 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
5753 						     vid);
5754 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
5755 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event);
5756 
5757 	return 0;
5758 }
5759 
5760 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
5761 				     unsigned long event)
5762 {
5763 	if (mlxsw_sp_port_dev_check(dev))
5764 		return mlxsw_sp_inetaddr_port_event(dev, event);
5765 	else if (netif_is_lag_master(dev))
5766 		return mlxsw_sp_inetaddr_lag_event(dev, event);
5767 	else if (netif_is_bridge_master(dev))
5768 		return mlxsw_sp_inetaddr_bridge_event(dev, event);
5769 	else if (is_vlan_dev(dev))
5770 		return mlxsw_sp_inetaddr_vlan_event(dev, event);
5771 	else
5772 		return 0;
5773 }
5774 
5775 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
5776 			    unsigned long event, void *ptr)
5777 {
5778 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
5779 	struct net_device *dev = ifa->ifa_dev->dev;
5780 	struct mlxsw_sp *mlxsw_sp;
5781 	struct mlxsw_sp_rif *rif;
5782 	int err = 0;
5783 
5784 	mlxsw_sp = mlxsw_sp_lower_get(dev);
5785 	if (!mlxsw_sp)
5786 		goto out;
5787 
5788 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5789 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
5790 		goto out;
5791 
5792 	err = __mlxsw_sp_inetaddr_event(dev, event);
5793 out:
5794 	return notifier_from_errno(err);
5795 }
5796 
5797 struct mlxsw_sp_inet6addr_event_work {
5798 	struct work_struct work;
5799 	struct net_device *dev;
5800 	unsigned long event;
5801 };
5802 
5803 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
5804 {
5805 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
5806 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
5807 	struct net_device *dev = inet6addr_work->dev;
5808 	unsigned long event = inet6addr_work->event;
5809 	struct mlxsw_sp *mlxsw_sp;
5810 	struct mlxsw_sp_rif *rif;
5811 
5812 	rtnl_lock();
5813 	mlxsw_sp = mlxsw_sp_lower_get(dev);
5814 	if (!mlxsw_sp)
5815 		goto out;
5816 
5817 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5818 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
5819 		goto out;
5820 
5821 	__mlxsw_sp_inetaddr_event(dev, event);
5822 out:
5823 	rtnl_unlock();
5824 	dev_put(dev);
5825 	kfree(inet6addr_work);
5826 }
5827 
5828 /* Called with rcu_read_lock() */
5829 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
5830 			     unsigned long event, void *ptr)
5831 {
5832 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
5833 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
5834 	struct net_device *dev = if6->idev->dev;
5835 
5836 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
5837 		return NOTIFY_DONE;
5838 
5839 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
5840 	if (!inet6addr_work)
5841 		return NOTIFY_BAD;
5842 
5843 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
5844 	inet6addr_work->dev = dev;
5845 	inet6addr_work->event = event;
5846 	dev_hold(dev);
5847 	mlxsw_core_schedule_work(&inet6addr_work->work);
5848 
5849 	return NOTIFY_DONE;
5850 }
5851 
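/* Update the MAC address and MTU of an existing RIF by querying its RITR
 * record, patching the relevant fields and re-writing it.
 */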
5852 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
5853 			     const char *mac, int mtu)
5854 {
5855 	char ritr_pl[MLXSW_REG_RITR_LEN];
5856 	int err;
5857 
5858 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
5859 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5860 	if (err)
5861 		return err;
5862 
5863 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
5864 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
5865 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
5866 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5867 }
5868 
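/* A routed netdev changed its MAC address or MTU: replace the FDB entry
 * for the old RIF MAC with one for the new MAC, update the RIF itself
 * and, if the MTU changed, propagate the new MTU to the multicast router.
 */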
5869 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
5870 {
5871 	struct mlxsw_sp *mlxsw_sp;
5872 	struct mlxsw_sp_rif *rif;
5873 	u16 fid_index;
5874 	int err;
5875 
5876 	mlxsw_sp = mlxsw_sp_lower_get(dev);
5877 	if (!mlxsw_sp)
5878 		return 0;
5879 
5880 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5881 	if (!rif)
5882 		return 0;
5883 	fid_index = mlxsw_sp_fid_index(rif->fid);
5884 
5885 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
5886 	if (err)
5887 		return err;
5888 
5889 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
5890 				dev->mtu);
5891 	if (err)
5892 		goto err_rif_edit;
5893 
5894 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
5895 	if (err)
5896 		goto err_rif_fdb_op;
5897 
5898 	if (rif->mtu != dev->mtu) {
5899 		struct mlxsw_sp_vr *vr;
5900 
		/* The RIF is relevant only to its mr_table instance: unlike in
		 * unicast routing, a RIF in multicast routing cannot be shared
		 * between several multicast routing tables.
		 */
5905 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
5906 		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
5907 	}
5908 
5909 	ether_addr_copy(rif->addr, dev->dev_addr);
5910 	rif->mtu = dev->mtu;
5911 
5912 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
5913 
5914 	return 0;
5915 
5916 err_rif_fdb_op:
5917 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
5918 err_rif_edit:
5919 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
5920 	return err;
5921 }
5922 
5923 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
5924 				  struct net_device *l3_dev)
5925 {
5926 	struct mlxsw_sp_rif *rif;
5927 
	/* If the netdev is already associated with a RIF, then we need to
	 * destroy it and create a new one with the new virtual router ID.
	 */
5931 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
5932 	if (rif)
5933 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
5934 
5935 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP);
5936 }
5937 
5938 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
5939 				    struct net_device *l3_dev)
5940 {
5941 	struct mlxsw_sp_rif *rif;
5942 
5943 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
5944 	if (!rif)
5945 		return;
5946 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
5947 }
5948 
5949 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
5950 				 struct netdev_notifier_changeupper_info *info)
5951 {
5952 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
5953 	int err = 0;
5954 
5955 	if (!mlxsw_sp)
5956 		return 0;
5957 
5958 	switch (event) {
5959 	case NETDEV_PRECHANGEUPPER:
5960 		return 0;
5961 	case NETDEV_CHANGEUPPER:
5962 		if (info->linking)
5963 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev);
5964 		else
5965 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
5966 		break;
5967 	}
5968 
5969 	return err;
5970 }
5971 
5972 static struct mlxsw_sp_rif_subport *
5973 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
5974 {
5975 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
5976 }
5977 
5978 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
5979 				       const struct mlxsw_sp_rif_params *params)
5980 {
5981 	struct mlxsw_sp_rif_subport *rif_subport;
5982 
5983 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
5984 	rif_subport->vid = params->vid;
5985 	rif_subport->lag = params->lag;
5986 	if (params->lag)
5987 		rif_subport->lag_id = params->lag_id;
5988 	else
5989 		rif_subport->system_port = params->system_port;
5990 }
5991 
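/* Write the RITR record of a sub-port RIF. The sub-port is identified by
 * either its LAG ID or its system port, together with the VID.
 */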
5992 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
5993 {
5994 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
5995 	struct mlxsw_sp_rif_subport *rif_subport;
5996 	char ritr_pl[MLXSW_REG_RITR_LEN];
5997 
5998 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
5999 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6000 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
6001 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6002 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6003 				  rif_subport->lag ? rif_subport->lag_id :
6004 						     rif_subport->system_port,
6005 				  rif_subport->vid);
6006 
6007 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6008 }
6009 
6010 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6011 {
6012 	int err;
6013 
6014 	err = mlxsw_sp_rif_subport_op(rif, true);
6015 	if (err)
6016 		return err;
6017 
6018 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6019 				  mlxsw_sp_fid_index(rif->fid), true);
6020 	if (err)
6021 		goto err_rif_fdb_op;
6022 
6023 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6024 	return 0;
6025 
6026 err_rif_fdb_op:
6027 	mlxsw_sp_rif_subport_op(rif, false);
6028 	return err;
6029 }
6030 
6031 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6032 {
6033 	struct mlxsw_sp_fid *fid = rif->fid;
6034 
6035 	mlxsw_sp_fid_rif_set(fid, NULL);
6036 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6037 			    mlxsw_sp_fid_index(fid), false);
6038 	mlxsw_sp_rif_subport_op(rif, false);
6039 }
6040 
6041 static struct mlxsw_sp_fid *
6042 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
6043 {
6044 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6045 }
6046 
6047 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
6048 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
6049 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
6050 	.setup			= mlxsw_sp_rif_subport_setup,
6051 	.configure		= mlxsw_sp_rif_subport_configure,
6052 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
6053 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
6054 };
6055 
6056 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6057 				    enum mlxsw_reg_ritr_if_type type,
6058 				    u16 vid_fid, bool enable)
6059 {
6060 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6061 	char ritr_pl[MLXSW_REG_RITR_LEN];
6062 
6063 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6064 			    rif->dev->mtu);
6065 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6066 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6067 
6068 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6069 }
6070 
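/* The router port is a virtual port used as the flood table member via
 * which flooded packets reach the router. It is numbered one past the
 * maximal number of ports.
 */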
6071 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6072 {
6073 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6074 }
6075 
6076 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6077 {
6078 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6079 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6080 	int err;
6081 
6082 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6083 	if (err)
6084 		return err;
6085 
6086 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6087 				     mlxsw_sp_router_port(mlxsw_sp), true);
6088 	if (err)
6089 		goto err_fid_mc_flood_set;
6090 
6091 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6092 				     mlxsw_sp_router_port(mlxsw_sp), true);
6093 	if (err)
6094 		goto err_fid_bc_flood_set;
6095 
6096 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6097 				  mlxsw_sp_fid_index(rif->fid), true);
6098 	if (err)
6099 		goto err_rif_fdb_op;
6100 
6101 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6102 	return 0;
6103 
6104 err_rif_fdb_op:
6105 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6106 			       mlxsw_sp_router_port(mlxsw_sp), false);
6107 err_fid_bc_flood_set:
6108 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6109 			       mlxsw_sp_router_port(mlxsw_sp), false);
6110 err_fid_mc_flood_set:
6111 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6112 	return err;
6113 }
6114 
6115 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
6116 {
6117 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6118 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6119 	struct mlxsw_sp_fid *fid = rif->fid;
6120 
6121 	mlxsw_sp_fid_rif_set(fid, NULL);
6122 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6123 			    mlxsw_sp_fid_index(fid), false);
6124 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6125 			       mlxsw_sp_router_port(mlxsw_sp), false);
6126 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6127 			       mlxsw_sp_router_port(mlxsw_sp), false);
6128 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6129 }
6130 
6131 static struct mlxsw_sp_fid *
6132 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6133 {
6134 	u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6135 
6136 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
6137 }
6138 
6139 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
6140 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
6141 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6142 	.configure		= mlxsw_sp_rif_vlan_configure,
6143 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
6144 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
6145 };
6146 
6147 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
6148 {
6149 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6150 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6151 	int err;
6152 
6153 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
6154 				       true);
6155 	if (err)
6156 		return err;
6157 
6158 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6159 				     mlxsw_sp_router_port(mlxsw_sp), true);
6160 	if (err)
6161 		goto err_fid_mc_flood_set;
6162 
6163 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6164 				     mlxsw_sp_router_port(mlxsw_sp), true);
6165 	if (err)
6166 		goto err_fid_bc_flood_set;
6167 
6168 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6169 				  mlxsw_sp_fid_index(rif->fid), true);
6170 	if (err)
6171 		goto err_rif_fdb_op;
6172 
6173 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6174 	return 0;
6175 
6176 err_rif_fdb_op:
6177 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6178 			       mlxsw_sp_router_port(mlxsw_sp), false);
6179 err_fid_bc_flood_set:
6180 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6181 			       mlxsw_sp_router_port(mlxsw_sp), false);
6182 err_fid_mc_flood_set:
6183 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6184 	return err;
6185 }
6186 
6187 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
6188 {
6189 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6190 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6191 	struct mlxsw_sp_fid *fid = rif->fid;
6192 
6193 	mlxsw_sp_fid_rif_set(fid, NULL);
6194 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6195 			    mlxsw_sp_fid_index(fid), false);
6196 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6197 			       mlxsw_sp_router_port(mlxsw_sp), false);
6198 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6199 			       mlxsw_sp_router_port(mlxsw_sp), false);
6200 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6201 }
6202 
6203 static struct mlxsw_sp_fid *
6204 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
6205 {
6206 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
6207 }
6208 
6209 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
6210 	.type			= MLXSW_SP_RIF_TYPE_FID,
6211 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6212 	.configure		= mlxsw_sp_rif_fid_configure,
6213 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
6214 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
6215 };
6216 
6217 static struct mlxsw_sp_rif_ipip_lb *
6218 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
6219 {
6220 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
6221 }
6222 
6223 static void
6224 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6225 			   const struct mlxsw_sp_rif_params *params)
6226 {
6227 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6228 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
6229 
6230 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6231 				 common);
6232 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6233 	rif_lb->lb_config = params_lb->lb_config;
6234 }
6235 
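/* Write the RITR record of a loopback RIF used to terminate IP-in-IP
 * tunnels. Only an IPv4 underlay is currently supported; an IPv6 underlay
 * yields -EAFNOSUPPORT.
 */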
6236 static int
6237 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
6238 			struct mlxsw_sp_vr *ul_vr, bool enable)
6239 {
6240 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
6241 	struct mlxsw_sp_rif *rif = &lb_rif->common;
6242 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6243 	char ritr_pl[MLXSW_REG_RITR_LEN];
6244 	u32 saddr4;
6245 
6246 	switch (lb_cf.ul_protocol) {
6247 	case MLXSW_SP_L3_PROTO_IPV4:
6248 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
6249 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
6250 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
6251 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
6252 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
6253 			    ul_vr->id, saddr4, lb_cf.okey);
6254 		break;
6255 
6256 	case MLXSW_SP_L3_PROTO_IPV6:
6257 		return -EAFNOSUPPORT;
6258 	}
6259 
6260 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6261 }
6262 
6263 static int
6264 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
6265 {
6266 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6267 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
6268 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6269 	struct mlxsw_sp_vr *ul_vr;
6270 	int err;
6271 
6272 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id);
6273 	if (IS_ERR(ul_vr))
6274 		return PTR_ERR(ul_vr);
6275 
6276 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
6277 	if (err)
6278 		goto err_loopback_op;
6279 
6280 	lb_rif->ul_vr_id = ul_vr->id;
6281 	++ul_vr->rif_count;
6282 	return 0;
6283 
6284 err_loopback_op:
6285 	mlxsw_sp_vr_put(ul_vr);
6286 	return err;
6287 }
6288 
6289 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
6290 {
6291 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6292 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6293 	struct mlxsw_sp_vr *ul_vr;
6294 
6295 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
6296 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
6297 
6298 	--ul_vr->rif_count;
6299 	mlxsw_sp_vr_put(ul_vr);
6300 }
6301 
6302 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
6303 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
6304 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
6305 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
6306 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
6307 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
6308 };
6309 
6310 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
6311 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
6312 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
6313 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
6314 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
6315 };
6316 
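/* Allocate the RIF array, sized according to the MAX_RIFS resource, and
 * hook up the table of type-specific RIF operations.
 */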
6317 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6318 {
6319 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6320 
6321 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
6322 					 sizeof(struct mlxsw_sp_rif *),
6323 					 GFP_KERNEL);
6324 	if (!mlxsw_sp->router->rifs)
6325 		return -ENOMEM;
6326 
6327 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
6328 
6329 	return 0;
6330 }
6331 
6332 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6333 {
6334 	int i;
6335 
6336 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6337 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6338 
6339 	kfree(mlxsw_sp->router->rifs);
6340 }
6341 
6342 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
6343 {
6344 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
6345 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
6346 	return 0;
6347 }
6348 
6349 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
6350 {
6351 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
6352 }
6353 
6354 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
6355 {
6356 	struct mlxsw_sp_router *router;
6357 
6358 	/* Flush pending FIB notifications and then flush the device's
6359 	 * table before requesting another dump. The FIB notification
6360 	 * block is unregistered, so no need to take RTNL.
6361 	 */
6362 	mlxsw_core_flush_owq();
6363 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6364 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
6365 }
6366 
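/* Enable IPv4 and IPv6 routing in the device and cap the number of router
 * interfaces according to the MAX_RIFS resource, both via the RGCR
 * register.
 */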
6367 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
6368 {
6369 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
6370 	u64 max_rifs;
6371 	int err;
6372 
6373 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
6374 		return -EIO;
6375 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6376 
6377 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
6378 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
6379 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
6380 	if (err)
6381 		return err;
6382 	return 0;
6383 }
6384 
6385 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
6386 {
6387 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
6388 
6389 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
6390 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
6391 }
6392 
6393 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
6394 {
6395 	struct mlxsw_sp_router *router;
6396 	int err;
6397 
	router = kzalloc(sizeof(*router), GFP_KERNEL);
6399 	if (!router)
6400 		return -ENOMEM;
6401 	mlxsw_sp->router = router;
6402 	router->mlxsw_sp = mlxsw_sp;
6403 
6404 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
6405 	err = __mlxsw_sp_router_init(mlxsw_sp);
6406 	if (err)
6407 		goto err_router_init;
6408 
6409 	err = mlxsw_sp_rifs_init(mlxsw_sp);
6410 	if (err)
6411 		goto err_rifs_init;
6412 
6413 	err = mlxsw_sp_ipips_init(mlxsw_sp);
6414 	if (err)
6415 		goto err_ipips_init;
6416 
6417 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
6418 			      &mlxsw_sp_nexthop_ht_params);
6419 	if (err)
6420 		goto err_nexthop_ht_init;
6421 
6422 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
6423 			      &mlxsw_sp_nexthop_group_ht_params);
6424 	if (err)
6425 		goto err_nexthop_group_ht_init;
6426 
6427 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
6428 	err = mlxsw_sp_lpm_init(mlxsw_sp);
6429 	if (err)
6430 		goto err_lpm_init;
6431 
6432 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
6433 	if (err)
6434 		goto err_mr_init;
6435 
6436 	err = mlxsw_sp_vrs_init(mlxsw_sp);
6437 	if (err)
6438 		goto err_vrs_init;
6439 
6440 	err = mlxsw_sp_neigh_init(mlxsw_sp);
6441 	if (err)
6442 		goto err_neigh_init;
6443 
6444 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
6445 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
6446 				    mlxsw_sp_router_fib_dump_flush);
6447 	if (err)
6448 		goto err_register_fib_notifier;
6449 
6450 	return 0;
6451 
6452 err_register_fib_notifier:
6453 	mlxsw_sp_neigh_fini(mlxsw_sp);
6454 err_neigh_init:
6455 	mlxsw_sp_vrs_fini(mlxsw_sp);
6456 err_vrs_init:
6457 	mlxsw_sp_mr_fini(mlxsw_sp);
6458 err_mr_init:
6459 	mlxsw_sp_lpm_fini(mlxsw_sp);
6460 err_lpm_init:
6461 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
6462 err_nexthop_group_ht_init:
6463 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
6464 err_nexthop_ht_init:
6465 	mlxsw_sp_ipips_fini(mlxsw_sp);
6466 err_ipips_init:
6467 	mlxsw_sp_rifs_fini(mlxsw_sp);
6468 err_rifs_init:
6469 	__mlxsw_sp_router_fini(mlxsw_sp);
6470 err_router_init:
6471 	kfree(mlxsw_sp->router);
6472 	return err;
6473 }
6474 
6475 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
6476 {
6477 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
6478 	mlxsw_sp_neigh_fini(mlxsw_sp);
6479 	mlxsw_sp_vrs_fini(mlxsw_sp);
6480 	mlxsw_sp_mr_fini(mlxsw_sp);
6481 	mlxsw_sp_lpm_fini(mlxsw_sp);
6482 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
6483 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
6484 	mlxsw_sp_ipips_fini(mlxsw_sp);
6485 	mlxsw_sp_rifs_fini(mlxsw_sp);
6486 	__mlxsw_sp_router_fini(mlxsw_sp);
6487 	kfree(mlxsw_sp->router);
6488 }
6489