/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/fib_notifier.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_ipip.h"
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"

struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;

struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;
	struct mlxsw_sp_vr *vrs;
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		struct mlxsw_sp_lpm_tree *trees;
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;
	struct notifier_block fib_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};

struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};

struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};

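/* Helpers for the per-RIF packet counters. A RIF can have one counter bound
 * in each direction; the _valid flag says whether the stored index is in use.
 */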
static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
			   enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return &rif->counter_egress;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return &rif->counter_ingress;
	}
	return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return rif->counter_egress_valid;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return rif->counter_ingress_valid;
	}
	return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir,
			       bool valid)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		rif->counter_egress_valid = valid;
		break;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		rif->counter_ingress_valid = valid;
		break;
	}
}

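/* Bind or unbind a counter to / from a RIF. The RITR register is updated by
 * a read-modify-write sequence: query the current RIF configuration and
 * write it back with the counter fields changed.
 */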
static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				     unsigned int counter_index, bool enable,
				     enum mlxsw_sp_rif_counter_dir dir)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	bool is_egress = false;
	int err;

	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
		is_egress = true;
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
				    is_egress);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *rif,
				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	bool valid;
	int err;

	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
	if (!valid)
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_NOP);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;
	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
	return 0;
}

static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}

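/* Allocate a counter from the RIF counter sub-pool, clear it and bind it to
 * the RIF in the given direction.
 */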
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}

void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}

static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct devlink *devlink;

	devlink = priv_to_devlink(mlxsw_sp->core);
	if (!devlink_dpipe_table_counter_enabled(devlink,
						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
		return;
	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;

	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
			 const struct net_device *dev);

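/* One bit for each possible prefix length, /0 through /128. */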
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 }};

	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
};

struct mlxsw_sp_nexthop_group;
struct mlxsw_sp_fib;

struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct rt6_info *rt;
};

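/* An LPM tree in the device. Trees are reference counted and shared between
 * virtual routers with an identical prefix usage (see mlxsw_sp_lpm_tree_get()).
 */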
struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	struct mlxsw_sp_prefix_usage prefix_usage;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
	enum mlxsw_sp_l3proto proto;
};

struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr4_table;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib *fib;
	int err;

	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	return fib;

err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
	WARN_ON(!list_empty(&fib->node_list));
	WARN_ON(fib->lpm_tree);
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count == 0)
			return lpm_tree;
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

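/* Program the tree layout: the longest used prefix length becomes the root
 * bin, and each used prefix length is linked to the next shorter one through
 * its left child.
 */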
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

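/* Look up an allocated tree whose protocol and prefix usage match. If none
 * exists, create one.
 */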
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage))
			return lpm_tree;
	}
	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}

static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}

#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */

static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	return 0;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
	kfree(mlxsw_sp->router->lpm.trees);
}

static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_is_used(vr))
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib, u8 tree_id)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     tree_id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main, default and local tables into one */
	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
					    enum mlxsw_sp_l3proto proto)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return vr->fib4;
	case MLXSW_SP_L3_PROTO_IPV6:
		return vr->fib6;
	}
	return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr->fib4))
		return ERR_CAST(vr->fib4);
	vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(vr->fib6)) {
		err = PTR_ERR(vr->fib6);
		goto err_fib6_create;
	}
	vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
						 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr->mr4_table)) {
		err = PTR_ERR(vr->mr4_table);
		goto err_mr_table_create;
	}
	vr->tb_id = tb_id;
	return vr;

err_mr_table_create:
	mlxsw_sp_fib_destroy(vr->fib6);
	vr->fib6 = NULL;
err_fib6_create:
	mlxsw_sp_fib_destroy(vr->fib4);
	vr->fib4 = NULL;
	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr4_table);
	vr->mr4_table = NULL;
	mlxsw_sp_fib_destroy(vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(vr->fib4);
	vr->fib4 = NULL;
}

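/* Find the virtual router bound to the given table ID, creating it on first
 * use.
 */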
static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
					   struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list) &&
	    mlxsw_sp_mr_table_empty(vr->mr4_table))
		mlxsw_sp_vr_destroy(vr);
}

static bool
mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
				    enum mlxsw_sp_l3proto proto, u8 tree_id)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);

	if (!mlxsw_sp_vr_is_used(vr))
		return false;
	if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
		return true;
	return false;
}

static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		return err;
	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;
}

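/* Rebind every virtual router that currently uses the FIB's tree to the new
 * tree. On failure, roll the already rebound routers back to the old tree.
 */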
static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib *fib,
					 struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	enum mlxsw_sp_l3proto proto = fib->proto;
	u8 old_id, new_id = new_tree->id;
	struct mlxsw_sp_vr *vr;
	int i, err;

	if (!old_tree)
		goto no_replace;
	old_id = old_tree->id;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
			continue;
		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
						   mlxsw_sp_vr_fib(vr, proto),
						   new_tree);
		if (err)
			goto err_tree_replace;
	}

	return 0;

err_tree_replace:
	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
	return err;

no_replace:
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		return err;
	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	return 0;
}

static void
mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
		      enum mlxsw_sp_l3proto proto,
		      struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
		unsigned char prefix;

		if (!mlxsw_sp_vr_is_used(vr))
			continue;
		mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
			mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
	}
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
					GFP_KERNEL);
	if (!mlxsw_sp->router->vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}

static struct net_device *
__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
{
	struct ip_tunnel *tun = netdev_priv(ol_dev);
	struct net *net = dev_net(ol_dev);

	return __dev_get_by_index(net, tun->parms.link);
}

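/* The underlay table of a tunnel is the FIB table of the underlay device that
 * the tunnel is bound to, if there is one, and of the tunnel device itself
 * otherwise. Without an l3mdev, that is the main table.
 */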
static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
{
	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);

	if (d)
		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
	else
		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack);

static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
				enum mlxsw_sp_ipip_type ipipt,
				struct net_device *ol_dev)
{
	struct mlxsw_sp_rif_params_ipip_lb lb_params;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_rif *rif;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
		.common.dev = ol_dev,
		.common.lag = false,
		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
	};

	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, NULL);
	if (IS_ERR(rif))
		return ERR_CAST(rif);
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;

	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;

	return ipip_entry;

err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}

static void
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}

static __be32
mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev)
{
	struct ip_tunnel *tun = netdev_priv(ol_dev);

	return tun->parms.iph.saddr;
}

union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
			   const struct net_device *ol_dev)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return (union mlxsw_sp_l3addr) {
			.addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev),
		};
	case MLXSW_SP_L3_PROTO_IPV6:
		break;
	}

	WARN_ON(1);
	return (union mlxsw_sp_l3addr) {
		.addr4 = 0,
	};
}

__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
{
	struct ip_tunnel *tun = netdev_priv(ol_dev);

	return tun->parms.iph.daddr;
}

union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
			   const struct net_device *ol_dev)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return (union mlxsw_sp_l3addr) {
			.addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev),
		};
	case MLXSW_SP_L3_PROTO_IPV6:
		break;
	}

	WARN_ON(1);
	return (union mlxsw_sp_l3addr) {
		.addr4 = 0,
	};
}

static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
			       const union mlxsw_sp_l3addr *addr2)
{
	return !memcmp(addr1, addr2, sizeof(*addr1));
}

static bool
mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
				  const enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr saddr,
				  u32 ul_tb_id,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	union mlxsw_sp_l3addr tun_saddr;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
	return tun_ul_tb_id == ul_tb_id &&
	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
}

static int
mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_fib_entry *fib_entry,
			      struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tunnel_index;
	int err;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
	if (err)
		return err;

	ipip_entry->decap_fib_entry = fib_entry;
	fib_entry->decap.ipip_entry = ipip_entry;
	fib_entry->decap.tunnel_index = tunnel_index;
	return 0;
}

static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry *fib_entry)
{
	/* Unlink this FIB entry from the IPIP entry whose decap route it is. */
	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
	fib_entry->decap.ipip_entry = NULL;
	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len);
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_node *fib_node;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_fib_entry *fib_entry;
	unsigned char saddr_prefix_len;
	union mlxsw_sp_l3addr saddr;
	struct mlxsw_sp_fib *ul_fib;
	struct mlxsw_sp_vr *ul_vr;
	const void *saddrp;
	size_t saddr_len;
	u32 ul_tb_id;
	u32 saddr4;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];

	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
	if (!ul_vr)
		return NULL;

	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
					   ipip_entry->ol_dev);

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(saddr.addr4);
		saddrp = &saddr4;
		saddr_len = 4;
		saddr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
					    saddr_prefix_len);
	if (!fib_node || list_empty(&fib_node->entry_list))
		return NULL;

	fib_entry = list_first_entry(&fib_node->entry_list,
				     struct mlxsw_sp_fib_entry, list);
	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		return NULL;

	return fib_entry;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
			   enum mlxsw_sp_ipip_type ipipt,
			   struct net_device *ol_dev)
{
	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;

	/* The configuration where several tunnels have the same local address
	 * in the same underlay table needs special treatment in the HW. That is
	 * currently not implemented in the driver.
	 */
	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node) {
		ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
		if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
						      ul_tb_id, ipip_entry))
			return ERR_PTR(-EEXIST);
	}

	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
	if (IS_ERR(ipip_entry))
		return ipip_entry;

	list_add_tail(&ipip_entry->ipip_list_node,
		      &mlxsw_sp->router->ipip_list);

	return ipip_entry;
}

static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	struct net_device *ipip_ul_dev;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
						 ul_tb_id, ipip_entry) &&
	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
}

/* Given decap parameters, find the corresponding IPIP entry. */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
						      ul_proto, ul_dip,
						      ipip_entry))
			return ipip_entry;

	return NULL;
}

static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
				      const struct net_device *dev,
				      enum mlxsw_sp_ipip_type *p_type)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	enum mlxsw_sp_ipip_type ipipt;

	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
		ipip_ops = router->ipip_ops_arr[ipipt];
		if (dev->type == ipip_ops->dev_type) {
			if (p_type)
				*p_type = ipipt;
			return true;
		}
	}
	return false;
}

bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp,
			     const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (ipip_entry->ol_dev == ol_dev)
			return ipip_entry;

	return NULL;
}

static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp,
					     struct net_device *ol_dev)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_ipip_type ipipt;

	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
	if (router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev,
						     MLXSW_SP_L3_PROTO_IPV4) ||
	    router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev,
						     MLXSW_SP_L3_PROTO_IPV6)) {
		ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
							ol_dev);
		if (IS_ERR(ipip_entry))
			return PTR_ERR(ipip_entry);
	}

	return 0;
}

static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp,
					    struct net_device *ol_dev)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry) {
		decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp,
								 ipip_entry);
		if (decap_fib_entry)
			mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
							  decap_fib_entry);
	}

	return 0;
}

static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp,
					       struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry && ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

static int mlxsw_sp_netdevice_ipip_vrf_event(struct mlxsw_sp *mlxsw_sp,
					     struct net_device *ol_dev)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_rif_ipip_lb *lb_rif;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return 0;

	/* When a tunneling device is moved to a different VRF, we need to
	 * update the backing loopback. Since RIFs can't be edited, we need to
	 * destroy and recreate it. That might create a window of opportunity
	 * where RALUE and RATR registers end up referencing a RIF that's
	 * already gone. RATRs are handled by the RIF destroy, and to take care
	 * of RALUE, demote the decap route back.
	 */
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);

	lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipip_entry->ipipt,
						 ol_dev);
	if (IS_ERR(lb_rif))
		return PTR_ERR(lb_rif);
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	ipip_entry->ol_lb = lb_rif;

	if (ol_dev->flags & IFF_UP) {
		decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp,
								 ipip_entry);
		if (decap_fib_entry)
			mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
							  decap_fib_entry);
	}

	return 0;
}

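/* Entry point for netdevice events on IP-in-IP (overlay) devices. */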
int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp,
				  struct net_device *ol_dev,
				  unsigned long event,
				  struct netdev_notifier_changeupper_info *info)
{
	switch (event) {
	case NETDEV_REGISTER:
		return mlxsw_sp_netdevice_ipip_reg_event(mlxsw_sp, ol_dev);
	case NETDEV_UNREGISTER:
		mlxsw_sp_netdevice_ipip_unreg_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_UP:
		return mlxsw_sp_netdevice_ipip_up_event(mlxsw_sp, ol_dev);
	case NETDEV_DOWN:
		mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_CHANGEUPPER:
		if (netif_is_l3_master(info->upper_dev))
			return mlxsw_sp_netdevice_ipip_vrf_event(mlxsw_sp,
								 ol_dev);
		return 0;
	}
	return 0;
}

struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;
	struct rhash_head ht_node;
	struct mlxsw_sp_neigh_key key;
	u16 rif;
	bool connected;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index;
	bool counter_valid;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};

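/* Iterate over the neighbours of a RIF: pass NULL to get the first entry and
 * a previous entry to get the one after it. Returns NULL past the last entry.
 */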
struct mlxsw_sp_neigh_entry *
mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
			struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry) {
		if (list_empty(&rif->neigh_list))
			return NULL;
		else
			return list_first_entry(&rif->neigh_list,
						typeof(*neigh_entry),
						rif_list_node);
	}
	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
		return NULL;
	return list_next_entry(neigh_entry, rif_list_node);
}

int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->key.n->tbl->family;
}

unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->ha;
}

u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return ntohl(*((__be32 *) n->primary_key));
}

struct in6_addr *
mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return (struct in6_addr *) &n->primary_key;
}

int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_neigh_entry *neigh_entry,
			       u64 *p_counter)
{
	if (!neigh_entry->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
					 p_counter, NULL);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
			   u16 rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
	if (!neigh_entry)
		return NULL;

	neigh_entry->key.n = n;
	neigh_entry->rif = rif;
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);

	return neigh_entry;
}

static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}

static bool
mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct devlink *devlink;
	const char *table_name;

	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
	case AF_INET:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
		break;
	case AF_INET6:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
		break;
	default:
		WARN_ON(1);
		return false;
	}

	devlink = priv_to_devlink(mlxsw_sp->core);
	return devlink_dpipe_table_counter_enabled(devlink, table_name);
}

static void
mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
		return;

	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
		return;

	neigh_entry->counter_valid = true;
}

static void
mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry->counter_valid)
		return;
	mlxsw_sp_flow_counter_free(mlxsw_sp,
				   neigh_entry->counter_index);
	neigh_entry->counter_valid = false;
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_key key;

	key.n = n;
	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
				      &key, mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
	interval = min_t(unsigned long,
			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}

static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);
	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}

#if IS_ENABLED(CONFIG_IPV6)
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	struct net_device *dev;
	struct neighbour *n;
	struct in6_addr dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
					 (char *) &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&nd_tbl, &dip, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}
#else
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
}
#endif

static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	u8 num_entries;
	int i;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								rec_index);
	/* Hardware starts counting at 0, so add 1. */
	num_entries++;

	/* Each record consists of several neighbour entries. */
	for (i = 0; i < num_entries; i++) {
		int ent_index;

		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
						       ent_index);
	}
}

static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	/* One record contains one entry. */
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
					       rec_index);
}

static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	}
}

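/* Determine whether another dump iteration may be needed: the response is
 * full only if the maximum number of records was returned and the last
 * record has all of its entries in use (an IPv6 record holds a single entry
 * and always counts as full).
 */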
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
	u8 num_rec, last_rec_index, num_entries;

	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
	last_rec_index = num_rec - 1;

	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
		return false;
	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
	    MLXSW_REG_RAUHTD_TYPE_IPV6)
		return true;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								last_rec_index);
	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
		return true;
	return false;
}

static int
__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
				       char *rauhtd_pl,
				       enum mlxsw_reg_rauhtd_type type)
{
	int i, num_rec;
	int err;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
	rtnl_unlock();

	return err;
}

static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_rauhtd_type type;
	char *rauhtd_pl;
	int err;

	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
	if (!rauhtd_pl)
		return -ENOMEM;

	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
	if (err)
		goto out;

	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
out:
	kfree(rauhtd_pl);
	return err;
}

static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent the list from changing */
1875 	rtnl_lock();
1876 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
1877 			    nexthop_neighs_list_node)
		/* If this neigh has nexthops, make the kernel think it is
		 * active regardless of traffic.
		 */
1881 		neigh_event_send(neigh_entry->key.n, NULL);
1882 	rtnl_unlock();
1883 }
1884 
1885 static void
1886 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
1887 {
1888 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
1889 
1890 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
1891 			       msecs_to_jiffies(interval));
1892 }
1893 
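/* Periodic work item: first update the kernel about neighbour
 * activity reported by the device, then make sure neighbours that
 * are used as nexthops are kept alive, and finally re-arm the work
 * with the configured polling interval.
 */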
1894 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
1895 {
1896 	struct mlxsw_sp_router *router;
1897 	int err;
1898 
1899 	router = container_of(work, struct mlxsw_sp_router,
1900 			      neighs_update.dw.work);
1901 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
1902 	if (err)
		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
1904 
1905 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
1906 
1907 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
1908 }
1909 
1910 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
1911 {
1912 	struct mlxsw_sp_neigh_entry *neigh_entry;
1913 	struct mlxsw_sp_router *router;
1914 
1915 	router = container_of(work, struct mlxsw_sp_router,
1916 			      nexthop_probe_dw.work);
	/* Iterate over the nexthop neighbours and send an ARP/ND probe to
	 * those that are unresolved. This solves a chicken-and-egg
	 * problem: a nexthop is not offloaded until its neighbour is
	 * resolved, but the neighbour might never be resolved if traffic
	 * is already flowing in hardware via a different nexthop.
	 *
	 * Take RTNL mutex here to prevent the list from changing.
	 */
1925 	rtnl_lock();
1926 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
1927 			    nexthop_neighs_list_node)
1928 		if (!neigh_entry->connected)
1929 			neigh_event_send(neigh_entry->key.n, NULL);
1930 	rtnl_unlock();
1931 
1932 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
1933 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
1934 }
1935 
1936 static void
1937 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1938 			      struct mlxsw_sp_neigh_entry *neigh_entry,
1939 			      bool removing);
1940 
1941 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
1942 {
1943 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
1944 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
1945 }
1946 
1947 static void
1948 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
1949 				struct mlxsw_sp_neigh_entry *neigh_entry,
1950 				enum mlxsw_reg_rauht_op op)
1951 {
1952 	struct neighbour *n = neigh_entry->key.n;
1953 	u32 dip = ntohl(*((__be32 *) n->primary_key));
1954 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
1955 
1956 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
1957 			      dip);
1958 	if (neigh_entry->counter_valid)
1959 		mlxsw_reg_rauht_pack_counter(rauht_pl,
1960 					     neigh_entry->counter_index);
1961 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1962 }
1963 
1964 static void
1965 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
1966 				struct mlxsw_sp_neigh_entry *neigh_entry,
1967 				enum mlxsw_reg_rauht_op op)
1968 {
1969 	struct neighbour *n = neigh_entry->key.n;
1970 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
1971 	const char *dip = n->primary_key;
1972 
1973 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
1974 			      dip);
1975 	if (neigh_entry->counter_valid)
1976 		mlxsw_reg_rauht_pack_counter(rauht_pl,
1977 					     neigh_entry->counter_index);
1978 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1979 }
1980 
1981 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
1982 {
1983 	struct neighbour *n = neigh_entry->key.n;
1984 
1985 	/* Packets with a link-local destination address are trapped
1986 	 * after LPM lookup and never reach the neighbour table, so
1987 	 * there is no need to program such neighbours to the device.
1988 	 */
1989 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
1990 	    IPV6_ADDR_LINKLOCAL)
1991 		return true;
1992 	return false;
1993 }
1994 
1995 static void
1996 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
1997 			    struct mlxsw_sp_neigh_entry *neigh_entry,
1998 			    bool adding)
1999 {
2000 	if (!adding && !neigh_entry->connected)
2001 		return;
2002 	neigh_entry->connected = adding;
2003 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2004 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2005 						mlxsw_sp_rauht_op(adding));
2006 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2007 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2008 			return;
2009 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2010 						mlxsw_sp_rauht_op(adding));
2011 	} else {
2012 		WARN_ON_ONCE(1);
2013 	}
2014 }
2015 
2016 void
2017 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2018 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2019 				    bool adding)
2020 {
2021 	if (adding)
2022 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2023 	else
2024 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
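	/* Re-write the entry so that the counter binding (or its
	 * removal) takes effect in the device.
	 */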
2025 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2026 }
2027 
2028 struct mlxsw_sp_neigh_event_work {
2029 	struct work_struct work;
2030 	struct mlxsw_sp *mlxsw_sp;
2031 	struct neighbour *n;
2032 };
2033 
2034 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2035 {
2036 	struct mlxsw_sp_neigh_event_work *neigh_work =
2037 		container_of(work, struct mlxsw_sp_neigh_event_work, work);
2038 	struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
2039 	struct mlxsw_sp_neigh_entry *neigh_entry;
2040 	struct neighbour *n = neigh_work->n;
2041 	unsigned char ha[ETH_ALEN];
2042 	bool entry_connected;
2043 	u8 nud_state, dead;
2044 
2045 	/* If these parameters are changed after we release the lock,
2046 	 * then we are guaranteed to receive another event letting us
2047 	 * know about it.
2048 	 */
2049 	read_lock_bh(&n->lock);
2050 	memcpy(ha, n->ha, ETH_ALEN);
2051 	nud_state = n->nud_state;
2052 	dead = n->dead;
2053 	read_unlock_bh(&n->lock);
2054 
2055 	rtnl_lock();
2056 	entry_connected = nud_state & NUD_VALID && !dead;
2057 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2058 	if (!entry_connected && !neigh_entry)
2059 		goto out;
2060 	if (!neigh_entry) {
2061 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2062 		if (IS_ERR(neigh_entry))
2063 			goto out;
2064 	}
2065 
2066 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2067 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2068 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2069 
2070 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2071 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2072 
2073 out:
2074 	rtnl_unlock();
2075 	neigh_release(n);
2076 	kfree(neigh_work);
2077 }
2078 
2079 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
2080 				   unsigned long event, void *ptr)
2081 {
2082 	struct mlxsw_sp_neigh_event_work *neigh_work;
2083 	struct mlxsw_sp_port *mlxsw_sp_port;
2084 	struct mlxsw_sp *mlxsw_sp;
2085 	unsigned long interval;
2086 	struct neigh_parms *p;
2087 	struct neighbour *n;
2088 
2089 	switch (event) {
2090 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2091 		p = ptr;
2092 
2093 		/* We don't care about changes in the default table. */
2094 		if (!p->dev || (p->tbl->family != AF_INET &&
2095 				p->tbl->family != AF_INET6))
2096 			return NOTIFY_DONE;
2097 
2098 		/* We are in atomic context and can't take RTNL mutex,
2099 		 * so use RCU variant to walk the device chain.
2100 		 */
2101 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2102 		if (!mlxsw_sp_port)
2103 			return NOTIFY_DONE;
2104 
2105 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2106 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2107 		mlxsw_sp->router->neighs_update.interval = interval;
2108 
2109 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2110 		break;
2111 	case NETEVENT_NEIGH_UPDATE:
2112 		n = ptr;
2113 
2114 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2115 			return NOTIFY_DONE;
2116 
2117 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2118 		if (!mlxsw_sp_port)
2119 			return NOTIFY_DONE;
2120 
2121 		neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
2122 		if (!neigh_work) {
2123 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2124 			return NOTIFY_BAD;
2125 		}
2126 
2127 		INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
2128 		neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2129 		neigh_work->n = n;
2130 
		/* Take a reference to ensure the neighbour is not
		 * destroyed until we drop the reference in the
		 * delayed work.
		 */
2135 		neigh_clone(n);
2136 		mlxsw_core_schedule_work(&neigh_work->work);
2137 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2138 		break;
2139 	}
2140 
2141 	return NOTIFY_DONE;
2142 }
2143 
2144 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2145 {
2146 	int err;
2147 
2148 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2149 			      &mlxsw_sp_neigh_ht_params);
2150 	if (err)
2151 		return err;
2152 
2153 	/* Initialize the polling interval according to the default
2154 	 * table.
2155 	 */
2156 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2157 
	/* Create the delayed works for neighbour activity updates and
	 * for probing of unresolved nexthops.
	 */
2159 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2160 			  mlxsw_sp_router_neighs_update_work);
2161 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2162 			  mlxsw_sp_router_probe_unresolved_nexthops);
2163 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2164 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2165 	return 0;
2166 }
2167 
2168 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2169 {
2170 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2171 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2172 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2173 }
2174 
2175 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2176 					 struct mlxsw_sp_rif *rif)
2177 {
2178 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2179 
2180 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2181 				 rif_list_node) {
2182 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2183 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2184 	}
2185 }
2186 
2187 enum mlxsw_sp_nexthop_type {
2188 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2189 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2190 };
2191 
2192 struct mlxsw_sp_nexthop_key {
2193 	struct fib_nh *fib_nh;
2194 };
2195 
2196 struct mlxsw_sp_nexthop {
2197 	struct list_head neigh_list_node; /* member of neigh entry list */
2198 	struct list_head rif_list_node;
2199 	struct list_head router_list_node;
2200 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2201 						* this belongs to
2202 						*/
2203 	struct rhash_head ht_node;
2204 	struct mlxsw_sp_nexthop_key key;
2205 	unsigned char gw_addr[sizeof(struct in6_addr)];
2206 	int ifindex;
2207 	int nh_weight;
2208 	int norm_nh_weight;
2209 	int num_adj_entries;
2210 	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put into the KVD linear area of
			      * this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * the KVD linear area of this group.
			 */
	   update:1; /* set indicates that the MAC of this neigh should
		      * be updated in HW.
		      */
2220 	enum mlxsw_sp_nexthop_type type;
2221 	union {
2222 		struct mlxsw_sp_neigh_entry *neigh_entry;
2223 		struct mlxsw_sp_ipip_entry *ipip_entry;
2224 	};
2225 	unsigned int counter_index;
2226 	bool counter_valid;
2227 };
2228 
2229 struct mlxsw_sp_nexthop_group {
2230 	void *priv;
2231 	struct rhash_head ht_node;
2232 	struct list_head fib_list; /* list of fib entries that use this group */
2233 	struct neigh_table *neigh_tbl;
2234 	u8 adj_index_valid:1,
2235 	   gateway:1; /* routes using the group use a gateway */
2236 	u32 adj_index;
2237 	u16 ecmp_size;
2238 	u16 count;
2239 	int sum_norm_weight;
2240 	struct mlxsw_sp_nexthop nexthops[0];
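/* Convenience accessor for the RIF of the group's first nexthop, used
 * by FIB entries of type local, which forward via a router interface
 * rather than via the adjacency table.
 */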
2241 #define nh_rif	nexthops[0].rif
2242 };
2243 
2244 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2245 				    struct mlxsw_sp_nexthop *nh)
2246 {
2247 	struct devlink *devlink;
2248 
2249 	devlink = priv_to_devlink(mlxsw_sp->core);
2250 	if (!devlink_dpipe_table_counter_enabled(devlink,
2251 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2252 		return;
2253 
2254 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2255 		return;
2256 
2257 	nh->counter_valid = true;
2258 }
2259 
2260 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2261 				   struct mlxsw_sp_nexthop *nh)
2262 {
2263 	if (!nh->counter_valid)
2264 		return;
2265 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2266 	nh->counter_valid = false;
2267 }
2268 
2269 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2270 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2271 {
2272 	if (!nh->counter_valid)
2273 		return -EINVAL;
2274 
2275 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2276 					 p_counter, NULL);
2277 }
2278 
2279 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2280 					       struct mlxsw_sp_nexthop *nh)
2281 {
2282 	if (!nh) {
2283 		if (list_empty(&router->nexthop_list))
2284 			return NULL;
2285 		else
2286 			return list_first_entry(&router->nexthop_list,
2287 						typeof(*nh), router_list_node);
2288 	}
2289 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2290 		return NULL;
2291 	return list_next_entry(nh, router_list_node);
2292 }
2293 
2294 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2295 {
2296 	return nh->offloaded;
2297 }
2298 
2299 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2300 {
2301 	if (!nh->offloaded)
2302 		return NULL;
2303 	return nh->neigh_entry->ha;
2304 }
2305 
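/* Report the location of this nexthop's adjacency entries: the group's
 * base adjacency index, the group size and the offset of the nexthop
 * within the group. Example (illustrative): for a group with base
 * adjacency index 1000 whose offloaded nexthops occupy {1, 2, 1}
 * entries, the third nexthop is reported with offset 1 + 2 = 3,
 * i.e. its entries start at index 1003.
 */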
2306 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2307 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2308 {
2309 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2310 	u32 adj_hash_index = 0;
2311 	int i;
2312 
2313 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2314 		return -EINVAL;
2315 
2316 	*p_adj_index = nh_grp->adj_index;
2317 	*p_adj_size = nh_grp->ecmp_size;
2318 
2319 	for (i = 0; i < nh_grp->count; i++) {
2320 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2321 
2322 		if (nh_iter == nh)
2323 			break;
2324 		if (nh_iter->offloaded)
2325 			adj_hash_index += nh_iter->num_adj_entries;
2326 	}
2327 
2328 	*p_adj_hash_index = adj_hash_index;
2329 	return 0;
2330 }
2331 
2332 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2333 {
2334 	return nh->rif;
2335 }
2336 
2337 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2338 {
2339 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2340 	int i;
2341 
2342 	for (i = 0; i < nh_grp->count; i++) {
2343 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2344 
2345 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2346 			return true;
2347 	}
2348 	return false;
2349 }
2350 
2351 static struct fib_info *
2352 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2353 {
2354 	return nh_grp->priv;
2355 }
2356 
2357 struct mlxsw_sp_nexthop_group_cmp_arg {
2358 	enum mlxsw_sp_l3proto proto;
2359 	union {
2360 		struct fib_info *fi;
2361 		struct mlxsw_sp_fib6_entry *fib6_entry;
2362 	};
2363 };
2364 
2365 static bool
2366 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2367 				    const struct in6_addr *gw, int ifindex)
2368 {
2369 	int i;
2370 
2371 	for (i = 0; i < nh_grp->count; i++) {
2372 		const struct mlxsw_sp_nexthop *nh;
2373 
2374 		nh = &nh_grp->nexthops[i];
2375 		if (nh->ifindex == ifindex &&
2376 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2377 			return true;
2378 	}
2379 
2380 	return false;
2381 }
2382 
2383 static bool
2384 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2385 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2386 {
2387 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2388 
2389 	if (nh_grp->count != fib6_entry->nrt6)
2390 		return false;
2391 
2392 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2393 		struct in6_addr *gw;
2394 		int ifindex;
2395 
2396 		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
2397 		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
2398 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex))
2399 			return false;
2400 	}
2401 
2402 	return true;
2403 }
2404 
2405 static int
2406 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2407 {
2408 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2409 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2410 
2411 	switch (cmp_arg->proto) {
2412 	case MLXSW_SP_L3_PROTO_IPV4:
2413 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2414 	case MLXSW_SP_L3_PROTO_IPV6:
2415 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2416 						    cmp_arg->fib6_entry);
2417 	default:
2418 		WARN_ON(1);
2419 		return 1;
2420 	}
2421 }
2422 
2423 static int
2424 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2425 {
2426 	return nh_grp->neigh_tbl->family;
2427 }
2428 
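/* An IPv4 group is keyed by its struct fib_info pointer, whereas an
 * IPv6 group is keyed by the {gateway, ifindex} tuples of its
 * nexthops. The object hash below therefore only folds the nexthop
 * count and ifindexes and relies on the compare function for an
 * exact match.
 */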
2429 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2430 {
2431 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2432 	const struct mlxsw_sp_nexthop *nh;
2433 	struct fib_info *fi;
2434 	unsigned int val;
2435 	int i;
2436 
2437 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2438 	case AF_INET:
2439 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2440 		return jhash(&fi, sizeof(fi), seed);
2441 	case AF_INET6:
2442 		val = nh_grp->count;
2443 		for (i = 0; i < nh_grp->count; i++) {
2444 			nh = &nh_grp->nexthops[i];
2445 			val ^= nh->ifindex;
2446 		}
2447 		return jhash(&val, sizeof(val), seed);
2448 	default:
2449 		WARN_ON(1);
2450 		return 0;
2451 	}
2452 }
2453 
2454 static u32
2455 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2456 {
2457 	unsigned int val = fib6_entry->nrt6;
2458 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2459 	struct net_device *dev;
2460 
2461 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2462 		dev = mlxsw_sp_rt6->rt->dst.dev;
2463 		val ^= dev->ifindex;
2464 	}
2465 
2466 	return jhash(&val, sizeof(val), seed);
2467 }
2468 
2469 static u32
2470 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2471 {
2472 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2473 
2474 	switch (cmp_arg->proto) {
2475 	case MLXSW_SP_L3_PROTO_IPV4:
2476 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2477 	case MLXSW_SP_L3_PROTO_IPV6:
2478 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2479 	default:
2480 		WARN_ON(1);
2481 		return 0;
2482 	}
2483 }
2484 
2485 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2486 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2487 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
2488 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2489 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2490 };
2491 
2492 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2493 					 struct mlxsw_sp_nexthop_group *nh_grp)
2494 {
2495 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2496 	    !nh_grp->gateway)
2497 		return 0;
2498 
2499 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2500 				      &nh_grp->ht_node,
2501 				      mlxsw_sp_nexthop_group_ht_params);
2502 }
2503 
2504 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2505 					  struct mlxsw_sp_nexthop_group *nh_grp)
2506 {
2507 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2508 	    !nh_grp->gateway)
2509 		return;
2510 
2511 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2512 			       &nh_grp->ht_node,
2513 			       mlxsw_sp_nexthop_group_ht_params);
2514 }
2515 
2516 static struct mlxsw_sp_nexthop_group *
2517 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2518 			       struct fib_info *fi)
2519 {
2520 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2521 
2522 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2523 	cmp_arg.fi = fi;
2524 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2525 				      &cmp_arg,
2526 				      mlxsw_sp_nexthop_group_ht_params);
2527 }
2528 
2529 static struct mlxsw_sp_nexthop_group *
2530 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2531 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2532 {
2533 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2534 
2535 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2536 	cmp_arg.fib6_entry = fib6_entry;
2537 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2538 				      &cmp_arg,
2539 				      mlxsw_sp_nexthop_group_ht_params);
2540 }
2541 
2542 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2543 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2544 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2545 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
2546 };
2547 
2548 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2549 				   struct mlxsw_sp_nexthop *nh)
2550 {
2551 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2552 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2553 }
2554 
2555 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2556 				    struct mlxsw_sp_nexthop *nh)
2557 {
2558 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2559 			       mlxsw_sp_nexthop_ht_params);
2560 }
2561 
2562 static struct mlxsw_sp_nexthop *
2563 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2564 			struct mlxsw_sp_nexthop_key key)
2565 {
2566 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2567 				      mlxsw_sp_nexthop_ht_params);
2568 }
2569 
2570 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2571 					     const struct mlxsw_sp_fib *fib,
2572 					     u32 adj_index, u16 ecmp_size,
2573 					     u32 new_adj_index,
2574 					     u16 new_ecmp_size)
2575 {
2576 	char raleu_pl[MLXSW_REG_RALEU_LEN];
2577 
2578 	mlxsw_reg_raleu_pack(raleu_pl,
2579 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
2580 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
2581 			     new_ecmp_size);
2582 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2583 }
2584 
2585 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2586 					  struct mlxsw_sp_nexthop_group *nh_grp,
2587 					  u32 old_adj_index, u16 old_ecmp_size)
2588 {
2589 	struct mlxsw_sp_fib_entry *fib_entry;
2590 	struct mlxsw_sp_fib *fib = NULL;
2591 	int err;
2592 
2593 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2594 		if (fib == fib_entry->fib_node->fib)
2595 			continue;
2596 		fib = fib_entry->fib_node->fib;
2597 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2598 							old_adj_index,
2599 							old_ecmp_size,
2600 							nh_grp->adj_index,
2601 							nh_grp->ecmp_size);
2602 		if (err)
2603 			return err;
2604 	}
2605 	return 0;
2606 }
2607 
2608 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2609 				     struct mlxsw_sp_nexthop *nh)
2610 {
2611 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2612 	char ratr_pl[MLXSW_REG_RATR_LEN];
2613 
2614 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2615 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
2616 			    adj_index, neigh_entry->rif);
2617 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2618 	if (nh->counter_valid)
2619 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2620 	else
2621 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2622 
2623 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2624 }
2625 
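/* A weighted nexthop occupies nh->num_adj_entries consecutive
 * adjacency entries; update all of them.
 */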
2626 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2627 			    struct mlxsw_sp_nexthop *nh)
2628 {
2629 	int i;
2630 
2631 	for (i = 0; i < nh->num_adj_entries; i++) {
2632 		int err;
2633 
2634 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2635 		if (err)
2636 			return err;
2637 	}
2638 
2639 	return 0;
2640 }
2641 
2642 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2643 					  u32 adj_index,
2644 					  struct mlxsw_sp_nexthop *nh)
2645 {
2646 	const struct mlxsw_sp_ipip_ops *ipip_ops;
2647 
2648 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
2649 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
2650 }
2651 
2652 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2653 					u32 adj_index,
2654 					struct mlxsw_sp_nexthop *nh)
2655 {
2656 	int i;
2657 
2658 	for (i = 0; i < nh->num_adj_entries; i++) {
2659 		int err;
2660 
2661 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2662 						     nh);
2663 		if (err)
2664 			return err;
2665 	}
2666 
2667 	return 0;
2668 }
2669 
2670 static int
2671 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
2672 			      struct mlxsw_sp_nexthop_group *nh_grp,
2673 			      bool reallocate)
2674 {
2675 	u32 adj_index = nh_grp->adj_index; /* base */
2676 	struct mlxsw_sp_nexthop *nh;
2677 	int i;
2678 	int err;
2679 
2680 	for (i = 0; i < nh_grp->count; i++) {
2681 		nh = &nh_grp->nexthops[i];
2682 
2683 		if (!nh->should_offload) {
2684 			nh->offloaded = 0;
2685 			continue;
2686 		}
2687 
2688 		if (nh->update || reallocate) {
2689 			switch (nh->type) {
2690 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
2691 				err = mlxsw_sp_nexthop_update
2692 					    (mlxsw_sp, adj_index, nh);
2693 				break;
2694 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
2695 				err = mlxsw_sp_nexthop_ipip_update
2696 					    (mlxsw_sp, adj_index, nh);
2697 				break;
2698 			}
2699 			if (err)
2700 				return err;
2701 			nh->update = 0;
2702 			nh->offloaded = 1;
2703 		}
2704 		adj_index += nh->num_adj_entries;
2705 	}
2706 	return 0;
2707 }
2708 
2709 static bool
2710 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2711 				 const struct mlxsw_sp_fib_entry *fib_entry);
2712 
2713 static int
2714 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
2715 				    struct mlxsw_sp_nexthop_group *nh_grp)
2716 {
2717 	struct mlxsw_sp_fib_entry *fib_entry;
2718 	int err;
2719 
2720 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2721 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
2722 						      fib_entry))
2723 			continue;
2724 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2725 		if (err)
2726 			return err;
2727 	}
2728 	return 0;
2729 }
2730 
2731 static void
2732 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
2733 				   enum mlxsw_reg_ralue_op op, int err);
2734 
2735 static void
2736 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
2737 {
2738 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
2739 	struct mlxsw_sp_fib_entry *fib_entry;
2740 
2741 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2742 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
2743 						      fib_entry))
2744 			continue;
2745 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
2746 	}
2747 }
2748 
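/* Example (illustrative): a requested size of 5 is kept as is, while
 * 75 is rounded up to 512 and 600 to 1024.
 */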
2749 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
2750 {
2751 	/* Valid sizes for an adjacency group are:
2752 	 * 1-64, 512, 1024, 2048 and 4096.
2753 	 */
2754 	if (*p_adj_grp_size <= 64)
2755 		return;
2756 	else if (*p_adj_grp_size <= 512)
2757 		*p_adj_grp_size = 512;
2758 	else if (*p_adj_grp_size <= 1024)
2759 		*p_adj_grp_size = 1024;
2760 	else if (*p_adj_grp_size <= 2048)
2761 		*p_adj_grp_size = 2048;
2762 	else
2763 		*p_adj_grp_size = 4096;
2764 }
2765 
2766 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
2767 					     unsigned int alloc_size)
2768 {
2769 	if (alloc_size >= 4096)
2770 		*p_adj_grp_size = 4096;
2771 	else if (alloc_size >= 2048)
2772 		*p_adj_grp_size = 2048;
2773 	else if (alloc_size >= 1024)
2774 		*p_adj_grp_size = 1024;
2775 	else if (alloc_size >= 512)
2776 		*p_adj_grp_size = 512;
2777 }
2778 
2779 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
2780 				     u16 *p_adj_grp_size)
2781 {
2782 	unsigned int alloc_size;
2783 	int err;
2784 
2785 	/* Round up the requested group size to the next size supported
2786 	 * by the device and make sure the request can be satisfied.
2787 	 */
2788 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
2789 	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
2790 					     &alloc_size);
2791 	if (err)
2792 		return err;
2793 	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as many of them as
2795 	 * possible.
2796 	 */
2797 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
2798 
2799 	return 0;
2800 }
2801 
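/* Normalize the nexthop weights by their greatest common divisor, so
 * that the smallest possible adjacency group still preserves the
 * requested ratio. Example (illustrative): weights {2, 4} have a GCD
 * of 2 and normalize to {1, 2} with a sum of 3. Nexthops that should
 * not be offloaded are skipped and do not influence the result.
 */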
2802 static void
2803 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
2804 {
2805 	int i, g = 0, sum_norm_weight = 0;
2806 	struct mlxsw_sp_nexthop *nh;
2807 
2808 	for (i = 0; i < nh_grp->count; i++) {
2809 		nh = &nh_grp->nexthops[i];
2810 
2811 		if (!nh->should_offload)
2812 			continue;
2813 		if (g > 0)
2814 			g = gcd(nh->nh_weight, g);
2815 		else
2816 			g = nh->nh_weight;
2817 	}
2818 
2819 	for (i = 0; i < nh_grp->count; i++) {
2820 		nh = &nh_grp->nexthops[i];
2821 
2822 		if (!nh->should_offload)
2823 			continue;
2824 		nh->norm_nh_weight = nh->nh_weight / g;
2825 		sum_norm_weight += nh->norm_nh_weight;
2826 	}
2827 
2828 	nh_grp->sum_norm_weight = sum_norm_weight;
2829 }
2830 
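/* Distribute the adjacency entries of the group among the nexthops in
 * proportion to their normalized weights. Example (illustrative):
 * normalized weights {1, 64} sum up to 65, which is rounded up to an
 * ECMP size of 512; the first nexthop is then given
 * DIV_ROUND_CLOSEST(512 * 1, 65) = 8 entries and the second one the
 * remaining 504, closely approximating the requested 1:64 ratio.
 */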
2831 static void
2832 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
2833 {
2834 	int total = nh_grp->sum_norm_weight;
2835 	u16 ecmp_size = nh_grp->ecmp_size;
2836 	int i, weight = 0, lower_bound = 0;
2837 
2838 	for (i = 0; i < nh_grp->count; i++) {
2839 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
2840 		int upper_bound;
2841 
2842 		if (!nh->should_offload)
2843 			continue;
2844 		weight += nh->norm_nh_weight;
2845 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
2846 		nh->num_adj_entries = upper_bound - lower_bound;
2847 		lower_bound = upper_bound;
2848 	}
2849 }
2850 
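/* Re-fit the group into the adjacency table after a change in the
 * offload state of its members: normalize the weights, allocate a KVD
 * linear area of a valid size, write the adjacency entries, re-point
 * the FIB entries using the group and release the old area. On any
 * failure, fall back to trapping packets to the CPU.
 */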
2851 static void
2852 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
2853 			       struct mlxsw_sp_nexthop_group *nh_grp)
2854 {
2855 	u16 ecmp_size, old_ecmp_size;
2856 	struct mlxsw_sp_nexthop *nh;
2857 	bool offload_change = false;
2858 	u32 adj_index;
2859 	bool old_adj_index_valid;
2860 	u32 old_adj_index;
2861 	int i;
2862 	int err;
2863 
2864 	if (!nh_grp->gateway) {
2865 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
2866 		return;
2867 	}
2868 
2869 	for (i = 0; i < nh_grp->count; i++) {
2870 		nh = &nh_grp->nexthops[i];
2871 
2872 		if (nh->should_offload != nh->offloaded) {
2873 			offload_change = true;
2874 			if (nh->should_offload)
2875 				nh->update = 1;
2876 		}
2877 	}
2878 	if (!offload_change) {
2879 		/* Nothing was added or removed, so no need to reallocate. Just
2880 		 * update MAC on existing adjacency indexes.
2881 		 */
2882 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
2883 		if (err) {
2884 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
2885 			goto set_trap;
2886 		}
2887 		return;
2888 	}
2889 	mlxsw_sp_nexthop_group_normalize(nh_grp);
2890 	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through the kernel.
		 */
2894 		goto set_trap;
2895 
2896 	ecmp_size = nh_grp->sum_norm_weight;
2897 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
2898 	if (err)
2899 		/* No valid allocation size available. */
2900 		goto set_trap;
2901 
2902 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
2903 	if (err) {
2904 		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through the kernel.
2906 		 */
2907 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
2908 		goto set_trap;
2909 	}
2910 	old_adj_index_valid = nh_grp->adj_index_valid;
2911 	old_adj_index = nh_grp->adj_index;
2912 	old_ecmp_size = nh_grp->ecmp_size;
2913 	nh_grp->adj_index_valid = 1;
2914 	nh_grp->adj_index = adj_index;
2915 	nh_grp->ecmp_size = ecmp_size;
2916 	mlxsw_sp_nexthop_group_rebalance(nh_grp);
2917 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
2918 	if (err) {
2919 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
2920 		goto set_trap;
2921 	}
2922 
2923 	if (!old_adj_index_valid) {
2924 		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use the adjacency index.
2926 		 */
2927 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
2928 		if (err) {
2929 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
2930 			goto set_trap;
2931 		}
2932 		return;
2933 	}
2934 
2935 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
2936 					     old_adj_index, old_ecmp_size);
2937 	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
2938 	if (err) {
2939 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
2940 		goto set_trap;
2941 	}
2942 
2943 	/* Offload state within the group changed, so update the flags. */
2944 	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
2945 
2946 	return;
2947 
2948 set_trap:
2949 	old_adj_index_valid = nh_grp->adj_index_valid;
2950 	nh_grp->adj_index_valid = 0;
2951 	for (i = 0; i < nh_grp->count; i++) {
2952 		nh = &nh_grp->nexthops[i];
2953 		nh->offloaded = 0;
2954 	}
2955 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
2956 	if (err)
2957 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
2958 	if (old_adj_index_valid)
2959 		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
2960 }
2961 
2962 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
2963 					    bool removing)
2964 {
2965 	if (!removing)
2966 		nh->should_offload = 1;
2967 	else if (nh->offloaded)
2968 		nh->should_offload = 0;
2969 	nh->update = 1;
2970 }
2971 
2972 static void
2973 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2974 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2975 			      bool removing)
2976 {
2977 	struct mlxsw_sp_nexthop *nh;
2978 
2979 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
2980 			    neigh_list_node) {
2981 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
2982 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
2983 	}
2984 }
2985 
2986 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
2987 				      struct mlxsw_sp_rif *rif)
2988 {
2989 	if (nh->rif)
2990 		return;
2991 
2992 	nh->rif = rif;
2993 	list_add(&nh->rif_list_node, &rif->nexthop_list);
2994 }
2995 
2996 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
2997 {
2998 	if (!nh->rif)
2999 		return;
3000 
3001 	list_del(&nh->rif_list_node);
3002 	nh->rif = NULL;
3003 }
3004 
3005 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3006 				       struct mlxsw_sp_nexthop *nh)
3007 {
3008 	struct mlxsw_sp_neigh_entry *neigh_entry;
3009 	struct neighbour *n;
3010 	u8 nud_state, dead;
3011 	int err;
3012 
3013 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3014 		return 0;
3015 
	/* Take a reference on the neighbour, ensuring it is not
	 * destroyed before the nexthop entry is finished with it.
	 * The reference is taken either in neigh_lookup() or, in
	 * case the neighbour is not found, in neigh_create().
	 */
3021 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3022 	if (!n) {
3023 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3024 				 nh->rif->dev);
3025 		if (IS_ERR(n))
3026 			return PTR_ERR(n);
3027 		neigh_event_send(n, NULL);
3028 	}
3029 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3030 	if (!neigh_entry) {
3031 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3032 		if (IS_ERR(neigh_entry)) {
			err = PTR_ERR(neigh_entry);
3034 			goto err_neigh_entry_create;
3035 		}
3036 	}
3037 
	/* If this is the first nexthop connected to that neigh, add it
	 * to nexthop_neighs_list.
	 */
3041 	if (list_empty(&neigh_entry->nexthop_list))
3042 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3043 			      &mlxsw_sp->router->nexthop_neighs_list);
3044 
3045 	nh->neigh_entry = neigh_entry;
3046 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3047 	read_lock_bh(&n->lock);
3048 	nud_state = n->nud_state;
3049 	dead = n->dead;
3050 	read_unlock_bh(&n->lock);
3051 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3052 
3053 	return 0;
3054 
3055 err_neigh_entry_create:
3056 	neigh_release(n);
3057 	return err;
3058 }
3059 
3060 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3061 					struct mlxsw_sp_nexthop *nh)
3062 {
3063 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3064 	struct neighbour *n;
3065 
3066 	if (!neigh_entry)
3067 		return;
3068 	n = neigh_entry->key.n;
3069 
3070 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3071 	list_del(&nh->neigh_list_node);
3072 	nh->neigh_entry = NULL;
3073 
	/* If this is the last nexthop connected to that neigh, remove it
	 * from nexthop_neighs_list.
	 */
3077 	if (list_empty(&neigh_entry->nexthop_list))
3078 		list_del(&neigh_entry->nexthop_neighs_list_node);
3079 
3080 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3081 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3082 
3083 	neigh_release(n);
3084 }
3085 
3086 static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3087 				      struct mlxsw_sp_nexthop *nh,
3088 				      struct net_device *ol_dev)
3089 {
3090 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3091 		return 0;
3092 
3093 	nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
3094 	if (!nh->ipip_entry)
3095 		return -ENOENT;
3096 
3097 	__mlxsw_sp_nexthop_neigh_update(nh, false);
3098 	return 0;
3099 }
3100 
3101 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3102 				       struct mlxsw_sp_nexthop *nh)
3103 {
3104 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3105 
3106 	if (!ipip_entry)
3107 		return;
3108 
3109 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3110 	nh->ipip_entry = NULL;
3111 }
3112 
3113 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3114 					const struct fib_nh *fib_nh,
3115 					enum mlxsw_sp_ipip_type *p_ipipt)
3116 {
3117 	struct net_device *dev = fib_nh->nh_dev;
3118 
3119 	return dev &&
3120 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3121 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3122 }
3123 
3124 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3125 				       struct mlxsw_sp_nexthop *nh)
3126 {
3127 	switch (nh->type) {
3128 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3129 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3130 		mlxsw_sp_nexthop_rif_fini(nh);
3131 		break;
3132 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3133 		mlxsw_sp_nexthop_rif_fini(nh);
3134 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3135 		break;
3136 	}
3137 }
3138 
3139 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3140 				       struct mlxsw_sp_nexthop *nh,
3141 				       struct fib_nh *fib_nh)
3142 {
3143 	struct mlxsw_sp_router *router = mlxsw_sp->router;
3144 	struct net_device *dev = fib_nh->nh_dev;
3145 	enum mlxsw_sp_ipip_type ipipt;
3146 	struct mlxsw_sp_rif *rif;
3147 	int err;
3148 
3149 	if (mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fib_nh, &ipipt) &&
3150 	    router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
3151 						     MLXSW_SP_L3_PROTO_IPV4)) {
3152 		nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3153 		err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
3154 		if (err)
3155 			return err;
3156 		mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
3157 		return 0;
3158 	}
3159 
3160 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3161 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3162 	if (!rif)
3163 		return 0;
3164 
3165 	mlxsw_sp_nexthop_rif_init(nh, rif);
3166 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3167 	if (err)
3168 		goto err_neigh_init;
3169 
3170 	return 0;
3171 
3172 err_neigh_init:
3173 	mlxsw_sp_nexthop_rif_fini(nh);
3174 	return err;
3175 }
3176 
3177 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3178 					struct mlxsw_sp_nexthop *nh)
3179 {
3180 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3181 }
3182 
3183 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3184 				  struct mlxsw_sp_nexthop_group *nh_grp,
3185 				  struct mlxsw_sp_nexthop *nh,
3186 				  struct fib_nh *fib_nh)
3187 {
3188 	struct net_device *dev = fib_nh->nh_dev;
3189 	struct in_device *in_dev;
3190 	int err;
3191 
3192 	nh->nh_grp = nh_grp;
3193 	nh->key.fib_nh = fib_nh;
3194 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3195 	nh->nh_weight = fib_nh->nh_weight;
3196 #else
3197 	nh->nh_weight = 1;
3198 #endif
3199 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3200 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3201 	if (err)
3202 		return err;
3203 
3204 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3205 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3206 
3207 	if (!dev)
3208 		return 0;
3209 
3210 	in_dev = __in_dev_get_rtnl(dev);
3211 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3212 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3213 		return 0;
3214 
3215 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3216 	if (err)
3217 		goto err_nexthop_neigh_init;
3218 
3219 	return 0;
3220 
3221 err_nexthop_neigh_init:
3222 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3223 	return err;
3224 }
3225 
3226 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3227 				   struct mlxsw_sp_nexthop *nh)
3228 {
3229 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3230 	list_del(&nh->router_list_node);
3231 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3232 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3233 }
3234 
3235 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3236 				    unsigned long event, struct fib_nh *fib_nh)
3237 {
3238 	struct mlxsw_sp_nexthop_key key;
3239 	struct mlxsw_sp_nexthop *nh;
3240 
3241 	if (mlxsw_sp->router->aborted)
3242 		return;
3243 
3244 	key.fib_nh = fib_nh;
3245 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3246 	if (WARN_ON_ONCE(!nh))
3247 		return;
3248 
3249 	switch (event) {
3250 	case FIB_EVENT_NH_ADD:
3251 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3252 		break;
3253 	case FIB_EVENT_NH_DEL:
3254 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3255 		break;
3256 	}
3257 
3258 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3259 }
3260 
3261 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3262 					   struct mlxsw_sp_rif *rif)
3263 {
3264 	struct mlxsw_sp_nexthop *nh, *tmp;
3265 
3266 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3267 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3268 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3269 	}
3270 }
3271 
3272 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3273 				   const struct fib_info *fi)
3274 {
3275 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3276 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3277 }
3278 
3279 static struct mlxsw_sp_nexthop_group *
3280 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3281 {
3282 	struct mlxsw_sp_nexthop_group *nh_grp;
3283 	struct mlxsw_sp_nexthop *nh;
3284 	struct fib_nh *fib_nh;
3285 	size_t alloc_size;
3286 	int i;
3287 	int err;
3288 
3289 	alloc_size = sizeof(*nh_grp) +
3290 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3291 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3292 	if (!nh_grp)
3293 		return ERR_PTR(-ENOMEM);
3294 	nh_grp->priv = fi;
3295 	INIT_LIST_HEAD(&nh_grp->fib_list);
3296 	nh_grp->neigh_tbl = &arp_tbl;
3297 
3298 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3299 	nh_grp->count = fi->fib_nhs;
3300 	fib_info_hold(fi);
3301 	for (i = 0; i < nh_grp->count; i++) {
3302 		nh = &nh_grp->nexthops[i];
3303 		fib_nh = &fi->fib_nh[i];
3304 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3305 		if (err)
3306 			goto err_nexthop4_init;
3307 	}
3308 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3309 	if (err)
3310 		goto err_nexthop_group_insert;
3311 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3312 	return nh_grp;
3313 
3314 err_nexthop_group_insert:
3315 err_nexthop4_init:
3316 	for (i--; i >= 0; i--) {
3317 		nh = &nh_grp->nexthops[i];
3318 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3319 	}
3320 	fib_info_put(fi);
3321 	kfree(nh_grp);
3322 	return ERR_PTR(err);
3323 }
3324 
3325 static void
3326 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3327 				struct mlxsw_sp_nexthop_group *nh_grp)
3328 {
3329 	struct mlxsw_sp_nexthop *nh;
3330 	int i;
3331 
3332 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3333 	for (i = 0; i < nh_grp->count; i++) {
3334 		nh = &nh_grp->nexthops[i];
3335 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3336 	}
3337 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3338 	WARN_ON_ONCE(nh_grp->adj_index_valid);
3339 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3340 	kfree(nh_grp);
3341 }
3342 
3343 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3344 				       struct mlxsw_sp_fib_entry *fib_entry,
3345 				       struct fib_info *fi)
3346 {
3347 	struct mlxsw_sp_nexthop_group *nh_grp;
3348 
3349 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3350 	if (!nh_grp) {
3351 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3352 		if (IS_ERR(nh_grp))
3353 			return PTR_ERR(nh_grp);
3354 	}
3355 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3356 	fib_entry->nh_group = nh_grp;
3357 	return 0;
3358 }
3359 
3360 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3361 					struct mlxsw_sp_fib_entry *fib_entry)
3362 {
3363 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3364 
3365 	list_del(&fib_entry->nexthop_group_node);
3366 	if (!list_empty(&nh_grp->fib_list))
3367 		return;
3368 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3369 }
3370 
3371 static bool
3372 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3373 {
3374 	struct mlxsw_sp_fib4_entry *fib4_entry;
3375 
3376 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3377 				  common);
3378 	return !fib4_entry->tos;
3379 }
3380 
3381 static bool
3382 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3383 {
3384 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3385 
3386 	switch (fib_entry->fib_node->fib->proto) {
3387 	case MLXSW_SP_L3_PROTO_IPV4:
3388 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3389 			return false;
3390 		break;
3391 	case MLXSW_SP_L3_PROTO_IPV6:
3392 		break;
3393 	}
3394 
3395 	switch (fib_entry->type) {
3396 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3397 		return !!nh_group->adj_index_valid;
3398 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3399 		return !!nh_group->nh_rif;
3400 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3401 		return true;
3402 	default:
3403 		return false;
3404 	}
3405 }
3406 
3407 static struct mlxsw_sp_nexthop *
3408 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3409 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3410 {
3411 	int i;
3412 
3413 	for (i = 0; i < nh_grp->count; i++) {
3414 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3415 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3416 
3417 		if (nh->rif && nh->rif->dev == rt->dst.dev &&
3418 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3419 				    &rt->rt6i_gateway))
3420 			return nh;
3422 	}
3423 
3424 	return NULL;
3425 }
3426 
3427 static void
3428 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3429 {
3430 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3431 	int i;
3432 
3433 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3434 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3435 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3436 		return;
3437 	}
3438 
3439 	for (i = 0; i < nh_grp->count; i++) {
3440 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3441 
3442 		if (nh->offloaded)
3443 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3444 		else
3445 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3446 	}
3447 }
3448 
3449 static void
3450 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3451 {
3452 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3453 	int i;
3454 
3455 	for (i = 0; i < nh_grp->count; i++) {
3456 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3457 
3458 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3459 	}
3460 }
3461 
3462 static void
3463 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3464 {
3465 	struct mlxsw_sp_fib6_entry *fib6_entry;
3466 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3467 
3468 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3469 				  common);
3470 
3471 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3472 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3473 				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3474 		return;
3475 	}
3476 
3477 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3478 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3479 		struct mlxsw_sp_nexthop *nh;
3480 
3481 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3482 		if (nh && nh->offloaded)
3483 			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3484 		else
3485 			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3486 	}
3487 }
3488 
3489 static void
3490 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3491 {
3492 	struct mlxsw_sp_fib6_entry *fib6_entry;
3493 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3494 
3495 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3496 				  common);
3497 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3498 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3499 
3500 		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3501 	}
3502 }
3503 
3504 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3505 {
3506 	switch (fib_entry->fib_node->fib->proto) {
3507 	case MLXSW_SP_L3_PROTO_IPV4:
3508 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
3509 		break;
3510 	case MLXSW_SP_L3_PROTO_IPV6:
3511 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
3512 		break;
3513 	}
3514 }
3515 
3516 static void
3517 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3518 {
3519 	switch (fib_entry->fib_node->fib->proto) {
3520 	case MLXSW_SP_L3_PROTO_IPV4:
3521 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3522 		break;
3523 	case MLXSW_SP_L3_PROTO_IPV6:
3524 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3525 		break;
3526 	}
3527 }
3528 
3529 static void
3530 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3531 				   enum mlxsw_reg_ralue_op op, int err)
3532 {
3533 	switch (op) {
3534 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3535 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3536 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3537 		if (err)
3538 			return;
3539 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3540 			mlxsw_sp_fib_entry_offload_set(fib_entry);
3541 		else
3542 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
3543 		return;
3544 	default:
3545 		return;
3546 	}
3547 }
3548 
3549 static void
3550 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3551 			      const struct mlxsw_sp_fib_entry *fib_entry,
3552 			      enum mlxsw_reg_ralue_op op)
3553 {
3554 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3555 	enum mlxsw_reg_ralxx_protocol proto;
3556 	u32 *p_dip;
3557 
3558 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3559 
3560 	switch (fib->proto) {
3561 	case MLXSW_SP_L3_PROTO_IPV4:
3562 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
3563 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3564 				      fib_entry->fib_node->key.prefix_len,
3565 				      *p_dip);
3566 		break;
3567 	case MLXSW_SP_L3_PROTO_IPV6:
3568 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3569 				      fib_entry->fib_node->key.prefix_len,
3570 				      fib_entry->fib_node->key.addr);
3571 		break;
3572 	}
3573 }
3574 
3575 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
3576 					struct mlxsw_sp_fib_entry *fib_entry,
3577 					enum mlxsw_reg_ralue_op op)
3578 {
3579 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3580 	enum mlxsw_reg_ralue_trap_action trap_action;
3581 	u16 trap_id = 0;
3582 	u32 adjacency_index = 0;
3583 	u16 ecmp_size = 0;
3584 
3585 	/* In case the nexthop group adjacency index is valid, use it
	 * with the provided ECMP size. Otherwise, set up a trap and
	 * pass the traffic to the kernel.
3588 	 */
3589 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3590 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3591 		adjacency_index = fib_entry->nh_group->adj_index;
3592 		ecmp_size = fib_entry->nh_group->ecmp_size;
3593 	} else {
3594 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3595 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3596 	}
3597 
3598 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3599 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
3600 					adjacency_index, ecmp_size);
3601 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3602 }
3603 
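/* A local entry forwards packets directly out of a router interface
 * (RIF), without an adjacency lookup. If the entry cannot be
 * offloaded, trap the packets to the CPU instead.
 */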
3604 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
3605 				       struct mlxsw_sp_fib_entry *fib_entry,
3606 				       enum mlxsw_reg_ralue_op op)
3607 {
3608 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
3609 	enum mlxsw_reg_ralue_trap_action trap_action;
3610 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3611 	u16 trap_id = 0;
3612 	u16 rif_index = 0;
3613 
3614 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3615 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3616 		rif_index = rif->rif_index;
3617 	} else {
3618 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3619 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3620 	}
3621 
3622 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3623 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
3624 				       rif_index);
3625 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3626 }
3627 
3628 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
3629 				      struct mlxsw_sp_fib_entry *fib_entry,
3630 				      enum mlxsw_reg_ralue_op op)
3631 {
3632 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3633 
3634 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3635 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
3636 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3637 }
3638 
3639 static int
3640 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
3641 				 struct mlxsw_sp_fib_entry *fib_entry,
3642 				 enum mlxsw_reg_ralue_op op)
3643 {
3644 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
3645 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3646 
3647 	if (WARN_ON(!ipip_entry))
3648 		return -EINVAL;
3649 
3650 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3651 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
3652 				      fib_entry->decap.tunnel_index);
3653 }
3654 
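/* Dispatch the RALUE operation according to the entry type. Reaching
 * the bottom means an unknown type was encountered, which is a bug.
 */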
3655 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3656 				   struct mlxsw_sp_fib_entry *fib_entry,
3657 				   enum mlxsw_reg_ralue_op op)
3658 {
3659 	switch (fib_entry->type) {
3660 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3661 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
3662 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3663 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
3664 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
3665 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
3666 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3667 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
3668 							fib_entry, op);
3669 	}
3670 	return -EINVAL;
3671 }
3672 
3673 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3674 				 struct mlxsw_sp_fib_entry *fib_entry,
3675 				 enum mlxsw_reg_ralue_op op)
3676 {
3677 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
3678 
3679 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
3680 
3681 	return err;
3682 }
3683 
3684 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
3685 				     struct mlxsw_sp_fib_entry *fib_entry)
3686 {
3687 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
3688 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
3689 }
3690 
3691 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
3692 				  struct mlxsw_sp_fib_entry *fib_entry)
3693 {
3694 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
3695 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
3696 }
3697 
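/* Derive the entry type - and thus the action programmed into the
 * device - from the kernel's route type: local addresses terminating
 * an IP-in-IP tunnel become decap entries, other local and broadcast
 * routes become traps, unreachable / blackhole / prohibit routes
 * become lower-priority traps, and unicast routes are remote when
 * gatewayed or local when directly connected.
 */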
3698 static int
3699 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
3700 			     const struct fib_entry_notifier_info *fen_info,
3701 			     struct mlxsw_sp_fib_entry *fib_entry)
3702 {
3703 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
3704 	struct net_device *dev = fen_info->fi->fib_dev;
3705 	struct mlxsw_sp_ipip_entry *ipip_entry;
3706 	struct fib_info *fi = fen_info->fi;
3707 
3708 	switch (fen_info->type) {
3709 	case RTN_LOCAL:
3710 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
3711 						 MLXSW_SP_L3_PROTO_IPV4, dip);
3712 		if (ipip_entry) {
3713 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
3714 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
3715 							     fib_entry,
3716 							     ipip_entry);
3717 		}
3718 		/* fall through */
3719 	case RTN_BROADCAST:
3720 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
3721 		return 0;
3722 	case RTN_UNREACHABLE: /* fall through */
3723 	case RTN_BLACKHOLE: /* fall through */
3724 	case RTN_PROHIBIT:
3725 		/* Packets hitting these routes need to be trapped, but
3726 		 * with a lower priority than packets directed at the host,
3727 		 * so use action type local instead of trap.
3728 		 */
3729 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
3730 		return 0;
3731 	case RTN_UNICAST:
3732 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
3733 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
3734 		else
3735 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
3736 		return 0;
3737 	default:
3738 		return -EINVAL;
3739 	}
3740 }
3741 
3742 static struct mlxsw_sp_fib4_entry *
3743 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
3744 			   struct mlxsw_sp_fib_node *fib_node,
3745 			   const struct fib_entry_notifier_info *fen_info)
3746 {
3747 	struct mlxsw_sp_fib4_entry *fib4_entry;
3748 	struct mlxsw_sp_fib_entry *fib_entry;
3749 	int err;
3750 
3751 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
3752 	if (!fib4_entry)
3753 		return ERR_PTR(-ENOMEM);
3754 	fib_entry = &fib4_entry->common;
3755 
3756 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
3757 	if (err)
3758 		goto err_fib4_entry_type_set;
3759 
3760 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
3761 	if (err)
3762 		goto err_nexthop4_group_get;
3763 
3764 	fib4_entry->prio = fen_info->fi->fib_priority;
3765 	fib4_entry->tb_id = fen_info->tb_id;
3766 	fib4_entry->type = fen_info->type;
3767 	fib4_entry->tos = fen_info->tos;
3768 
3769 	fib_entry->fib_node = fib_node;
3770 
3771 	return fib4_entry;
3772 
3773 err_nexthop4_group_get:
3774 err_fib4_entry_type_set:
3775 	kfree(fib4_entry);
3776 	return ERR_PTR(err);
3777 }
3778 
3779 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
3780 					struct mlxsw_sp_fib4_entry *fib4_entry)
3781 {
3782 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
3783 	kfree(fib4_entry);
3784 }
3785 
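/* Look up the driver's representation of an IPv4 route from the
 * notification info: resolve the virtual router from the table ID and
 * the FIB node from the prefix, then match the entry on table ID, TOS,
 * type and fib_info.
 */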
3786 static struct mlxsw_sp_fib4_entry *
3787 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
3788 			   const struct fib_entry_notifier_info *fen_info)
3789 {
3790 	struct mlxsw_sp_fib4_entry *fib4_entry;
3791 	struct mlxsw_sp_fib_node *fib_node;
3792 	struct mlxsw_sp_fib *fib;
3793 	struct mlxsw_sp_vr *vr;
3794 
3795 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
3796 	if (!vr)
3797 		return NULL;
3798 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
3799 
3800 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
3801 					    sizeof(fen_info->dst),
3802 					    fen_info->dst_len);
3803 	if (!fib_node)
3804 		return NULL;
3805 
3806 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
3807 		if (fib4_entry->tb_id == fen_info->tb_id &&
3808 		    fib4_entry->tos == fen_info->tos &&
3809 		    fib4_entry->type == fen_info->type &&
3810 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
3811 		    fen_info->fi) {
3812 			return fib4_entry;
3813 		}
3814 	}
3815 
3816 	return NULL;
3817 }
3818 
3819 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
3820 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
3821 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
3822 	.key_len = sizeof(struct mlxsw_sp_fib_key),
3823 	.automatic_shrinking = true,
3824 };
3825 
3826 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
3827 				    struct mlxsw_sp_fib_node *fib_node)
3828 {
3829 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
3830 				      mlxsw_sp_fib_ht_params);
3831 }
3832 
3833 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
3834 				     struct mlxsw_sp_fib_node *fib_node)
3835 {
3836 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
3837 			       mlxsw_sp_fib_ht_params);
3838 }
3839 
3840 static struct mlxsw_sp_fib_node *
3841 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
3842 			 size_t addr_len, unsigned char prefix_len)
3843 {
3844 	struct mlxsw_sp_fib_key key;
3845 
3846 	memset(&key, 0, sizeof(key));
3847 	memcpy(key.addr, addr, addr_len);
3848 	key.prefix_len = prefix_len;
3849 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
3850 }
3851 
3852 static struct mlxsw_sp_fib_node *
3853 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
3854 			 size_t addr_len, unsigned char prefix_len)
3855 {
3856 	struct mlxsw_sp_fib_node *fib_node;
3857 
3858 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
3859 	if (!fib_node)
3860 		return NULL;
3861 
3862 	INIT_LIST_HEAD(&fib_node->entry_list);
3863 	list_add(&fib_node->list, &fib->node_list);
3864 	memcpy(fib_node->key.addr, addr, addr_len);
3865 	fib_node->key.prefix_len = prefix_len;
3866 
3867 	return fib_node;
3868 }
3869 
3870 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
3871 {
3872 	list_del(&fib_node->list);
3873 	WARN_ON(!list_empty(&fib_node->entry_list));
3874 	kfree(fib_node);
3875 }
3876 
3877 static bool
3878 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3879 				 const struct mlxsw_sp_fib_entry *fib_entry)
3880 {
3881 	return list_first_entry(&fib_node->entry_list,
3882 				struct mlxsw_sp_fib_entry, list) == fib_entry;
3883 }
3884 
3885 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
3886 				      struct mlxsw_sp_fib *fib,
3887 				      struct mlxsw_sp_fib_node *fib_node)
3888 {
3889 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
3890 	struct mlxsw_sp_lpm_tree *lpm_tree;
3891 	int err;
3892 
3893 	/* Since the tree is shared between all virtual routers, we must
3894 	 * make sure it contains all the required prefix lengths. This
3895 	 * can be computed by either adding the new prefix length to the
3896 	 * existing prefix usage of a bound tree, or by aggregating the
3897 	 * prefix lengths across all virtual routers and adding the new
3898 	 * one as well.
3899 	 */
3900 	if (fib->lpm_tree)
3901 		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
3902 					  &fib->lpm_tree->prefix_usage);
3903 	else
3904 		mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
3905 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
3906 
3907 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
3908 					 fib->proto);
3909 	if (IS_ERR(lpm_tree))
3910 		return PTR_ERR(lpm_tree);
3911 
3912 	if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id)
3913 		return 0;
3914 
3915 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
3916 	if (err)
3917 		return err;
3918 
3919 	return 0;
3920 }
3921 
3922 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
3923 					 struct mlxsw_sp_fib *fib)
3924 {
3925 	if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
3926 		return;
3927 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
3928 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
3929 	fib->lpm_tree = NULL;
3930 }
3931 
3932 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
3933 {
3934 	unsigned char prefix_len = fib_node->key.prefix_len;
3935 	struct mlxsw_sp_fib *fib = fib_node->fib;
3936 
3937 	if (fib->prefix_ref_count[prefix_len]++ == 0)
3938 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
3939 }
3940 
3941 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
3942 {
3943 	unsigned char prefix_len = fib_node->key.prefix_len;
3944 	struct mlxsw_sp_fib *fib = fib_node->fib;
3945 
3946 	if (--fib->prefix_ref_count[prefix_len] == 0)
3947 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
3948 }
3949 
3950 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
3951 				  struct mlxsw_sp_fib_node *fib_node,
3952 				  struct mlxsw_sp_fib *fib)
3953 {
3954 	int err;
3955 
3956 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
3957 	if (err)
3958 		return err;
3959 	fib_node->fib = fib;
3960 
3961 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
3962 	if (err)
3963 		goto err_fib_lpm_tree_link;
3964 
3965 	mlxsw_sp_fib_node_prefix_inc(fib_node);
3966 
3967 	return 0;
3968 
3969 err_fib_lpm_tree_link:
3970 	fib_node->fib = NULL;
3971 	mlxsw_sp_fib_node_remove(fib, fib_node);
3972 	return err;
3973 }
3974 
3975 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
3976 				   struct mlxsw_sp_fib_node *fib_node)
3977 {
3978 	struct mlxsw_sp_fib *fib = fib_node->fib;
3979 
3980 	mlxsw_sp_fib_node_prefix_dec(fib_node);
3981 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
3982 	fib_node->fib = NULL;
3983 	mlxsw_sp_fib_node_remove(fib, fib_node);
3984 }
3985 
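/* Get the FIB node for the given prefix, creating it - and binding the
 * virtual router it belongs to - on first use. mlxsw_sp_fib_node_put()
 * tears the node down and releases the virtual router once the node's
 * entry list becomes empty.
 */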
3986 static struct mlxsw_sp_fib_node *
3987 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
3988 		      size_t addr_len, unsigned char prefix_len,
3989 		      enum mlxsw_sp_l3proto proto)
3990 {
3991 	struct mlxsw_sp_fib_node *fib_node;
3992 	struct mlxsw_sp_fib *fib;
3993 	struct mlxsw_sp_vr *vr;
3994 	int err;
3995 
3996 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
3997 	if (IS_ERR(vr))
3998 		return ERR_CAST(vr);
3999 	fib = mlxsw_sp_vr_fib(vr, proto);
4000 
4001 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4002 	if (fib_node)
4003 		return fib_node;
4004 
4005 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4006 	if (!fib_node) {
4007 		err = -ENOMEM;
4008 		goto err_fib_node_create;
4009 	}
4010 
4011 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4012 	if (err)
4013 		goto err_fib_node_init;
4014 
4015 	return fib_node;
4016 
4017 err_fib_node_init:
4018 	mlxsw_sp_fib_node_destroy(fib_node);
4019 err_fib_node_create:
4020 	mlxsw_sp_vr_put(vr);
4021 	return ERR_PTR(err);
4022 }
4023 
4024 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4025 				  struct mlxsw_sp_fib_node *fib_node)
4026 {
4027 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4028 
4029 	if (!list_empty(&fib_node->entry_list))
4030 		return;
4031 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4032 	mlxsw_sp_fib_node_destroy(fib_node);
4033 	mlxsw_sp_vr_put(vr);
4034 }
4035 
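/* Find the entry before which a new IPv4 entry should be inserted.
 * Entries in a node are sorted by descending table ID, then descending
 * TOS, then ascending priority, so that the first entry in the list is
 * the one to be programmed into the device. For example (illustrative
 * values): given an existing entry with tb_id 254, tos 0, prio 10, a
 * new entry with tb_id 254, tos 0, prio 5 is inserted before it and
 * takes its place in the device.
 */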
4036 static struct mlxsw_sp_fib4_entry *
4037 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4038 			      const struct mlxsw_sp_fib4_entry *new4_entry)
4039 {
4040 	struct mlxsw_sp_fib4_entry *fib4_entry;
4041 
4042 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4043 		if (fib4_entry->tb_id > new4_entry->tb_id)
4044 			continue;
4045 		if (fib4_entry->tb_id != new4_entry->tb_id)
4046 			break;
4047 		if (fib4_entry->tos > new4_entry->tos)
4048 			continue;
4049 		if (fib4_entry->prio >= new4_entry->prio ||
4050 		    fib4_entry->tos < new4_entry->tos)
4051 			return fib4_entry;
4052 	}
4053 
4054 	return NULL;
4055 }
4056 
4057 static int
4058 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4059 			       struct mlxsw_sp_fib4_entry *new4_entry)
4060 {
4061 	struct mlxsw_sp_fib_node *fib_node;
4062 
4063 	if (WARN_ON(!fib4_entry))
4064 		return -EINVAL;
4065 
4066 	fib_node = fib4_entry->common.fib_node;
4067 	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4068 				 common.list) {
4069 		if (fib4_entry->tb_id != new4_entry->tb_id ||
4070 		    fib4_entry->tos != new4_entry->tos ||
4071 		    fib4_entry->prio != new4_entry->prio)
4072 			break;
4073 	}
4074 
4075 	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4076 	return 0;
4077 }
4078 
4079 static int
4080 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4081 			       bool replace, bool append)
4082 {
4083 	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4084 	struct mlxsw_sp_fib4_entry *fib4_entry;
4085 
4086 	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4087 
4088 	if (append)
4089 		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4090 	if (replace && WARN_ON(!fib4_entry))
4091 		return -EINVAL;
4092 
4093 	/* Insert the new entry before the one it replaces, so that we
4094 	 * can later remove the replaced entry.
4095 	 */
4096 	if (fib4_entry) {
4097 		list_add_tail(&new4_entry->common.list,
4098 			      &fib4_entry->common.list);
4099 	} else {
4100 		struct mlxsw_sp_fib4_entry *last;
4101 
4102 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4103 			if (new4_entry->tb_id > last->tb_id)
4104 				break;
4105 			fib4_entry = last;
4106 		}
4107 
4108 		if (fib4_entry)
4109 			list_add(&new4_entry->common.list,
4110 				 &fib4_entry->common.list);
4111 		else
4112 			list_add(&new4_entry->common.list,
4113 				 &fib_node->entry_list);
4114 	}
4115 
4116 	return 0;
4117 }
4118 
4119 static void
4120 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4121 {
4122 	list_del(&fib4_entry->common.list);
4123 }
4124 
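/* Program the entry into the device, but only if it is the first entry
 * in its node; lower-priority entries are only kept by the driver.
 */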
4125 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4126 				       struct mlxsw_sp_fib_entry *fib_entry)
4127 {
4128 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4129 
4130 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4131 		return 0;
4132 
4133 	/* The previous first entry is overwritten by the write below
4134 	 * rather than deleted first, to prevent packet loss. Only its
4135 	 * offload indication needs to be cleared here.
4136 	 */
4136 	if (!list_is_singular(&fib_node->entry_list)) {
4137 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4138 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4139 
4140 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4141 	}
4142 
4143 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4144 }
4145 
4146 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4147 					struct mlxsw_sp_fib_entry *fib_entry)
4148 {
4149 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4150 
4151 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4152 		return;
4153 
4154 	/* Promote the next entry by overwriting the deleted entry */
4155 	if (!list_is_singular(&fib_node->entry_list)) {
4156 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4157 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4158 
4159 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4160 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4161 		return;
4162 	}
4163 
4164 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4165 }
4166 
4167 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4168 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4169 					 bool replace, bool append)
4170 {
4171 	int err;
4172 
4173 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4174 	if (err)
4175 		return err;
4176 
4177 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4178 	if (err)
4179 		goto err_fib_node_entry_add;
4180 
4181 	return 0;
4182 
4183 err_fib_node_entry_add:
4184 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4185 	return err;
4186 }
4187 
4188 static void
4189 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4190 				struct mlxsw_sp_fib4_entry *fib4_entry)
4191 {
4192 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4193 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4194 
4195 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4196 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4197 }
4198 
4199 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4200 					struct mlxsw_sp_fib4_entry *fib4_entry,
4201 					bool replace)
4202 {
4203 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4204 	struct mlxsw_sp_fib4_entry *replaced;
4205 
4206 	if (!replace)
4207 		return;
4208 
4209 	/* We inserted the new entry before the replaced one */
4210 	replaced = list_next_entry(fib4_entry, common.list);
4211 
4212 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4213 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4214 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4215 }
4216 
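/* Entry point for IPv4 route addition: get the FIB node for the
 * prefix, create the entry, link it into the node according to the
 * replace / append semantics of the notification and, on replace,
 * destroy the entry that was shadowed by the new one.
 */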
4217 static int
4218 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4219 			 const struct fib_entry_notifier_info *fen_info,
4220 			 bool replace, bool append)
4221 {
4222 	struct mlxsw_sp_fib4_entry *fib4_entry;
4223 	struct mlxsw_sp_fib_node *fib_node;
4224 	int err;
4225 
4226 	if (mlxsw_sp->router->aborted)
4227 		return 0;
4228 
4229 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4230 					 &fen_info->dst, sizeof(fen_info->dst),
4231 					 fen_info->dst_len,
4232 					 MLXSW_SP_L3_PROTO_IPV4);
4233 	if (IS_ERR(fib_node)) {
4234 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4235 		return PTR_ERR(fib_node);
4236 	}
4237 
4238 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4239 	if (IS_ERR(fib4_entry)) {
4240 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4241 		err = PTR_ERR(fib4_entry);
4242 		goto err_fib4_entry_create;
4243 	}
4244 
4245 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4246 					    append);
4247 	if (err) {
4248 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4249 		goto err_fib4_node_entry_link;
4250 	}
4251 
4252 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4253 
4254 	return 0;
4255 
4256 err_fib4_node_entry_link:
4257 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4258 err_fib4_entry_create:
4259 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4260 	return err;
4261 }
4262 
4263 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4264 				     struct fib_entry_notifier_info *fen_info)
4265 {
4266 	struct mlxsw_sp_fib4_entry *fib4_entry;
4267 	struct mlxsw_sp_fib_node *fib_node;
4268 
4269 	if (mlxsw_sp->router->aborted)
4270 		return;
4271 
4272 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4273 	if (WARN_ON(!fib4_entry))
4274 		return;
4275 	fib_node = fib4_entry->common.fib_node;
4276 
4277 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4278 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4279 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4280 }
4281 
4282 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4283 {
4284 	/* Packets with a link-local destination IP arriving at the router
4285 	 * are trapped to the CPU, so there is no need to program specific
4286 	 * routes for them.
4287 	 */
4288 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
4289 		return true;
4290 
4291 	/* Multicast routes aren't supported, so ignore them. Neighbour
4292 	 * Discovery packets are specifically trapped.
4293 	 */
4294 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
4295 		return true;
4296 
4297 	/* Cloned routes are irrelevant in the forwarding path. */
4298 	if (rt->rt6i_flags & RTF_CACHE)
4299 		return true;
4300 
4301 	return false;
4302 }
4303 
4304 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
4305 {
4306 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4307 
4308 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4309 	if (!mlxsw_sp_rt6)
4310 		return ERR_PTR(-ENOMEM);
4311 
4312 	/* In case of route replace, the replaced route is deleted
4313 	 * without notification. Take a reference to prevent accessing
4314 	 * freed memory.
4315 	 */
4316 	mlxsw_sp_rt6->rt = rt;
4317 	rt6_hold(rt);
4318 
4319 	return mlxsw_sp_rt6;
4320 }
4321 
4322 #if IS_ENABLED(CONFIG_IPV6)
4323 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4324 {
4325 	rt6_release(rt);
4326 }
4327 #else
4328 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4329 {
4330 }
4331 #endif
4332 
4333 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4334 {
4335 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4336 	kfree(mlxsw_sp_rt6);
4337 }
4338 
4339 static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
4340 {
4341 	/* RTF_CACHE routes are ignored */
4342 	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4343 }
4344 
4345 static struct rt6_info *
4346 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4347 {
4348 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4349 				list)->rt;
4350 }
4351 
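/* Look for a multipath entry the new route can be appended to: one in
 * the same table, with the same metric and whose routes can be part of
 * a multipath group. NULL is returned when the route is being replaced
 * or cannot itself be multipath.
 */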
4352 static struct mlxsw_sp_fib6_entry *
4353 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4354 				 const struct rt6_info *nrt, bool replace)
4355 {
4356 	struct mlxsw_sp_fib6_entry *fib6_entry;
4357 
4358 	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4359 		return NULL;
4360 
4361 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4362 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4363 
4364 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4365 		 * virtual router.
4366 		 */
4367 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
4368 			continue;
4369 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
4370 			break;
4371 		if (rt->rt6i_metric < nrt->rt6i_metric)
4372 			continue;
4373 		if (rt->rt6i_metric == nrt->rt6i_metric &&
4374 		    mlxsw_sp_fib6_rt_can_mp(rt))
4375 			return fib6_entry;
4376 		if (rt->rt6i_metric > nrt->rt6i_metric)
4377 			break;
4378 	}
4379 
4380 	return NULL;
4381 }
4382 
4383 static struct mlxsw_sp_rt6 *
4384 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4385 			    const struct rt6_info *rt)
4386 {
4387 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4388 
4389 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4390 		if (mlxsw_sp_rt6->rt == rt)
4391 			return mlxsw_sp_rt6;
4392 	}
4393 
4394 	return NULL;
4395 }
4396 
4397 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4398 					const struct rt6_info *rt,
4399 					enum mlxsw_sp_ipip_type *ret)
4400 {
4401 	return rt->dst.dev &&
4402 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
4403 }
4404 
4405 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4406 				       struct mlxsw_sp_nexthop_group *nh_grp,
4407 				       struct mlxsw_sp_nexthop *nh,
4408 				       const struct rt6_info *rt)
4409 {
4410 	struct mlxsw_sp_router *router = mlxsw_sp->router;
4411 	struct net_device *dev = rt->dst.dev;
4412 	enum mlxsw_sp_ipip_type ipipt;
4413 	struct mlxsw_sp_rif *rif;
4414 	int err;
4415 
4416 	if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) &&
4417 	    router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
4418 						     MLXSW_SP_L3_PROTO_IPV6)) {
4419 		nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4420 		err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
4421 		if (err)
4422 			return err;
4423 		mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
4424 		return 0;
4425 	}
4426 
4427 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4428 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4429 	if (!rif)
4430 		return 0;
4431 	mlxsw_sp_nexthop_rif_init(nh, rif);
4432 
4433 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4434 	if (err)
4435 		goto err_nexthop_neigh_init;
4436 
4437 	return 0;
4438 
4439 err_nexthop_neigh_init:
4440 	mlxsw_sp_nexthop_rif_fini(nh);
4441 	return err;
4442 }
4443 
4444 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
4445 					struct mlxsw_sp_nexthop *nh)
4446 {
4447 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4448 }
4449 
4450 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4451 				  struct mlxsw_sp_nexthop_group *nh_grp,
4452 				  struct mlxsw_sp_nexthop *nh,
4453 				  const struct rt6_info *rt)
4454 {
4455 	struct net_device *dev = rt->dst.dev;
4456 
4457 	nh->nh_grp = nh_grp;
4458 	nh->nh_weight = 1;
4459 	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
4460 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4461 
4462 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4463 
4464 	if (!dev)
4465 		return 0;
4466 	nh->ifindex = dev->ifindex;
4467 
4468 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4469 }
4470 
4471 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4472 				   struct mlxsw_sp_nexthop *nh)
4473 {
4474 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4475 	list_del(&nh->router_list_node);
4476 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4477 }
4478 
4479 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4480 				    const struct rt6_info *rt)
4481 {
4482 	return rt->rt6i_flags & RTF_GATEWAY ||
4483 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4484 }
4485 
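/* Create a nexthop group from the routes of an IPv6 multipath entry.
 * The group is sized according to the number of routes and inserted
 * into the nexthop group hash table, so that it can be shared by
 * entries with an identical set of nexthops.
 */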
4486 static struct mlxsw_sp_nexthop_group *
4487 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4488 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4489 {
4490 	struct mlxsw_sp_nexthop_group *nh_grp;
4491 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4492 	struct mlxsw_sp_nexthop *nh;
4493 	size_t alloc_size;
4494 	int i = 0;
4495 	int err;
4496 
4497 	alloc_size = sizeof(*nh_grp) +
4498 		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4499 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4500 	if (!nh_grp)
4501 		return ERR_PTR(-ENOMEM);
4502 	INIT_LIST_HEAD(&nh_grp->fib_list);
4503 #if IS_ENABLED(CONFIG_IPV6)
4504 	nh_grp->neigh_tbl = &nd_tbl;
4505 #endif
4506 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4507 					struct mlxsw_sp_rt6, list);
4508 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4509 	nh_grp->count = fib6_entry->nrt6;
4510 	for (i = 0; i < nh_grp->count; i++) {
4511 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
4512 
4513 		nh = &nh_grp->nexthops[i];
4514 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4515 		if (err)
4516 			goto err_nexthop6_init;
4517 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4518 	}
4519 
4520 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4521 	if (err)
4522 		goto err_nexthop_group_insert;
4523 
4524 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4525 	return nh_grp;
4526 
4527 err_nexthop_group_insert:
4528 err_nexthop6_init:
4529 	for (i--; i >= 0; i--) {
4530 		nh = &nh_grp->nexthops[i];
4531 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4532 	}
4533 	kfree(nh_grp);
4534 	return ERR_PTR(err);
4535 }
4536 
4537 static void
4538 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4539 				struct mlxsw_sp_nexthop_group *nh_grp)
4540 {
4541 	struct mlxsw_sp_nexthop *nh;
4542 	int i = nh_grp->count;
4543 
4544 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4545 	for (i--; i >= 0; i--) {
4546 		nh = &nh_grp->nexthops[i];
4547 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4548 	}
4549 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4550 	WARN_ON(nh_grp->adj_index_valid);
4551 	kfree(nh_grp);
4552 }
4553 
4554 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4555 				       struct mlxsw_sp_fib6_entry *fib6_entry)
4556 {
4557 	struct mlxsw_sp_nexthop_group *nh_grp;
4558 
4559 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4560 	if (!nh_grp) {
4561 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4562 		if (IS_ERR(nh_grp))
4563 			return PTR_ERR(nh_grp);
4564 	}
4565 
4566 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4567 		      &nh_grp->fib_list);
4568 	fib6_entry->common.nh_group = nh_grp;
4569 
4570 	return 0;
4571 }
4572 
4573 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4574 					struct mlxsw_sp_fib_entry *fib_entry)
4575 {
4576 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4577 
4578 	list_del(&fib_entry->nexthop_group_node);
4579 	if (!list_empty(&nh_grp->fib_list))
4580 		return;
4581 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4582 }
4583 
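/* Called when the set of routes in the entry has changed. Since
 * nexthop groups are keyed by their nexthops, the entry needs to be
 * moved to a matching - possibly new - group and re-programmed, after
 * which the old group is destroyed if it became unused.
 */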
4584 static int
4585 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
4586 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4587 {
4588 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
4589 	int err;
4590 
4591 	fib6_entry->common.nh_group = NULL;
4592 	list_del(&fib6_entry->common.nexthop_group_node);
4593 
4594 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
4595 	if (err)
4596 		goto err_nexthop6_group_get;
4597 
4598 	/* If this entry is offloaded, the adjacency index currently
4599 	 * associated with it in the device's table is that of the old
4600 	 * group. Start using the new one instead.
4601 	 */
4602 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
4603 	if (err)
4604 		goto err_fib_node_entry_add;
4605 
4606 	if (list_empty(&old_nh_grp->fib_list))
4607 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
4608 
4609 	return 0;
4610 
4611 err_fib_node_entry_add:
4612 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
4613 err_nexthop6_group_get:
4614 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4615 		      &old_nh_grp->fib_list);
4616 	fib6_entry->common.nh_group = old_nh_grp;
4617 	return err;
4618 }
4619 
4620 static int
4621 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
4622 				struct mlxsw_sp_fib6_entry *fib6_entry,
4623 				struct rt6_info *rt)
4624 {
4625 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4626 	int err;
4627 
4628 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
4629 	if (IS_ERR(mlxsw_sp_rt6))
4630 		return PTR_ERR(mlxsw_sp_rt6);
4631 
4632 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
4633 	fib6_entry->nrt6++;
4634 
4635 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4636 	if (err)
4637 		goto err_nexthop6_group_update;
4638 
4639 	return 0;
4640 
4641 err_nexthop6_group_update:
4642 	fib6_entry->nrt6--;
4643 	list_del(&mlxsw_sp_rt6->list);
4644 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4645 	return err;
4646 }
4647 
4648 static void
4649 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
4650 				struct mlxsw_sp_fib6_entry *fib6_entry,
4651 				struct rt6_info *rt)
4652 {
4653 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4654 
4655 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
4656 	if (WARN_ON(!mlxsw_sp_rt6))
4657 		return;
4658 
4659 	fib6_entry->nrt6--;
4660 	list_del(&mlxsw_sp_rt6->list);
4661 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4662 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4663 }
4664 
4665 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4666 					 struct mlxsw_sp_fib_entry *fib_entry,
4667 					 const struct rt6_info *rt)
4668 {
4669 	/* Packets hitting RTF_REJECT routes need to be discarded by the
4670 	 * stack. We can rely on their destination device not having a
4671 	 * RIF (it's the loopback device) and can thus use action type
4672 	 * local, which will cause them to be trapped with a lower
4673 	 * priority than packets that need to be locally received.
4674 	 */
4675 	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
4676 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4677 	else if (rt->rt6i_flags & RTF_REJECT)
4678 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4679 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
4680 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4681 	else
4682 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4683 }
4684 
4685 static void
4686 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
4687 {
4688 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
4689 
4690 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
4691 				 list) {
4692 		fib6_entry->nrt6--;
4693 		list_del(&mlxsw_sp_rt6->list);
4694 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4695 	}
4696 }
4697 
4698 static struct mlxsw_sp_fib6_entry *
4699 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
4700 			   struct mlxsw_sp_fib_node *fib_node,
4701 			   struct rt6_info *rt)
4702 {
4703 	struct mlxsw_sp_fib6_entry *fib6_entry;
4704 	struct mlxsw_sp_fib_entry *fib_entry;
4705 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4706 	int err;
4707 
4708 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
4709 	if (!fib6_entry)
4710 		return ERR_PTR(-ENOMEM);
4711 	fib_entry = &fib6_entry->common;
4712 
4713 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
4714 	if (IS_ERR(mlxsw_sp_rt6)) {
4715 		err = PTR_ERR(mlxsw_sp_rt6);
4716 		goto err_rt6_create;
4717 	}
4718 
4719 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
4720 
4721 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
4722 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
4723 	fib6_entry->nrt6 = 1;
4724 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
4725 	if (err)
4726 		goto err_nexthop6_group_get;
4727 
4728 	fib_entry->fib_node = fib_node;
4729 
4730 	return fib6_entry;
4731 
4732 err_nexthop6_group_get:
4733 	list_del(&mlxsw_sp_rt6->list);
4734 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4735 err_rt6_create:
4736 	kfree(fib6_entry);
4737 	return ERR_PTR(err);
4738 }
4739 
4740 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4741 					struct mlxsw_sp_fib6_entry *fib6_entry)
4742 {
4743 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
4744 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
4745 	WARN_ON(fib6_entry->nrt6);
4746 	kfree(fib6_entry);
4747 }
4748 
4749 static struct mlxsw_sp_fib6_entry *
4750 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4751 			      const struct rt6_info *nrt, bool replace)
4752 {
4753 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
4754 
4755 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4756 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4757 
4758 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
4759 			continue;
4760 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
4761 			break;
4762 		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
4763 			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
4764 			    mlxsw_sp_fib6_rt_can_mp(nrt))
4765 				return fib6_entry;
4766 			if (mlxsw_sp_fib6_rt_can_mp(nrt))
4767 				fallback = fallback ?: fib6_entry;
4768 		}
4769 		if (rt->rt6i_metric > nrt->rt6i_metric)
4770 			return fallback ?: fib6_entry;
4771 	}
4772 
4773 	return fallback;
4774 }
4775 
4776 static int
4777 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
4778 			       bool replace)
4779 {
4780 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
4781 	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
4782 	struct mlxsw_sp_fib6_entry *fib6_entry;
4783 
4784 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
4785 
4786 	if (replace && WARN_ON(!fib6_entry))
4787 		return -EINVAL;
4788 
4789 	if (fib6_entry) {
4790 		list_add_tail(&new6_entry->common.list,
4791 			      &fib6_entry->common.list);
4792 	} else {
4793 		struct mlxsw_sp_fib6_entry *last;
4794 
4795 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4796 			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
4797 
4798 			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
4799 				break;
4800 			fib6_entry = last;
4801 		}
4802 
4803 		if (fib6_entry)
4804 			list_add(&new6_entry->common.list,
4805 				 &fib6_entry->common.list);
4806 		else
4807 			list_add(&new6_entry->common.list,
4808 				 &fib_node->entry_list);
4809 	}
4810 
4811 	return 0;
4812 }
4813 
4814 static void
4815 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
4816 {
4817 	list_del(&fib6_entry->common.list);
4818 }
4819 
4820 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4821 					 struct mlxsw_sp_fib6_entry *fib6_entry,
4822 					 bool replace)
4823 {
4824 	int err;
4825 
4826 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
4827 	if (err)
4828 		return err;
4829 
4830 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
4831 	if (err)
4832 		goto err_fib_node_entry_add;
4833 
4834 	return 0;
4835 
4836 err_fib_node_entry_add:
4837 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
4838 	return err;
4839 }
4840 
4841 static void
4842 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4843 				struct mlxsw_sp_fib6_entry *fib6_entry)
4844 {
4845 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
4846 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
4847 }
4848 
4849 static struct mlxsw_sp_fib6_entry *
4850 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4851 			   const struct rt6_info *rt)
4852 {
4853 	struct mlxsw_sp_fib6_entry *fib6_entry;
4854 	struct mlxsw_sp_fib_node *fib_node;
4855 	struct mlxsw_sp_fib *fib;
4856 	struct mlxsw_sp_vr *vr;
4857 
4858 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
4859 	if (!vr)
4860 		return NULL;
4861 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
4862 
4863 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
4864 					    sizeof(rt->rt6i_dst.addr),
4865 					    rt->rt6i_dst.plen);
4866 	if (!fib_node)
4867 		return NULL;
4868 
4869 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4870 		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4871 
4872 		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
4873 		    rt->rt6i_metric == iter_rt->rt6i_metric &&
4874 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
4875 			return fib6_entry;
4876 	}
4877 
4878 	return NULL;
4879 }
4880 
4881 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
4882 					struct mlxsw_sp_fib6_entry *fib6_entry,
4883 					bool replace)
4884 {
4885 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
4886 	struct mlxsw_sp_fib6_entry *replaced;
4887 
4888 	if (!replace)
4889 		return;
4890 
4891 	replaced = list_next_entry(fib6_entry, common.list);
4892 
4893 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
4894 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
4895 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4896 }
4897 
4898 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
4899 				    struct rt6_info *rt, bool replace)
4900 {
4901 	struct mlxsw_sp_fib6_entry *fib6_entry;
4902 	struct mlxsw_sp_fib_node *fib_node;
4903 	int err;
4904 
4905 	if (mlxsw_sp->router->aborted)
4906 		return 0;
4907 
4908 	if (rt->rt6i_src.plen)
4909 		return -EINVAL;
4910 
4911 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
4912 		return 0;
4913 
4914 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
4915 					 &rt->rt6i_dst.addr,
4916 					 sizeof(rt->rt6i_dst.addr),
4917 					 rt->rt6i_dst.plen,
4918 					 MLXSW_SP_L3_PROTO_IPV6);
4919 	if (IS_ERR(fib_node))
4920 		return PTR_ERR(fib_node);
4921 
4922 	/* Before creating a new entry, try to append the route to an
4923 	 * existing multipath entry.
4924 	 */
4925 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
4926 	if (fib6_entry) {
4927 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
4928 		if (err)
4929 			goto err_fib6_entry_nexthop_add;
4930 		return 0;
4931 	}
4932 
4933 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
4934 	if (IS_ERR(fib6_entry)) {
4935 		err = PTR_ERR(fib6_entry);
4936 		goto err_fib6_entry_create;
4937 	}
4938 
4939 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
4940 	if (err)
4941 		goto err_fib6_node_entry_link;
4942 
4943 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
4944 
4945 	return 0;
4946 
4947 err_fib6_node_entry_link:
4948 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
4949 err_fib6_entry_create:
4950 err_fib6_entry_nexthop_add:
4951 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4952 	return err;
4953 }
4954 
4955 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
4956 				     struct rt6_info *rt)
4957 {
4958 	struct mlxsw_sp_fib6_entry *fib6_entry;
4959 	struct mlxsw_sp_fib_node *fib_node;
4960 
4961 	if (mlxsw_sp->router->aborted)
4962 		return;
4963 
4964 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
4965 		return;
4966 
4967 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
4968 	if (WARN_ON(!fib6_entry))
4969 		return;
4970 
4971 	/* If the route is part of a multipath entry, but is not the
4972 	 * last one to be removed, then only reduce its nexthop group.
4973 	 */
4974 	if (!list_is_singular(&fib6_entry->rt6_list)) {
4975 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
4976 		return;
4977 	}
4978 
4979 	fib_node = fib6_entry->common.fib_node;
4980 
4981 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
4982 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
4983 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4984 }
4985 
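/* After an abort, routing falls back to the kernel. Create an LPM tree
 * for the protocol, bind every virtual router to it and program a
 * catch-all default route whose action traps matching packets to the
 * CPU.
 */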
4986 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
4987 					    enum mlxsw_reg_ralxx_protocol proto,
4988 					    u8 tree_id)
4989 {
4990 	char ralta_pl[MLXSW_REG_RALTA_LEN];
4991 	char ralst_pl[MLXSW_REG_RALST_LEN];
4992 	int i, err;
4993 
4994 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
4995 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
4996 	if (err)
4997 		return err;
4998 
4999 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5000 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5001 	if (err)
5002 		return err;
5003 
5004 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5005 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5006 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5007 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5008 
5009 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5010 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5011 				      raltb_pl);
5012 		if (err)
5013 			return err;
5014 
5015 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5016 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5017 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5018 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5019 				      ralue_pl);
5020 		if (err)
5021 			return err;
5022 	}
5023 
5024 	return 0;
5025 }
5026 
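/* Multicast (IPMR) routes are programmed through the MR tables
 * attached to each virtual router, so adding a route only requires
 * resolving the virtual router from the table ID.
 */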
5027 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5028 				     struct mfc_entry_notifier_info *men_info,
5029 				     bool replace)
5030 {
5031 	struct mlxsw_sp_vr *vr;
5032 
5033 	if (mlxsw_sp->router->aborted)
5034 		return 0;
5035 
5036 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5037 	if (IS_ERR(vr))
5038 		return PTR_ERR(vr);
5039 
5040 	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
5041 }
5042 
5043 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5044 				      struct mfc_entry_notifier_info *men_info)
5045 {
5046 	struct mlxsw_sp_vr *vr;
5047 
5048 	if (mlxsw_sp->router->aborted)
5049 		return;
5050 
5051 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5052 	if (WARN_ON(!vr))
5053 		return;
5054 
5055 	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
5056 	mlxsw_sp_vr_put(vr);
5057 }
5058 
5059 static int
5060 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5061 			      struct vif_entry_notifier_info *ven_info)
5062 {
5063 	struct mlxsw_sp_rif *rif;
5064 	struct mlxsw_sp_vr *vr;
5065 
5066 	if (mlxsw_sp->router->aborted)
5067 		return 0;
5068 
5069 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5070 	if (IS_ERR(vr))
5071 		return PTR_ERR(vr);
5072 
5073 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5074 	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
5075 				   ven_info->vif_index,
5076 				   ven_info->vif_flags, rif);
5077 }
5078 
5079 static void
5080 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5081 			      struct vif_entry_notifier_info *ven_info)
5082 {
5083 	struct mlxsw_sp_vr *vr;
5084 
5085 	if (mlxsw_sp->router->aborted)
5086 		return;
5087 
5088 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5089 	if (WARN_ON(!vr))
5090 		return;
5091 
5092 	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
5093 	mlxsw_sp_vr_put(vr);
5094 }
5095 
5096 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5097 {
5098 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5099 	int err;
5100 
5101 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5102 					       MLXSW_SP_LPM_TREE_MIN);
5103 	if (err)
5104 		return err;
5105 
5106 	/* The multicast router code does not need an abort trap, as by
5107 	 * default packets that do not match any routes are trapped to the CPU.
5108 	 */
5109 
5110 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5111 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5112 						MLXSW_SP_LPM_TREE_MIN + 1);
5113 }
5114 
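/* Unlink and destroy all entries in the node. The node itself is freed
 * by the mlxsw_sp_fib_node_put() of its last entry, so the iteration
 * state must be computed before the entry is destroyed.
 */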
5115 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5116 				     struct mlxsw_sp_fib_node *fib_node)
5117 {
5118 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5119 
5120 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5121 				 common.list) {
5122 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5123 
5124 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5125 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5126 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5127 		/* Break when the entry list is empty and the node was
5128 		 * freed. Otherwise, we'll access freed memory in the next
5129 		 * iteration.
5130 		 */
5131 		if (do_break)
5132 			break;
5133 	}
5134 }
5135 
5136 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5137 				     struct mlxsw_sp_fib_node *fib_node)
5138 {
5139 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5140 
5141 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5142 				 common.list) {
5143 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5144 
5145 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5146 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5147 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5148 		if (do_break)
5149 			break;
5150 	}
5151 }
5152 
5153 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5154 				    struct mlxsw_sp_fib_node *fib_node)
5155 {
5156 	switch (fib_node->fib->proto) {
5157 	case MLXSW_SP_L3_PROTO_IPV4:
5158 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5159 		break;
5160 	case MLXSW_SP_L3_PROTO_IPV6:
5161 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5162 		break;
5163 	}
5164 }
5165 
5166 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5167 				  struct mlxsw_sp_vr *vr,
5168 				  enum mlxsw_sp_l3proto proto)
5169 {
5170 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5171 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5172 
5173 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5174 		bool do_break = &tmp->list == &fib->node_list;
5175 
5176 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5177 		if (do_break)
5178 			break;
5179 	}
5180 }
5181 
5182 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5183 {
5184 	int i;
5185 
5186 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5187 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5188 
5189 		if (!mlxsw_sp_vr_is_used(vr))
5190 			continue;
5191 
5192 		mlxsw_sp_mr_table_flush(vr->mr4_table);
5193 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5194 
5195 		/* If the virtual router was only used for IPv4, then it's
5196 		 * no longer used.
5197 		 */
5198 		if (!mlxsw_sp_vr_is_used(vr))
5199 			continue;
5200 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5201 	}
5202 }
5203 
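/* Give up on offloading: flush all routes from the device, mark the
 * router as aborted so that subsequent FIB events are ignored, and
 * install the traps that hand all routed packets to the CPU.
 */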
5204 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5205 {
5206 	int err;
5207 
5208 	if (mlxsw_sp->router->aborted)
5209 		return;
5210 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5211 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5212 	mlxsw_sp->router->aborted = true;
5213 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5214 	if (err)
5215 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5216 }
5217 
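/* State snapshotted from a FIB notification, to be processed later in
 * process context. The union reflects the fact that each event carries
 * exactly one kind of notifier info.
 */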
5218 struct mlxsw_sp_fib_event_work {
5219 	struct work_struct work;
5220 	union {
5221 		struct fib6_entry_notifier_info fen6_info;
5222 		struct fib_entry_notifier_info fen_info;
5223 		struct fib_rule_notifier_info fr_info;
5224 		struct fib_nh_notifier_info fnh_info;
5225 		struct mfc_entry_notifier_info men_info;
5226 		struct vif_entry_notifier_info ven_info;
5227 	};
5228 	struct mlxsw_sp *mlxsw_sp;
5229 	unsigned long event;
5230 };
5231 
5232 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5233 {
5234 	struct mlxsw_sp_fib_event_work *fib_work =
5235 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5236 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5237 	struct fib_rule *rule;
5238 	bool replace, append;
5239 	int err;
5240 
5241 	/* Protect internal structures from changes */
5242 	rtnl_lock();
5243 	switch (fib_work->event) {
5244 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5245 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5246 	case FIB_EVENT_ENTRY_ADD:
5247 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5248 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5249 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5250 					       replace, append);
5251 		if (err)
5252 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5253 		fib_info_put(fib_work->fen_info.fi);
5254 		break;
5255 	case FIB_EVENT_ENTRY_DEL:
5256 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5257 		fib_info_put(fib_work->fen_info.fi);
5258 		break;
5259 	case FIB_EVENT_RULE_ADD: /* fall through */
5260 	case FIB_EVENT_RULE_DEL:
5261 		rule = fib_work->fr_info.rule;
5262 		if (!fib4_rule_default(rule) && !rule->l3mdev)
5263 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5264 		fib_rule_put(rule);
5265 		break;
5266 	case FIB_EVENT_NH_ADD: /* fall through */
5267 	case FIB_EVENT_NH_DEL:
5268 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5269 					fib_work->fnh_info.fib_nh);
5270 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5271 		break;
5272 	}
5273 	rtnl_unlock();
5274 	kfree(fib_work);
5275 }
5276 
5277 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5278 {
5279 	struct mlxsw_sp_fib_event_work *fib_work =
5280 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5281 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5282 	struct fib_rule *rule;
5283 	bool replace;
5284 	int err;
5285 
5286 	rtnl_lock();
5287 	switch (fib_work->event) {
5288 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5289 	case FIB_EVENT_ENTRY_ADD:
5290 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5291 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5292 					       fib_work->fen6_info.rt, replace);
5293 		if (err)
5294 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5295 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5296 		break;
5297 	case FIB_EVENT_ENTRY_DEL:
5298 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5299 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5300 		break;
5301 	case FIB_EVENT_RULE_ADD: /* fall through */
5302 	case FIB_EVENT_RULE_DEL:
5303 		rule = fib_work->fr_info.rule;
5304 		if (!fib6_rule_default(rule) && !rule->l3mdev)
5305 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5306 		fib_rule_put(rule);
5307 		break;
5308 	}
5309 	rtnl_unlock();
5310 	kfree(fib_work);
5311 }
5312 
5313 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5314 {
5315 	struct mlxsw_sp_fib_event_work *fib_work =
5316 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5317 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5318 	struct fib_rule *rule;
5319 	bool replace;
5320 	int err;
5321 
5322 	rtnl_lock();
5323 	switch (fib_work->event) {
5324 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5325 	case FIB_EVENT_ENTRY_ADD:
5326 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5327 
5328 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5329 						replace);
5330 		if (err)
5331 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5332 		ipmr_cache_put(fib_work->men_info.mfc);
5333 		break;
5334 	case FIB_EVENT_ENTRY_DEL:
5335 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5336 		ipmr_cache_put(fib_work->men_info.mfc);
5337 		break;
5338 	case FIB_EVENT_VIF_ADD:
5339 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5340 						    &fib_work->ven_info);
5341 		if (err)
5342 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5343 		dev_put(fib_work->ven_info.dev);
5344 		break;
5345 	case FIB_EVENT_VIF_DEL:
5346 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5347 					      &fib_work->ven_info);
5348 		dev_put(fib_work->ven_info.dev);
5349 		break;
5350 	case FIB_EVENT_RULE_ADD: /* fall through */
5351 	case FIB_EVENT_RULE_DEL:
5352 		rule = fib_work->fr_info.rule;
5353 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
5354 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5355 		fib_rule_put(rule);
5356 		break;
5357 	}
5358 	rtnl_unlock();
5359 	kfree(fib_work);
5360 }
5361 
5362 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5363 				       struct fib_notifier_info *info)
5364 {
5365 	struct fib_entry_notifier_info *fen_info;
5366 	struct fib_rule_notifier_info *fr_info;
5367 	struct fib_nh_notifier_info *fnh_info;
5368 
5369 	switch (fib_work->event) {
5370 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5371 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5372 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5373 	case FIB_EVENT_ENTRY_DEL:
5374 		fen_info = container_of(info, struct fib_entry_notifier_info,
5375 					info);
5376 		fib_work->fen_info = *fen_info;
5377 		/* Take a reference on the fib_info to prevent it from being
5378 		 * freed while the work is queued. Release it afterwards.
5379 		 */
5380 		fib_info_hold(fib_work->fen_info.fi);
5381 		break;
5382 	case FIB_EVENT_RULE_ADD: /* fall through */
5383 	case FIB_EVENT_RULE_DEL:
5384 		fr_info = container_of(info, struct fib_rule_notifier_info,
5385 				       info);
5386 		fib_work->fr_info = *fr_info;
5387 		fib_rule_get(fib_work->fr_info.rule);
5388 		break;
5389 	case FIB_EVENT_NH_ADD: /* fall through */
5390 	case FIB_EVENT_NH_DEL:
5391 		fnh_info = container_of(info, struct fib_nh_notifier_info,
5392 					info);
5393 		fib_work->fnh_info = *fnh_info;
5394 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5395 		break;
5396 	}
5397 }
5398 
5399 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5400 				       struct fib_notifier_info *info)
5401 {
5402 	struct fib6_entry_notifier_info *fen6_info;
5403 	struct fib_rule_notifier_info *fr_info;
5404 
5405 	switch (fib_work->event) {
5406 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5407 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5408 	case FIB_EVENT_ENTRY_DEL:
5409 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
5410 					 info);
5411 		fib_work->fen6_info = *fen6_info;
5412 		rt6_hold(fib_work->fen6_info.rt);
5413 		break;
5414 	case FIB_EVENT_RULE_ADD: /* fall through */
5415 	case FIB_EVENT_RULE_DEL:
5416 		fr_info = container_of(info, struct fib_rule_notifier_info,
5417 				       info);
5418 		fib_work->fr_info = *fr_info;
5419 		fib_rule_get(fib_work->fr_info.rule);
5420 		break;
5421 	}
5422 }
5423 
5424 static void
5425 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5426 			    struct fib_notifier_info *info)
5427 {
5428 	switch (fib_work->event) {
5429 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5430 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5431 	case FIB_EVENT_ENTRY_DEL:
5432 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5433 		ipmr_cache_hold(fib_work->men_info.mfc);
5434 		break;
5435 	case FIB_EVENT_VIF_ADD: /* fall through */
5436 	case FIB_EVENT_VIF_DEL:
5437 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5438 		dev_hold(fib_work->ven_info.dev);
5439 		break;
5440 	case FIB_EVENT_RULE_ADD: /* fall through */
5441 	case FIB_EVENT_RULE_DEL:
5442 		memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
5443 		fib_rule_get(fib_work->fr_info.rule);
5444 		break;
5445 	}
5446 }
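
/* Unlike the IPv4/IPv6 variants above, which use container_of() on the
 * embedded info member, the memcpy() calls here rely on struct
 * fib_notifier_info being the first member of the extended notifier
 * info structures, so copying sizeof() bytes starting at the info
 * pointer captures the whole enclosing structure.
 */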
5447 
5448 /* Called with rcu_read_lock() */
5449 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5450 				     unsigned long event, void *ptr)
5451 {
5452 	struct mlxsw_sp_fib_event_work *fib_work;
5453 	struct fib_notifier_info *info = ptr;
5454 	struct mlxsw_sp_router *router;
5455 
5456 	if (!net_eq(info->net, &init_net) ||
5457 	    (info->family != AF_INET && info->family != AF_INET6 &&
5458 	     info->family != RTNL_FAMILY_IPMR))
5459 		return NOTIFY_DONE;
5460 
5461 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5462 	if (WARN_ON(!fib_work))
5463 		return NOTIFY_BAD;
5464 
5465 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5466 	fib_work->mlxsw_sp = router->mlxsw_sp;
5467 	fib_work->event = event;
5468 
5469 	switch (info->family) {
5470 	case AF_INET:
5471 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5472 		mlxsw_sp_router_fib4_event(fib_work, info);
5473 		break;
5474 	case AF_INET6:
5475 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5476 		mlxsw_sp_router_fib6_event(fib_work, info);
5477 		break;
5478 	case RTNL_FAMILY_IPMR:
5479 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5480 		mlxsw_sp_router_fibmr_event(fib_work, info);
5481 		break;
5482 	}
5483 
5484 	mlxsw_core_schedule_work(&fib_work->work);
5485 
5486 	return NOTIFY_DONE;
5487 }
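
/* The notifier above runs in atomic (RCU) context, so all it may do is
 * snapshot the notifier info, take references, and defer the real work
 * to process context. A minimal sketch of the same pattern, using
 * hypothetical my_* names rather than anything defined in this driver
 * (references would be taken here and dropped in my_fib_work_fn()):
 *
 *	static int my_fib_event(struct notifier_block *nb,
 *				unsigned long event, void *ptr)
 *	{
 *		struct fib_notifier_info *info = ptr;
 *		struct my_fib_work *w;
 *
 *		w = kzalloc(sizeof(*w), GFP_ATOMIC);
 *		if (!w)
 *			return NOTIFY_BAD;
 *		w->event = event;
 *		w->info = *info;
 *		INIT_WORK(&w->work, my_fib_work_fn);
 *		schedule_work(&w->work);
 *		return NOTIFY_DONE;
 *	}
 */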
5488 
5489 static struct mlxsw_sp_rif *
5490 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5491 			 const struct net_device *dev)
5492 {
5493 	int i;
5494 
5495 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5496 		if (mlxsw_sp->router->rifs[i] &&
5497 		    mlxsw_sp->router->rifs[i]->dev == dev)
5498 			return mlxsw_sp->router->rifs[i];
5499 
5500 	return NULL;
5501 }
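
/* The lookup above is a simple linear scan. This is acceptable here
 * because MAX_RIFS is a modest hardware resource and RIF lookups only
 * happen on slow-path address and netdevice notifications, never per
 * packet.
 */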
5502 
5503 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5504 {
5505 	char ritr_pl[MLXSW_REG_RITR_LEN];
5506 	int err;
5507 
5508 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5509 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5510 	if (WARN_ON_ONCE(err))
5511 		return err;
5512 
5513 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
5514 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5515 }
5516 
5517 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
5518 					  struct mlxsw_sp_rif *rif)
5519 {
5520 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
5521 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
5522 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
5523 }
5524 
5525 static bool
5526 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
5527 			   unsigned long event)
5528 {
5529 	struct inet6_dev *inet6_dev;
5530 	bool addr_list_empty = true;
5531 	struct in_device *idev;
5532 
5533 	switch (event) {
5534 	case NETDEV_UP:
5535 		return rif == NULL;
5536 	case NETDEV_DOWN:
5537 		idev = __in_dev_get_rtnl(dev);
5538 		if (idev && idev->ifa_list)
5539 			addr_list_empty = false;
5540 
5541 		inet6_dev = __in6_dev_get(dev);
5542 		if (addr_list_empty && inet6_dev &&
5543 		    !list_empty(&inet6_dev->addr_list))
5544 			addr_list_empty = false;
5545 
5546 		if (rif && addr_list_empty &&
5547 		    !netif_is_l3_slave(rif->dev))
5548 			return true;
5549 		/* It is possible we already removed the RIF ourselves
5550 		 * if it was assigned to a netdev that is now a bridge
5551 		 * or LAG slave.
5552 		 */
5553 		return false;
5554 	}
5555 
5556 	return false;
5557 }
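
/* To summarize the policy above: on NETDEV_UP a RIF is configured only
 * if the netdevice does not already have one; on NETDEV_DOWN it is torn
 * down only if it exists, no IPv4 or IPv6 addresses remain on the
 * netdevice, and the netdevice is not an L3 (VRF) slave, since RIFs of
 * L3 slaves are managed from the CHANGEUPPER path instead.
 */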
5558 
5559 static enum mlxsw_sp_rif_type
5560 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
5561 		      const struct net_device *dev)
5562 {
5563 	enum mlxsw_sp_fid_type type;
5564 
5565 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
5566 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
5567 
5568 	/* Otherwise, the RIF type is derived from the type of the underlying FID. */
5569 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
5570 		type = MLXSW_SP_FID_TYPE_8021Q;
5571 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
5572 		type = MLXSW_SP_FID_TYPE_8021Q;
5573 	else if (netif_is_bridge_master(dev))
5574 		type = MLXSW_SP_FID_TYPE_8021D;
5575 	else
5576 		type = MLXSW_SP_FID_TYPE_RFID;
5577 
5578 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
5579 }
5580 
5581 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5582 {
5583 	int i;
5584 
5585 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5586 		if (!mlxsw_sp->router->rifs[i]) {
5587 			*p_rif_index = i;
5588 			return 0;
5589 		}
5590 	}
5591 
5592 	return -ENOBUFS;
5593 }
5594 
5595 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5596 					       u16 vr_id,
5597 					       struct net_device *l3_dev)
5598 {
5599 	struct mlxsw_sp_rif *rif;
5600 
5601 	rif = kzalloc(rif_size, GFP_KERNEL);
5602 	if (!rif)
5603 		return NULL;
5604 
5605 	INIT_LIST_HEAD(&rif->nexthop_list);
5606 	INIT_LIST_HEAD(&rif->neigh_list);
5607 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
5608 	rif->mtu = l3_dev->mtu;
5609 	rif->vr_id = vr_id;
5610 	rif->dev = l3_dev;
5611 	rif->rif_index = rif_index;
5612 
5613 	return rif;
5614 }
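
/* rif_size is variable so that each RIF type can embed the common
 * structure as its first member, e.g. (sketch; the foo type is
 * illustrative only):
 *
 *	struct mlxsw_sp_rif_foo {
 *		struct mlxsw_sp_rif common;
 *		u32 foo_specific_state;
 *	};
 *
 * which lets the type-specific ops recover their private data with
 * container_of(rif, struct mlxsw_sp_rif_foo, common), as the subport
 * and IP-in-IP loopback types do below.
 */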
5615 
5616 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
5617 					   u16 rif_index)
5618 {
5619 	return mlxsw_sp->router->rifs[rif_index];
5620 }
5621 
5622 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
5623 {
5624 	return rif->rif_index;
5625 }
5626 
5627 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5628 {
5629 	return lb_rif->common.rif_index;
5630 }
5631 
5632 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5633 {
5634 	return lb_rif->ul_vr_id;
5635 }
5636 
5637 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
5638 {
5639 	return rif->dev->ifindex;
5640 }
5641 
5642 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
5643 {
5644 	return rif->dev;
5645 }
5646 
5647 static struct mlxsw_sp_rif *
5648 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
5649 		    const struct mlxsw_sp_rif_params *params,
5650 		    struct netlink_ext_ack *extack)
5651 {
5652 	u32 tb_id = l3mdev_fib_table(params->dev);
5653 	const struct mlxsw_sp_rif_ops *ops;
5654 	struct mlxsw_sp_fid *fid = NULL;
5655 	enum mlxsw_sp_rif_type type;
5656 	struct mlxsw_sp_rif *rif;
5657 	struct mlxsw_sp_vr *vr;
5658 	u16 rif_index;
5659 	int err;
5660 
5661 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
5662 	ops = mlxsw_sp->router->rif_ops_arr[type];
5663 
5664 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
5665 	if (IS_ERR(vr))
5666 		return ERR_CAST(vr);
5667 	vr->rif_count++;
5668 
5669 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
5670 	if (err) {
5671 		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
5672 		goto err_rif_index_alloc;
5673 	}
5674 
5675 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
5676 	if (!rif) {
5677 		err = -ENOMEM;
5678 		goto err_rif_alloc;
5679 	}
5680 	rif->mlxsw_sp = mlxsw_sp;
5681 	rif->ops = ops;
5682 
5683 	if (ops->fid_get) {
5684 		fid = ops->fid_get(rif);
5685 		if (IS_ERR(fid)) {
5686 			err = PTR_ERR(fid);
5687 			goto err_fid_get;
5688 		}
5689 		rif->fid = fid;
5690 	}
5691 
5692 	if (ops->setup)
5693 		ops->setup(rif, params);
5694 
5695 	err = ops->configure(rif);
5696 	if (err)
5697 		goto err_configure;
5698 
5699 	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
5700 	if (err)
5701 		goto err_mr_rif_add;
5702 
5703 	mlxsw_sp_rif_counters_alloc(rif);
5704 	mlxsw_sp->router->rifs[rif_index] = rif;
5705 
5706 	return rif;
5707 
5708 err_mr_rif_add:
5709 	ops->deconfigure(rif);
5710 err_configure:
5711 	if (fid)
5712 		mlxsw_sp_fid_put(fid);
5713 err_fid_get:
5714 	kfree(rif);
5715 err_rif_alloc:
5716 err_rif_index_alloc:
5717 	vr->rif_count--;
5718 	mlxsw_sp_vr_put(vr);
5719 	return ERR_PTR(err);
5720 }
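
/* Note the error labels above: each undoes exactly the steps that
 * succeeded before the failure, in reverse order, following the usual
 * kernel goto-unwind idiom. mlxsw_sp_rif_destroy() below performs the
 * same teardown for a fully constructed RIF.
 */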
5721 
5722 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
5723 {
5724 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
5725 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
5726 	struct mlxsw_sp_fid *fid = rif->fid;
5727 	struct mlxsw_sp_vr *vr;
5728 
5729 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
5730 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
5731 
5732 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
5733 	mlxsw_sp_rif_counters_free(rif);
5734 	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
5735 	ops->deconfigure(rif);
5736 	if (fid)
5737 		/* Loopback RIFs are not associated with a FID. */
5738 		mlxsw_sp_fid_put(fid);
5739 	kfree(rif);
5740 	vr->rif_count--;
5741 	mlxsw_sp_vr_put(vr);
5742 }
5743 
5744 static void
5745 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
5746 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
5747 {
5748 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
5749 
5750 	params->vid = mlxsw_sp_port_vlan->vid;
5751 	params->lag = mlxsw_sp_port->lagged;
5752 	if (params->lag)
5753 		params->lag_id = mlxsw_sp_port->lag_id;
5754 	else
5755 		params->system_port = mlxsw_sp_port->local_port;
5756 }
5757 
5758 static int
5759 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
5760 			       struct net_device *l3_dev,
5761 			       struct netlink_ext_ack *extack)
5762 {
5763 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
5764 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
5765 	u16 vid = mlxsw_sp_port_vlan->vid;
5766 	struct mlxsw_sp_rif *rif;
5767 	struct mlxsw_sp_fid *fid;
5768 	int err;
5769 
5770 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
5771 	if (!rif) {
5772 		struct mlxsw_sp_rif_params params = {
5773 			.dev = l3_dev,
5774 		};
5775 
5776 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
5777 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
5778 		if (IS_ERR(rif))
5779 			return PTR_ERR(rif);
5780 	}
5781 
5782 	/* The FID was already created by the RIF; this only takes a reference */
5783 	fid = rif->ops->fid_get(rif);
5784 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
5785 	if (err)
5786 		goto err_fid_port_vid_map;
5787 
5788 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
5789 	if (err)
5790 		goto err_port_vid_learning_set;
5791 
5792 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
5793 					BR_STATE_FORWARDING);
5794 	if (err)
5795 		goto err_port_vid_stp_set;
5796 
5797 	mlxsw_sp_port_vlan->fid = fid;
5798 
5799 	return 0;
5800 
5801 err_port_vid_stp_set:
5802 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
5803 err_port_vid_learning_set:
5804 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
5805 err_fid_port_vid_map:
5806 	mlxsw_sp_fid_put(fid);
5807 	return err;
5808 }
5809 
5810 void
5811 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
5812 {
5813 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
5814 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
5815 	u16 vid = mlxsw_sp_port_vlan->vid;
5816 
5817 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
5818 		return;
5819 
5820 	mlxsw_sp_port_vlan->fid = NULL;
5821 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
5822 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
5823 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
5824 	/* If the router port holds the last reference on the rFID, then
5825 	 * the associated Sub-port RIF is destroyed as well.
5826 	 */
5827 	mlxsw_sp_fid_put(fid);
5828 }
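
/* The leave sequence above is the exact reverse of
 * mlxsw_sp_port_vlan_router_join(): STP state back to blocking,
 * learning re-enabled, the {port, VID} unmapped from the FID, and
 * finally the FID reference taken by fid_get() dropped.
 */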
5829 
5830 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
5831 					     struct net_device *port_dev,
5832 					     unsigned long event, u16 vid,
5833 					     struct netlink_ext_ack *extack)
5834 {
5835 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
5836 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
5837 
5838 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
5839 	if (WARN_ON(!mlxsw_sp_port_vlan))
5840 		return -EINVAL;
5841 
5842 	switch (event) {
5843 	case NETDEV_UP:
5844 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
5845 						      l3_dev, extack);
5846 	case NETDEV_DOWN:
5847 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
5848 		break;
5849 	}
5850 
5851 	return 0;
5852 }
5853 
5854 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
5855 					unsigned long event,
5856 					struct netlink_ext_ack *extack)
5857 {
5858 	if (netif_is_bridge_port(port_dev) ||
5859 	    netif_is_lag_port(port_dev) ||
5860 	    netif_is_ovs_port(port_dev))
5861 		return 0;
5862 
5863 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
5864 						 extack);
5865 }
5866 
5867 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
5868 					 struct net_device *lag_dev,
5869 					 unsigned long event, u16 vid,
5870 					 struct netlink_ext_ack *extack)
5871 {
5872 	struct net_device *port_dev;
5873 	struct list_head *iter;
5874 	int err;
5875 
5876 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
5877 		if (mlxsw_sp_port_dev_check(port_dev)) {
5878 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
5879 								port_dev,
5880 								event, vid,
5881 								extack);
5882 			if (err)
5883 				return err;
5884 		}
5885 	}
5886 
5887 	return 0;
5888 }
5889 
5890 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
5891 				       unsigned long event,
5892 				       struct netlink_ext_ack *extack)
5893 {
5894 	if (netif_is_bridge_port(lag_dev))
5895 		return 0;
5896 
5897 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
5898 					     extack);
5899 }
5900 
5901 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
5902 					  unsigned long event,
5903 					  struct netlink_ext_ack *extack)
5904 {
5905 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
5906 	struct mlxsw_sp_rif_params params = {
5907 		.dev = l3_dev,
5908 	};
5909 	struct mlxsw_sp_rif *rif;
5910 
5911 	switch (event) {
5912 	case NETDEV_UP:
5913 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
5914 		if (IS_ERR(rif))
5915 			return PTR_ERR(rif);
5916 		break;
5917 	case NETDEV_DOWN:
5918 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
5919 		mlxsw_sp_rif_destroy(rif);
5920 		break;
5921 	}
5922 
5923 	return 0;
5924 }
5925 
5926 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
5927 					unsigned long event,
5928 					struct netlink_ext_ack *extack)
5929 {
5930 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
5931 	u16 vid = vlan_dev_vlan_id(vlan_dev);
5932 
5933 	if (netif_is_bridge_port(vlan_dev))
5934 		return 0;
5935 
5936 	if (mlxsw_sp_port_dev_check(real_dev))
5937 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
5938 							 event, vid, extack);
5939 	else if (netif_is_lag_master(real_dev))
5940 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
5941 						     vid, extack);
5942 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
5943 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
5944 
5945 	return 0;
5946 }
5947 
5948 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
5949 				     unsigned long event,
5950 				     struct netlink_ext_ack *extack)
5951 {
5952 	if (mlxsw_sp_port_dev_check(dev))
5953 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
5954 	else if (netif_is_lag_master(dev))
5955 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
5956 	else if (netif_is_bridge_master(dev))
5957 		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
5958 	else if (is_vlan_dev(dev))
5959 		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
5960 	else
5961 		return 0;
5962 }
5963 
5964 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
5965 			    unsigned long event, void *ptr)
5966 {
5967 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
5968 	struct net_device *dev = ifa->ifa_dev->dev;
5969 	struct mlxsw_sp *mlxsw_sp;
5970 	struct mlxsw_sp_rif *rif;
5971 	int err = 0;
5972 
5973 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
5974 	if (event == NETDEV_UP)
5975 		goto out;
5976 
5977 	mlxsw_sp = mlxsw_sp_lower_get(dev);
5978 	if (!mlxsw_sp)
5979 		goto out;
5980 
5981 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5982 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
5983 		goto out;
5984 
5985 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
5986 out:
5987 	return notifier_from_errno(err);
5988 }
5989 
5990 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
5991 				  unsigned long event, void *ptr)
5992 {
5993 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
5994 	struct net_device *dev = ivi->ivi_dev->dev;
5995 	struct mlxsw_sp *mlxsw_sp;
5996 	struct mlxsw_sp_rif *rif;
5997 	int err = 0;
5998 
5999 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6000 	if (!mlxsw_sp)
6001 		goto out;
6002 
6003 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6004 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6005 		goto out;
6006 
6007 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6008 out:
6009 	return notifier_from_errno(err);
6010 }
6011 
6012 struct mlxsw_sp_inet6addr_event_work {
6013 	struct work_struct work;
6014 	struct net_device *dev;
6015 	unsigned long event;
6016 };
6017 
6018 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6019 {
6020 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6021 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6022 	struct net_device *dev = inet6addr_work->dev;
6023 	unsigned long event = inet6addr_work->event;
6024 	struct mlxsw_sp *mlxsw_sp;
6025 	struct mlxsw_sp_rif *rif;
6026 
6027 	rtnl_lock();
6028 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6029 	if (!mlxsw_sp)
6030 		goto out;
6031 
6032 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6033 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6034 		goto out;
6035 
6036 	__mlxsw_sp_inetaddr_event(dev, event, NULL);
6037 out:
6038 	rtnl_unlock();
6039 	dev_put(dev);
6040 	kfree(inet6addr_work);
6041 }
6042 
6043 /* Called with rcu_read_lock() */
6044 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6045 			     unsigned long event, void *ptr)
6046 {
6047 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6048 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6049 	struct net_device *dev = if6->idev->dev;
6050 
6051 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6052 	if (event == NETDEV_UP)
6053 		return NOTIFY_DONE;
6054 
6055 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6056 		return NOTIFY_DONE;
6057 
6058 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6059 	if (!inet6addr_work)
6060 		return NOTIFY_BAD;
6061 
6062 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6063 	inet6addr_work->dev = dev;
6064 	inet6addr_work->event = event;
6065 	dev_hold(dev);
6066 	mlxsw_core_schedule_work(&inet6addr_work->work);
6067 
6068 	return NOTIFY_DONE;
6069 }
6070 
6071 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6072 				   unsigned long event, void *ptr)
6073 {
6074 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6075 	struct net_device *dev = i6vi->i6vi_dev->dev;
6076 	struct mlxsw_sp *mlxsw_sp;
6077 	struct mlxsw_sp_rif *rif;
6078 	int err = 0;
6079 
6080 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6081 	if (!mlxsw_sp)
6082 		goto out;
6083 
6084 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6085 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6086 		goto out;
6087 
6088 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6089 out:
6090 	return notifier_from_errno(err);
6091 }
6092 
6093 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6094 			     const char *mac, int mtu)
6095 {
6096 	char ritr_pl[MLXSW_REG_RITR_LEN];
6097 	int err;
6098 
6099 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6100 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6101 	if (err)
6102 		return err;
6103 
6104 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6105 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6106 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6107 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6108 }
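
/* The edit above is a read-modify-write of the RITR register: the
 * current RIF configuration is queried, the MTU and MAC fields are
 * updated, and the record is written back. The RIF_CREATE op appears
 * to act as an update when the RIF index is already in use; this is
 * how the caller below changes MAC and MTU without recreating the RIF.
 */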
6109 
6110 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6111 {
6112 	struct mlxsw_sp *mlxsw_sp;
6113 	struct mlxsw_sp_rif *rif;
6114 	u16 fid_index;
6115 	int err;
6116 
6117 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6118 	if (!mlxsw_sp)
6119 		return 0;
6120 
6121 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6122 	if (!rif)
6123 		return 0;
6124 	fid_index = mlxsw_sp_fid_index(rif->fid);
6125 
6126 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6127 	if (err)
6128 		return err;
6129 
6130 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6131 				dev->mtu);
6132 	if (err)
6133 		goto err_rif_edit;
6134 
6135 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6136 	if (err)
6137 		goto err_rif_fdb_op;
6138 
6139 	if (rif->mtu != dev->mtu) {
6140 		struct mlxsw_sp_vr *vr;
6141 
6142 		/* Unlike in unicast routing, a RIF cannot be shared between
6143 		 * several multicast routing tables, so the RIF is relevant
6144 		 * only to its own mr_table instance.
6145 		 */
6146 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
6147 		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
6148 	}
6149 
6150 	ether_addr_copy(rif->addr, dev->dev_addr);
6151 	rif->mtu = dev->mtu;
6152 
6153 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6154 
6155 	return 0;
6156 
6157 err_rif_fdb_op:
6158 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6159 err_rif_edit:
6160 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6161 	return err;
6162 }
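
/* The MAC/MTU update above is carefully ordered: the old router MAC is
 * first removed from the FDB, the RIF is rewritten with the new MAC and
 * MTU, and only then is the new MAC installed in the FDB. The error
 * path restores the previous RIF and FDB state in reverse order.
 */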
6163 
6164 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6165 				  struct net_device *l3_dev,
6166 				  struct netlink_ext_ack *extack)
6167 {
6168 	struct mlxsw_sp_rif *rif;
6169 
6170 	/* If the netdev is already associated with a RIF, then we need to
6171 	 * destroy it and create a new one with the new virtual router ID.
6172 	 */
6173 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6174 	if (rif)
6175 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6176 
6177 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6178 }
6179 
6180 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6181 				    struct net_device *l3_dev)
6182 {
6183 	struct mlxsw_sp_rif *rif;
6184 
6185 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6186 	if (!rif)
6187 		return;
6188 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6189 }
6190 
6191 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6192 				 struct netdev_notifier_changeupper_info *info)
6193 {
6194 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6195 	int err = 0;
6196 
6197 	if (!mlxsw_sp)
6198 		return 0;
6199 
6200 	switch (event) {
6201 	case NETDEV_PRECHANGEUPPER:
6202 		return 0;
6203 	case NETDEV_CHANGEUPPER:
6204 		if (info->linking) {
6205 			struct netlink_ext_ack *extack;
6206 
6207 			extack = netdev_notifier_info_to_extack(&info->info);
6208 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6209 		} else {
6210 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6211 		}
6212 		break;
6213 	}
6214 
6215 	return err;
6216 }
6217 
6218 static struct mlxsw_sp_rif_subport *
6219 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6220 {
6221 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6222 }
6223 
6224 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6225 				       const struct mlxsw_sp_rif_params *params)
6226 {
6227 	struct mlxsw_sp_rif_subport *rif_subport;
6228 
6229 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6230 	rif_subport->vid = params->vid;
6231 	rif_subport->lag = params->lag;
6232 	if (params->lag)
6233 		rif_subport->lag_id = params->lag_id;
6234 	else
6235 		rif_subport->system_port = params->system_port;
6236 }
6237 
6238 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6239 {
6240 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6241 	struct mlxsw_sp_rif_subport *rif_subport;
6242 	char ritr_pl[MLXSW_REG_RITR_LEN];
6243 
6244 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6245 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6246 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
6247 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6248 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6249 				  rif_subport->lag ? rif_subport->lag_id :
6250 						     rif_subport->system_port,
6251 				  rif_subport->vid);
6252 
6253 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6254 }
6255 
6256 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6257 {
6258 	int err;
6259 
6260 	err = mlxsw_sp_rif_subport_op(rif, true);
6261 	if (err)
6262 		return err;
6263 
6264 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6265 				  mlxsw_sp_fid_index(rif->fid), true);
6266 	if (err)
6267 		goto err_rif_fdb_op;
6268 
6269 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6270 	return 0;
6271 
6272 err_rif_fdb_op:
6273 	mlxsw_sp_rif_subport_op(rif, false);
6274 	return err;
6275 }
6276 
6277 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6278 {
6279 	struct mlxsw_sp_fid *fid = rif->fid;
6280 
6281 	mlxsw_sp_fid_rif_set(fid, NULL);
6282 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6283 			    mlxsw_sp_fid_index(fid), false);
6284 	mlxsw_sp_rif_subport_op(rif, false);
6285 }
6286 
6287 static struct mlxsw_sp_fid *
6288 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
6289 {
6290 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6291 }
6292 
6293 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
6294 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
6295 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
6296 	.setup			= mlxsw_sp_rif_subport_setup,
6297 	.configure		= mlxsw_sp_rif_subport_configure,
6298 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
6299 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
6300 };
6301 
6302 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6303 				    enum mlxsw_reg_ritr_if_type type,
6304 				    u16 vid_fid, bool enable)
6305 {
6306 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6307 	char ritr_pl[MLXSW_REG_RITR_LEN];
6308 
6309 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6310 			    rif->dev->mtu);
6311 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6312 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6313 
6314 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6315 }
6316 
6317 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6318 {
6319 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6320 }
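
/* The "router port" above is a virtual port one past the last physical
 * port of the device. It is used as a member in FID flood tables, so
 * enabling MC/BC flooding towards it (see the configure() callbacks
 * around this function) makes flooded traffic reach the router as well.
 */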
6321 
6322 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6323 {
6324 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6325 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6326 	int err;
6327 
6328 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6329 	if (err)
6330 		return err;
6331 
6332 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6333 				     mlxsw_sp_router_port(mlxsw_sp), true);
6334 	if (err)
6335 		goto err_fid_mc_flood_set;
6336 
6337 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6338 				     mlxsw_sp_router_port(mlxsw_sp), true);
6339 	if (err)
6340 		goto err_fid_bc_flood_set;
6341 
6342 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6343 				  mlxsw_sp_fid_index(rif->fid), true);
6344 	if (err)
6345 		goto err_rif_fdb_op;
6346 
6347 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6348 	return 0;
6349 
6350 err_rif_fdb_op:
6351 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6352 			       mlxsw_sp_router_port(mlxsw_sp), false);
6353 err_fid_bc_flood_set:
6354 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6355 			       mlxsw_sp_router_port(mlxsw_sp), false);
6356 err_fid_mc_flood_set:
6357 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6358 	return err;
6359 }
6360 
6361 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
6362 {
6363 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6364 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6365 	struct mlxsw_sp_fid *fid = rif->fid;
6366 
6367 	mlxsw_sp_fid_rif_set(fid, NULL);
6368 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6369 			    mlxsw_sp_fid_index(fid), false);
6370 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6371 			       mlxsw_sp_router_port(mlxsw_sp), false);
6372 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6373 			       mlxsw_sp_router_port(mlxsw_sp), false);
6374 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6375 }
6376 
6377 static struct mlxsw_sp_fid *
6378 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6379 {
6380 	u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6381 
6382 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
6383 }
6384 
6385 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
6386 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
6387 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6388 	.configure		= mlxsw_sp_rif_vlan_configure,
6389 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
6390 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
6391 };
6392 
6393 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
6394 {
6395 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6396 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6397 	int err;
6398 
6399 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
6400 				       true);
6401 	if (err)
6402 		return err;
6403 
6404 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6405 				     mlxsw_sp_router_port(mlxsw_sp), true);
6406 	if (err)
6407 		goto err_fid_mc_flood_set;
6408 
6409 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6410 				     mlxsw_sp_router_port(mlxsw_sp), true);
6411 	if (err)
6412 		goto err_fid_bc_flood_set;
6413 
6414 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6415 				  mlxsw_sp_fid_index(rif->fid), true);
6416 	if (err)
6417 		goto err_rif_fdb_op;
6418 
6419 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6420 	return 0;
6421 
6422 err_rif_fdb_op:
6423 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6424 			       mlxsw_sp_router_port(mlxsw_sp), false);
6425 err_fid_bc_flood_set:
6426 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6427 			       mlxsw_sp_router_port(mlxsw_sp), false);
6428 err_fid_mc_flood_set:
6429 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6430 	return err;
6431 }
6432 
6433 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
6434 {
6435 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6436 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6437 	struct mlxsw_sp_fid *fid = rif->fid;
6438 
6439 	mlxsw_sp_fid_rif_set(fid, NULL);
6440 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6441 			    mlxsw_sp_fid_index(fid), false);
6442 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6443 			       mlxsw_sp_router_port(mlxsw_sp), false);
6444 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6445 			       mlxsw_sp_router_port(mlxsw_sp), false);
6446 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6447 }
6448 
6449 static struct mlxsw_sp_fid *
6450 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
6451 {
6452 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
6453 }
6454 
6455 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
6456 	.type			= MLXSW_SP_RIF_TYPE_FID,
6457 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6458 	.configure		= mlxsw_sp_rif_fid_configure,
6459 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
6460 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
6461 };
6462 
6463 static struct mlxsw_sp_rif_ipip_lb *
6464 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
6465 {
6466 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
6467 }
6468 
6469 static void
6470 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6471 			   const struct mlxsw_sp_rif_params *params)
6472 {
6473 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6474 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
6475 
6476 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6477 				 common);
6478 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6479 	rif_lb->lb_config = params_lb->lb_config;
6480 }
6481 
6482 static int
6483 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
6484 			struct mlxsw_sp_vr *ul_vr, bool enable)
6485 {
6486 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
6487 	struct mlxsw_sp_rif *rif = &lb_rif->common;
6488 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6489 	char ritr_pl[MLXSW_REG_RITR_LEN];
6490 	u32 saddr4;
6491 
6492 	switch (lb_cf.ul_protocol) {
6493 	case MLXSW_SP_L3_PROTO_IPV4:
6494 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
6495 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
6496 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
6497 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
6498 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
6499 			    ul_vr->id, saddr4, lb_cf.okey);
6500 		break;
6501 
6502 	case MLXSW_SP_L3_PROTO_IPV6:
6503 		return -EAFNOSUPPORT;
6504 	}
6505 
6506 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6507 }
6508 
6509 static int
6510 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
6511 {
6512 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6513 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
6514 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6515 	struct mlxsw_sp_vr *ul_vr;
6516 	int err;
6517 
6518 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
6519 	if (IS_ERR(ul_vr))
6520 		return PTR_ERR(ul_vr);
6521 
6522 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
6523 	if (err)
6524 		goto err_loopback_op;
6525 
6526 	lb_rif->ul_vr_id = ul_vr->id;
6527 	++ul_vr->rif_count;
6528 	return 0;
6529 
6530 err_loopback_op:
6531 	mlxsw_sp_vr_put(ul_vr);
6532 	return err;
6533 }
6534 
6535 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
6536 {
6537 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6538 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6539 	struct mlxsw_sp_vr *ul_vr;
6540 
6541 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
6542 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
6543 
6544 	--ul_vr->rif_count;
6545 	mlxsw_sp_vr_put(ul_vr);
6546 }
6547 
6548 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
6549 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
6550 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
6551 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
6552 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
6553 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
6554 };
6555 
6556 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
6557 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
6558 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
6559 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
6560 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
6561 };
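
/* Supporting a new RIF type is mostly a matter of adding another ops
 * structure to this array; mlxsw_sp_rif_create() picks the ops via
 * mlxsw_sp_dev_rif_type() and needs no type-specific knowledge. A
 * hypothetical sketch (the FOO type and its callbacks are illustrative
 * only, not part of the driver):
 *
 *	static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_foo_ops = {
 *		.type		= MLXSW_SP_RIF_TYPE_FOO,
 *		.rif_size	= sizeof(struct mlxsw_sp_rif_foo),
 *		.setup		= mlxsw_sp_rif_foo_setup,
 *		.configure	= mlxsw_sp_rif_foo_configure,
 *		.deconfigure	= mlxsw_sp_rif_foo_deconfigure,
 *		.fid_get	= mlxsw_sp_rif_foo_fid_get,
 *	};
 */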
6562 
6563 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6564 {
6565 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6566 
6567 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
6568 					 sizeof(struct mlxsw_sp_rif *),
6569 					 GFP_KERNEL);
6570 	if (!mlxsw_sp->router->rifs)
6571 		return -ENOMEM;
6572 
6573 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
6574 
6575 	return 0;
6576 }
6577 
6578 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6579 {
6580 	int i;
6581 
6582 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6583 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6584 
6585 	kfree(mlxsw_sp->router->rifs);
6586 }
6587 
6588 static int
6589 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
6590 {
6591 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
6592 
6593 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
6594 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
6595 }
6596 
6597 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
6598 {
6599 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
6600 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
6601 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
6602 }
6603 
6604 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
6605 {
6606 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
6607 }
6608 
6609 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
6610 {
6611 	struct mlxsw_sp_router *router;
6612 
6613 	/* Flush pending FIB notifications and then flush the device's
6614 	 * table before requesting another dump. The FIB notification
6615 	 * block is unregistered, so no need to take RTNL.
6616 	 */
6617 	mlxsw_core_flush_owq();
6618 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6619 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
6620 }
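
/* Ordering matters here: mlxsw_core_flush_owq() first drains the
 * ordered workqueue that mlxsw_core_schedule_work() queues the FIB
 * work items onto, so no pending work can re-add entries after
 * mlxsw_sp_router_fib_flush() has cleared the device's tables.
 */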
6621 
6622 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
6623 {
6624 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
6625 	u64 max_rifs;
6626 	int err;
6627 
6628 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
6629 		return -EIO;
6630 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6631 
6632 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
6633 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
6634 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
6635 	if (err)
6636 		return err;
6637 	return 0;
6638 }
6639 
6640 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
6641 {
6642 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
6643 
6644 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
6645 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
6646 }
6647 
6648 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
6649 {
6650 	struct mlxsw_sp_router *router;
6651 	int err;
6652 
6653 	router = kzalloc(sizeof(*router), GFP_KERNEL);
6654 	if (!router)
6655 		return -ENOMEM;
6656 	mlxsw_sp->router = router;
6657 	router->mlxsw_sp = mlxsw_sp;
6658 
6659 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
6660 	err = __mlxsw_sp_router_init(mlxsw_sp);
6661 	if (err)
6662 		goto err_router_init;
6663 
6664 	err = mlxsw_sp_rifs_init(mlxsw_sp);
6665 	if (err)
6666 		goto err_rifs_init;
6667 
6668 	err = mlxsw_sp_ipips_init(mlxsw_sp);
6669 	if (err)
6670 		goto err_ipips_init;
6671 
6672 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
6673 			      &mlxsw_sp_nexthop_ht_params);
6674 	if (err)
6675 		goto err_nexthop_ht_init;
6676 
6677 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
6678 			      &mlxsw_sp_nexthop_group_ht_params);
6679 	if (err)
6680 		goto err_nexthop_group_ht_init;
6681 
6682 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
6683 	err = mlxsw_sp_lpm_init(mlxsw_sp);
6684 	if (err)
6685 		goto err_lpm_init;
6686 
6687 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
6688 	if (err)
6689 		goto err_mr_init;
6690 
6691 	err = mlxsw_sp_vrs_init(mlxsw_sp);
6692 	if (err)
6693 		goto err_vrs_init;
6694 
6695 	err = mlxsw_sp_neigh_init(mlxsw_sp);
6696 	if (err)
6697 		goto err_neigh_init;
6698 
6699 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
6700 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
6701 				    mlxsw_sp_router_fib_dump_flush);
6702 	if (err)
6703 		goto err_register_fib_notifier;
6704 
6705 	return 0;
6706 
6707 err_register_fib_notifier:
6708 	mlxsw_sp_neigh_fini(mlxsw_sp);
6709 err_neigh_init:
6710 	mlxsw_sp_vrs_fini(mlxsw_sp);
6711 err_vrs_init:
6712 	mlxsw_sp_mr_fini(mlxsw_sp);
6713 err_mr_init:
6714 	mlxsw_sp_lpm_fini(mlxsw_sp);
6715 err_lpm_init:
6716 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
6717 err_nexthop_group_ht_init:
6718 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
6719 err_nexthop_ht_init:
6720 	mlxsw_sp_ipips_fini(mlxsw_sp);
6721 err_ipips_init:
6722 	mlxsw_sp_rifs_fini(mlxsw_sp);
6723 err_rifs_init:
6724 	__mlxsw_sp_router_fini(mlxsw_sp);
6725 err_router_init:
6726 	kfree(mlxsw_sp->router);
6727 	return err;
6728 }
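
/* mlxsw_sp_router_fini() below is the error-unwind ladder of
 * mlxsw_sp_router_init() executed in full: every *_init() has a
 * matching *_fini()/destroy call, in reverse registration order.
 */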
6729 
6730 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
6731 {
6732 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
6733 	mlxsw_sp_neigh_fini(mlxsw_sp);
6734 	mlxsw_sp_vrs_fini(mlxsw_sp);
6735 	mlxsw_sp_mr_fini(mlxsw_sp);
6736 	mlxsw_sp_lpm_fini(mlxsw_sp);
6737 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
6738 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
6739 	mlxsw_sp_ipips_fini(mlxsw_sp);
6740 	mlxsw_sp_rifs_fini(mlxsw_sp);
6741 	__mlxsw_sp_router_fini(mlxsw_sp);
6742 	kfree(mlxsw_sp->router);
6743 }
6744