xref: /linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c (revision ef347a340b1a8507c22ee3cf981cd5cd64188431)
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the names of the copyright holders nor the names of its
18  *    contributors may be used to endorse or promote products derived from
19  *    this software without specific prior written permission.
20  *
21  * Alternatively, this software may be distributed under the terms of the
22  * GNU General Public License ("GPL") version 2 as published by the Free
23  * Software Foundation.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <linux/gcd.h>
50 #include <linux/random.h>
51 #include <net/netevent.h>
52 #include <net/neighbour.h>
53 #include <net/arp.h>
54 #include <net/ip_fib.h>
55 #include <net/ip6_fib.h>
56 #include <net/fib_rules.h>
57 #include <net/ip_tunnels.h>
58 #include <net/l3mdev.h>
59 #include <net/addrconf.h>
60 #include <net/ndisc.h>
61 #include <net/ipv6.h>
62 #include <net/fib_notifier.h>
63 
64 #include "spectrum.h"
65 #include "core.h"
66 #include "reg.h"
67 #include "spectrum_cnt.h"
68 #include "spectrum_dpipe.h"
69 #include "spectrum_ipip.h"
70 #include "spectrum_mr.h"
71 #include "spectrum_mr_tcam.h"
72 #include "spectrum_router.h"
73 #include "spectrum_span.h"
74 
75 struct mlxsw_sp_fib;
76 struct mlxsw_sp_vr;
77 struct mlxsw_sp_lpm_tree;
78 struct mlxsw_sp_rif_ops;
79 
80 struct mlxsw_sp_router {
81 	struct mlxsw_sp *mlxsw_sp;
82 	struct mlxsw_sp_rif **rifs;
83 	struct mlxsw_sp_vr *vrs;
84 	struct rhashtable neigh_ht;
85 	struct rhashtable nexthop_group_ht;
86 	struct rhashtable nexthop_ht;
87 	struct list_head nexthop_list;
88 	struct {
89 		/* One tree for each protocol: IPv4 and IPv6 */
90 		struct mlxsw_sp_lpm_tree *proto_trees[2];
91 		struct mlxsw_sp_lpm_tree *trees;
92 		unsigned int tree_count;
93 	} lpm;
94 	struct {
95 		struct delayed_work dw;
96 		unsigned long interval;	/* ms */
97 	} neighs_update;
98 	struct delayed_work nexthop_probe_dw;
99 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
100 	struct list_head nexthop_neighs_list;
101 	struct list_head ipip_list;
102 	bool aborted;
103 	struct notifier_block fib_nb;
104 	struct notifier_block netevent_nb;
105 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
106 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
107 };
108 
109 struct mlxsw_sp_rif {
110 	struct list_head nexthop_list;
111 	struct list_head neigh_list;
112 	struct net_device *dev;
113 	struct mlxsw_sp_fid *fid;
114 	unsigned char addr[ETH_ALEN];
115 	int mtu;
116 	u16 rif_index;
117 	u16 vr_id;
118 	const struct mlxsw_sp_rif_ops *ops;
119 	struct mlxsw_sp *mlxsw_sp;
120 
121 	unsigned int counter_ingress;
122 	bool counter_ingress_valid;
123 	unsigned int counter_egress;
124 	bool counter_egress_valid;
125 };
126 
127 struct mlxsw_sp_rif_params {
128 	struct net_device *dev;
129 	union {
130 		u16 system_port;
131 		u16 lag_id;
132 	};
133 	u16 vid;
134 	bool lag;
135 };
136 
137 struct mlxsw_sp_rif_subport {
138 	struct mlxsw_sp_rif common;
139 	union {
140 		u16 system_port;
141 		u16 lag_id;
142 	};
143 	u16 vid;
144 	bool lag;
145 };
146 
147 struct mlxsw_sp_rif_ipip_lb {
148 	struct mlxsw_sp_rif common;
149 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
150 	u16 ul_vr_id; /* Reserved for Spectrum-2. */
151 };
152 
153 struct mlxsw_sp_rif_params_ipip_lb {
154 	struct mlxsw_sp_rif_params common;
155 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
156 };
157 
158 struct mlxsw_sp_rif_ops {
159 	enum mlxsw_sp_rif_type type;
160 	size_t rif_size;
161 
162 	void (*setup)(struct mlxsw_sp_rif *rif,
163 		      const struct mlxsw_sp_rif_params *params);
164 	int (*configure)(struct mlxsw_sp_rif *rif);
165 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
166 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
167 					 struct netlink_ext_ack *extack);
168 };
169 
170 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
171 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
172 				  struct mlxsw_sp_lpm_tree *lpm_tree);
173 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
174 				     const struct mlxsw_sp_fib *fib,
175 				     u8 tree_id);
176 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
177 				       const struct mlxsw_sp_fib *fib);
178 
179 static unsigned int *
180 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
181 			   enum mlxsw_sp_rif_counter_dir dir)
182 {
183 	switch (dir) {
184 	case MLXSW_SP_RIF_COUNTER_EGRESS:
185 		return &rif->counter_egress;
186 	case MLXSW_SP_RIF_COUNTER_INGRESS:
187 		return &rif->counter_ingress;
188 	}
189 	return NULL;
190 }
191 
192 static bool
193 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
194 			       enum mlxsw_sp_rif_counter_dir dir)
195 {
196 	switch (dir) {
197 	case MLXSW_SP_RIF_COUNTER_EGRESS:
198 		return rif->counter_egress_valid;
199 	case MLXSW_SP_RIF_COUNTER_INGRESS:
200 		return rif->counter_ingress_valid;
201 	}
202 	return false;
203 }
204 
205 static void
206 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
207 			       enum mlxsw_sp_rif_counter_dir dir,
208 			       bool valid)
209 {
210 	switch (dir) {
211 	case MLXSW_SP_RIF_COUNTER_EGRESS:
212 		rif->counter_egress_valid = valid;
213 		break;
214 	case MLXSW_SP_RIF_COUNTER_INGRESS:
215 		rif->counter_ingress_valid = valid;
216 		break;
217 	}
218 }
219 
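/* Bind or unbind counter @counter_index to the RIF in direction @dir. RITR
 * (the router interface table register) is queried first, so that the
 * counter fields can be edited without clobbering the rest of the RIF's
 * configuration, and the modified payload is then written back.
 */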
220 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
221 				     unsigned int counter_index, bool enable,
222 				     enum mlxsw_sp_rif_counter_dir dir)
223 {
224 	char ritr_pl[MLXSW_REG_RITR_LEN];
225 	bool is_egress = false;
226 	int err;
227 
228 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
229 		is_egress = true;
230 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
231 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
232 	if (err)
233 		return err;
234 
235 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
236 				    is_egress);
237 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
238 }
239 
240 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
241 				   struct mlxsw_sp_rif *rif,
242 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
243 {
244 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
245 	unsigned int *p_counter_index;
246 	bool valid;
247 	int err;
248 
249 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
250 	if (!valid)
251 		return -EINVAL;
252 
253 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
254 	if (!p_counter_index)
255 		return -EINVAL;
256 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
257 			     MLXSW_REG_RICNT_OPCODE_NOP);
258 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
259 	if (err)
260 		return err;
261 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
262 	return 0;
263 }
264 
265 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
266 				      unsigned int counter_index)
267 {
268 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
269 
270 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
271 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
272 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
273 }
274 
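/* Allocate a flow counter for @rif in direction @dir: take an index from the
 * RIF counter sub-pool, clear the new counter via RICNT and bind it to the
 * RIF via RITR. A later read could look as follows (a minimal sketch, error
 * handling omitted):
 *
 *	u64 cnt;
 *
 *	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
 *	mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif,
 *				       MLXSW_SP_RIF_COUNTER_EGRESS, &cnt);
 */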
275 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
276 			       struct mlxsw_sp_rif *rif,
277 			       enum mlxsw_sp_rif_counter_dir dir)
278 {
279 	unsigned int *p_counter_index;
280 	int err;
281 
282 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
283 	if (!p_counter_index)
284 		return -EINVAL;
285 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
286 				     p_counter_index);
287 	if (err)
288 		return err;
289 
290 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
291 	if (err)
292 		goto err_counter_clear;
293 
294 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
295 					*p_counter_index, true, dir);
296 	if (err)
297 		goto err_counter_edit;
298 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
299 	return 0;
300 
301 err_counter_edit:
302 err_counter_clear:
303 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
304 			      *p_counter_index);
305 	return err;
306 }
307 
308 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
309 			       struct mlxsw_sp_rif *rif,
310 			       enum mlxsw_sp_rif_counter_dir dir)
311 {
312 	unsigned int *p_counter_index;
313 
314 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
315 		return;
316 
317 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
318 	if (WARN_ON(!p_counter_index))
319 		return;
320 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
321 				  *p_counter_index, false, dir);
322 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
323 			      *p_counter_index);
324 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
325 }
326 
327 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
328 {
329 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
330 	struct devlink *devlink;
331 
332 	devlink = priv_to_devlink(mlxsw_sp->core);
333 	if (!devlink_dpipe_table_counter_enabled(devlink,
334 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
335 		return;
336 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
337 }
338 
339 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
340 {
341 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
342 
343 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
344 }
345 
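/* An IPv6 address is 128 bits wide, so valid prefix lengths run from /0 to
 * /128 -- 129 distinct values, hence the "+ 1" below. The same bitmap also
 * covers IPv4, whose /0../32 range is a subset.
 */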
346 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
347 
348 struct mlxsw_sp_prefix_usage {
349 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
350 };
351 
352 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
353 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
354 
355 static bool
356 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
357 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
358 {
359 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
360 }
361 
362 static void
363 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
364 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
365 {
366 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
367 }
368 
369 static void
370 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
371 			  unsigned char prefix_len)
372 {
373 	set_bit(prefix_len, prefix_usage->b);
374 }
375 
376 static void
377 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
378 			    unsigned char prefix_len)
379 {
380 	clear_bit(prefix_len, prefix_usage->b);
381 }
382 
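/* A FIB key holds the route's address and prefix length. The address field
 * is sized for IPv6; IPv4 addresses only use the first four bytes (see the
 * IPv4 lookup below, which passes an address length of 4).
 */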
383 struct mlxsw_sp_fib_key {
384 	unsigned char addr[sizeof(struct in6_addr)];
385 	unsigned char prefix_len;
386 };
387 
388 enum mlxsw_sp_fib_entry_type {
389 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
390 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
391 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
392 
393 	/* This is a special case of local delivery, where a packet should be
394 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
395 	 * because that's a type of next hop, not of FIB entry. (There can be
396 	 * several next hops in a REMOTE entry, and some of them may be
397 	 * encapsulating entries.)
398 	 */
399 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
400 };
401 
402 struct mlxsw_sp_nexthop_group;
403 
404 struct mlxsw_sp_fib_node {
405 	struct list_head entry_list;
406 	struct list_head list;
407 	struct rhash_head ht_node;
408 	struct mlxsw_sp_fib *fib;
409 	struct mlxsw_sp_fib_key key;
410 };
411 
412 struct mlxsw_sp_fib_entry_decap {
413 	struct mlxsw_sp_ipip_entry *ipip_entry;
414 	u32 tunnel_index;
415 };
416 
417 struct mlxsw_sp_fib_entry {
418 	struct list_head list;
419 	struct mlxsw_sp_fib_node *fib_node;
420 	enum mlxsw_sp_fib_entry_type type;
421 	struct list_head nexthop_group_node;
422 	struct mlxsw_sp_nexthop_group *nh_group;
423 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
424 };
425 
426 struct mlxsw_sp_fib4_entry {
427 	struct mlxsw_sp_fib_entry common;
428 	u32 tb_id;
429 	u32 prio;
430 	u8 tos;
431 	u8 type;
432 };
433 
434 struct mlxsw_sp_fib6_entry {
435 	struct mlxsw_sp_fib_entry common;
436 	struct list_head rt6_list;
437 	unsigned int nrt6;
438 };
439 
440 struct mlxsw_sp_rt6 {
441 	struct list_head list;
442 	struct fib6_info *rt;
443 };
444 
445 struct mlxsw_sp_lpm_tree {
446 	u8 id; /* tree ID */
447 	unsigned int ref_count;
448 	enum mlxsw_sp_l3proto proto;
449 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
450 	struct mlxsw_sp_prefix_usage prefix_usage;
451 };
452 
453 struct mlxsw_sp_fib {
454 	struct rhashtable ht;
455 	struct list_head node_list;
456 	struct mlxsw_sp_vr *vr;
457 	struct mlxsw_sp_lpm_tree *lpm_tree;
458 	enum mlxsw_sp_l3proto proto;
459 };
460 
461 struct mlxsw_sp_vr {
462 	u16 id; /* virtual router ID */
463 	u32 tb_id; /* kernel fib table id */
464 	unsigned int rif_count;
465 	struct mlxsw_sp_fib *fib4;
466 	struct mlxsw_sp_fib *fib6;
467 	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
468 };
469 
470 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
471 
472 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
473 						struct mlxsw_sp_vr *vr,
474 						enum mlxsw_sp_l3proto proto)
475 {
476 	struct mlxsw_sp_lpm_tree *lpm_tree;
477 	struct mlxsw_sp_fib *fib;
478 	int err;
479 
480 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
481 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
482 	if (!fib)
483 		return ERR_PTR(-ENOMEM);
484 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
485 	if (err)
486 		goto err_rhashtable_init;
487 	INIT_LIST_HEAD(&fib->node_list);
488 	fib->proto = proto;
489 	fib->vr = vr;
490 	fib->lpm_tree = lpm_tree;
491 	mlxsw_sp_lpm_tree_hold(lpm_tree);
492 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
493 	if (err)
494 		goto err_lpm_tree_bind;
495 	return fib;
496 
497 err_lpm_tree_bind:
498 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
499 err_rhashtable_init:
500 	kfree(fib);
501 	return ERR_PTR(err);
502 }
503 
504 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
505 				 struct mlxsw_sp_fib *fib)
506 {
507 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
508 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
509 	WARN_ON(!list_empty(&fib->node_list));
510 	rhashtable_destroy(&fib->ht);
511 	kfree(fib);
512 }
513 
514 static struct mlxsw_sp_lpm_tree *
515 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
516 {
517 	struct mlxsw_sp_lpm_tree *lpm_tree;
518 	int i;
519 
520 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
521 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
522 		if (lpm_tree->ref_count == 0)
523 			return lpm_tree;
524 	}
525 	return NULL;
526 }
527 
528 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
529 				   struct mlxsw_sp_lpm_tree *lpm_tree)
530 {
531 	char ralta_pl[MLXSW_REG_RALTA_LEN];
532 
533 	mlxsw_reg_ralta_pack(ralta_pl, true,
534 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
535 			     lpm_tree->id);
536 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
537 }
538 
539 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
540 				   struct mlxsw_sp_lpm_tree *lpm_tree)
541 {
542 	char ralta_pl[MLXSW_REG_RALTA_LEN];
543 
544 	mlxsw_reg_ralta_pack(ralta_pl, false,
545 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
546 			     lpm_tree->id);
547 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
548 }
549 
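/* Describe the tree layout to the device via RALST (the router algorithmic
 * LPM structure tree register). The longest prefix length in use becomes the
 * root bin, and the used prefix lengths are chained through their left child
 * down to the shortest used prefix; right children are left unused.
 */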
550 static int
551 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
552 				  struct mlxsw_sp_prefix_usage *prefix_usage,
553 				  struct mlxsw_sp_lpm_tree *lpm_tree)
554 {
555 	char ralst_pl[MLXSW_REG_RALST_LEN];
556 	u8 root_bin = 0;
557 	u8 prefix;
558 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
559 
560 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
561 		root_bin = prefix;
562 
563 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
564 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
565 		if (prefix == 0)
566 			continue;
567 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
568 					 MLXSW_REG_RALST_BIN_NO_CHILD);
569 		last_prefix = prefix;
570 	}
571 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
572 }
573 
574 static struct mlxsw_sp_lpm_tree *
575 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
576 			 struct mlxsw_sp_prefix_usage *prefix_usage,
577 			 enum mlxsw_sp_l3proto proto)
578 {
579 	struct mlxsw_sp_lpm_tree *lpm_tree;
580 	int err;
581 
582 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
583 	if (!lpm_tree)
584 		return ERR_PTR(-EBUSY);
585 	lpm_tree->proto = proto;
586 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
587 	if (err)
588 		return ERR_PTR(err);
589 
590 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
591 						lpm_tree);
592 	if (err)
593 		goto err_left_struct_set;
594 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
595 	       sizeof(lpm_tree->prefix_usage));
596 	memset(&lpm_tree->prefix_ref_count, 0,
597 	       sizeof(lpm_tree->prefix_ref_count));
598 	lpm_tree->ref_count = 1;
599 	return lpm_tree;
600 
601 err_left_struct_set:
602 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
603 	return ERR_PTR(err);
604 }
605 
606 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
607 				      struct mlxsw_sp_lpm_tree *lpm_tree)
608 {
609 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
610 }
611 
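/* Get a tree matching @prefix_usage for @proto. An existing tree with
 * identical prefix usage is reference-counted and shared; only when none
 * matches is a new tree carved out of the unused pool.
 */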
612 static struct mlxsw_sp_lpm_tree *
613 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
614 		      struct mlxsw_sp_prefix_usage *prefix_usage,
615 		      enum mlxsw_sp_l3proto proto)
616 {
617 	struct mlxsw_sp_lpm_tree *lpm_tree;
618 	int i;
619 
620 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
621 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
622 		if (lpm_tree->ref_count != 0 &&
623 		    lpm_tree->proto == proto &&
624 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
625 					     prefix_usage)) {
626 			mlxsw_sp_lpm_tree_hold(lpm_tree);
627 			return lpm_tree;
628 		}
629 	}
630 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
631 }
632 
633 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
634 {
635 	lpm_tree->ref_count++;
636 }
637 
638 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
639 				  struct mlxsw_sp_lpm_tree *lpm_tree)
640 {
641 	if (--lpm_tree->ref_count == 0)
642 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
643 }
644 
645 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
646 
647 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
648 {
649 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
650 	struct mlxsw_sp_lpm_tree *lpm_tree;
651 	u64 max_trees;
652 	int err, i;
653 
654 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
655 		return -EIO;
656 
657 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
658 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
659 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
660 					     sizeof(struct mlxsw_sp_lpm_tree),
661 					     GFP_KERNEL);
662 	if (!mlxsw_sp->router->lpm.trees)
663 		return -ENOMEM;
664 
665 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
666 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
667 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
668 	}
669 
670 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
671 					 MLXSW_SP_L3_PROTO_IPV4);
672 	if (IS_ERR(lpm_tree)) {
673 		err = PTR_ERR(lpm_tree);
674 		goto err_ipv4_tree_get;
675 	}
676 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
677 
678 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
679 					 MLXSW_SP_L3_PROTO_IPV6);
680 	if (IS_ERR(lpm_tree)) {
681 		err = PTR_ERR(lpm_tree);
682 		goto err_ipv6_tree_get;
683 	}
684 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
685 
686 	return 0;
687 
688 err_ipv6_tree_get:
689 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
690 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
691 err_ipv4_tree_get:
692 	kfree(mlxsw_sp->router->lpm.trees);
693 	return err;
694 }
695 
696 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
697 {
698 	struct mlxsw_sp_lpm_tree *lpm_tree;
699 
700 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
701 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
702 
703 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
704 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
705 
706 	kfree(mlxsw_sp->router->lpm.trees);
707 }
708 
709 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
710 {
711 	return !!vr->fib4 || !!vr->fib6 ||
712 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
713 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
714 }
715 
716 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
717 {
718 	struct mlxsw_sp_vr *vr;
719 	int i;
720 
721 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
722 		vr = &mlxsw_sp->router->vrs[i];
723 		if (!mlxsw_sp_vr_is_used(vr))
724 			return vr;
725 	}
726 	return NULL;
727 }
728 
729 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
730 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
731 {
732 	char raltb_pl[MLXSW_REG_RALTB_LEN];
733 
734 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
735 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
736 			     tree_id);
737 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
738 }
739 
740 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
741 				       const struct mlxsw_sp_fib *fib)
742 {
743 	char raltb_pl[MLXSW_REG_RALTB_LEN];
744 
745 	/* Bind to tree 0, which is the default */
746 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
747 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
748 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
749 }
750 
751 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
752 {
753 	/* For our purpose, squash main, default and local tables into one */
754 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
755 		tb_id = RT_TABLE_MAIN;
756 	return tb_id;
757 }
758 
759 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
760 					    u32 tb_id)
761 {
762 	struct mlxsw_sp_vr *vr;
763 	int i;
764 
765 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
766 
767 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
768 		vr = &mlxsw_sp->router->vrs[i];
769 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
770 			return vr;
771 	}
772 	return NULL;
773 }
774 
775 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
776 					    enum mlxsw_sp_l3proto proto)
777 {
778 	switch (proto) {
779 	case MLXSW_SP_L3_PROTO_IPV4:
780 		return vr->fib4;
781 	case MLXSW_SP_L3_PROTO_IPV6:
782 		return vr->fib6;
783 	}
784 	return NULL;
785 }
786 
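/* Create a virtual router for @tb_id. A VR is usable only once all four of
 * its tables exist, so the IPv4 and IPv6 unicast FIBs and both multicast
 * tables are created up front, with the usual unwind on error. Assigning the
 * tables at the end is what marks the VR as used (see mlxsw_sp_vr_is_used()).
 */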
787 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
788 					      u32 tb_id,
789 					      struct netlink_ext_ack *extack)
790 {
791 	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
792 	struct mlxsw_sp_fib *fib4;
793 	struct mlxsw_sp_fib *fib6;
794 	struct mlxsw_sp_vr *vr;
795 	int err;
796 
797 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
798 	if (!vr) {
799 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
800 		return ERR_PTR(-EBUSY);
801 	}
802 	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
803 	if (IS_ERR(fib4))
804 		return ERR_CAST(fib4);
805 	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
806 	if (IS_ERR(fib6)) {
807 		err = PTR_ERR(fib6);
808 		goto err_fib6_create;
809 	}
810 	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
811 					     MLXSW_SP_L3_PROTO_IPV4);
812 	if (IS_ERR(mr4_table)) {
813 		err = PTR_ERR(mr4_table);
814 		goto err_mr4_table_create;
815 	}
816 	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
817 					     MLXSW_SP_L3_PROTO_IPV6);
818 	if (IS_ERR(mr6_table)) {
819 		err = PTR_ERR(mr6_table);
820 		goto err_mr6_table_create;
821 	}
822 
823 	vr->fib4 = fib4;
824 	vr->fib6 = fib6;
825 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
826 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
827 	vr->tb_id = tb_id;
828 	return vr;
829 
830 err_mr6_table_create:
831 	mlxsw_sp_mr_table_destroy(mr4_table);
832 err_mr4_table_create:
833 	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
834 err_fib6_create:
835 	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
836 	return ERR_PTR(err);
837 }
838 
839 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
840 				struct mlxsw_sp_vr *vr)
841 {
842 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
843 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
844 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
845 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
846 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
847 	vr->fib6 = NULL;
848 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
849 	vr->fib4 = NULL;
850 }
851 
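/* Find the VR backing @tb_id, creating it on first use. The matching
 * mlxsw_sp_vr_put() below destroys the VR again once no RIF, FIB node or
 * multicast route references it.
 */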
852 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
853 					   struct netlink_ext_ack *extack)
854 {
855 	struct mlxsw_sp_vr *vr;
856 
857 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
858 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
859 	if (!vr)
860 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
861 	return vr;
862 }
863 
864 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
865 {
866 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
867 	    list_empty(&vr->fib6->node_list) &&
868 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
869 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
870 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
871 }
872 
873 static bool
874 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
875 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
876 {
877 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
878 
879 	if (!mlxsw_sp_vr_is_used(vr))
880 		return false;
881 	if (fib->lpm_tree->id == tree_id)
882 		return true;
883 	return false;
884 }
885 
886 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
887 					struct mlxsw_sp_fib *fib,
888 					struct mlxsw_sp_lpm_tree *new_tree)
889 {
890 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
891 	int err;
892 
893 	fib->lpm_tree = new_tree;
894 	mlxsw_sp_lpm_tree_hold(new_tree);
895 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
896 	if (err)
897 		goto err_tree_bind;
898 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
899 	return 0;
900 
901 err_tree_bind:
902 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
903 	fib->lpm_tree = old_tree;
904 	return err;
905 }
906 
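/* Move every VR bound to the current default tree of @fib's protocol over to
 * @new_tree, rolling already-converted VRs back if any rebind fails. On
 * success, the prefix reference counts migrate to the new tree and it becomes
 * the protocol's default.
 */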
907 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
908 					 struct mlxsw_sp_fib *fib,
909 					 struct mlxsw_sp_lpm_tree *new_tree)
910 {
911 	enum mlxsw_sp_l3proto proto = fib->proto;
912 	struct mlxsw_sp_lpm_tree *old_tree;
913 	u8 old_id, new_id = new_tree->id;
914 	struct mlxsw_sp_vr *vr;
915 	int i, err;
916 
917 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
918 	old_id = old_tree->id;
919 
920 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
921 		vr = &mlxsw_sp->router->vrs[i];
922 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
923 			continue;
924 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
925 						   mlxsw_sp_vr_fib(vr, proto),
926 						   new_tree);
927 		if (err)
928 			goto err_tree_replace;
929 	}
930 
931 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
932 	       sizeof(new_tree->prefix_ref_count));
933 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
934 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
935 
936 	return 0;
937 
938 err_tree_replace:
	for (i--; i >= 0; i--) {
		/* Reload the VR for this index; without this, the stale
		 * pointer left over from the forward loop would be tested
		 * and rolled back over and over.
		 */
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
946 	return err;
947 }
948 
949 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
950 {
951 	struct mlxsw_sp_vr *vr;
952 	u64 max_vrs;
953 	int i;
954 
955 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
956 		return -EIO;
957 
958 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
959 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
960 					GFP_KERNEL);
961 	if (!mlxsw_sp->router->vrs)
962 		return -ENOMEM;
963 
964 	for (i = 0; i < max_vrs; i++) {
965 		vr = &mlxsw_sp->router->vrs[i];
966 		vr->id = i;
967 	}
968 
969 	return 0;
970 }
971 
972 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
973 
974 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
975 {
976 	/* At this stage we're guaranteed not to receive new FIB
977 	 * notifications, and no FIB work for netdevs on top of mlxsw
978 	 * ports is sitting on the work queue anymore. However, work
979 	 * for other FIBs can still be queued. Flush the queue before
980 	 * flushing the device's tables. No need for locks, as we're
981 	 * the only writer.
982 	 */
983 	mlxsw_core_flush_owq();
984 	mlxsw_sp_router_fib_flush(mlxsw_sp);
985 	kfree(mlxsw_sp->router->vrs);
986 }
987 
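/* Return the netdevice that the tunnel is bound to via its "link" parameter,
 * or NULL when the tunnel is not bound to an underlay device.
 */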
988 static struct net_device *
989 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
990 {
991 	struct ip_tunnel *tun = netdev_priv(ol_dev);
992 	struct net *net = dev_net(ol_dev);
993 
994 	return __dev_get_by_index(net, tun->parms.link);
995 }
996 
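/* Determine the table in which the tunnel's underlay addresses are resolved:
 * the l3mdev table of the bound underlay device if there is one, otherwise
 * that of the tunnel device itself, defaulting to RT_TABLE_MAIN outside of
 * any VRF.
 */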
997 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
998 {
999 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1000 
1001 	if (d)
1002 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1003 	else
1004 		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
1005 }
1006 
1007 static struct mlxsw_sp_rif *
1008 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1009 		    const struct mlxsw_sp_rif_params *params,
1010 		    struct netlink_ext_ack *extack);
1011 
1012 static struct mlxsw_sp_rif_ipip_lb *
1013 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1014 				enum mlxsw_sp_ipip_type ipipt,
1015 				struct net_device *ol_dev,
1016 				struct netlink_ext_ack *extack)
1017 {
1018 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
1019 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1020 	struct mlxsw_sp_rif *rif;
1021 
1022 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1023 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1024 		.common.dev = ol_dev,
1025 		.common.lag = false,
1026 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1027 	};
1028 
1029 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1030 	if (IS_ERR(rif))
1031 		return ERR_CAST(rif);
1032 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1033 }
1034 
1035 static struct mlxsw_sp_ipip_entry *
1036 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1037 			  enum mlxsw_sp_ipip_type ipipt,
1038 			  struct net_device *ol_dev)
1039 {
1040 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1041 	struct mlxsw_sp_ipip_entry *ipip_entry;
1042 	struct mlxsw_sp_ipip_entry *ret = NULL;
1043 
1044 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1045 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1046 	if (!ipip_entry)
1047 		return ERR_PTR(-ENOMEM);
1048 
1049 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1050 							    ol_dev, NULL);
1051 	if (IS_ERR(ipip_entry->ol_lb)) {
1052 		ret = ERR_CAST(ipip_entry->ol_lb);
1053 		goto err_ol_ipip_lb_create;
1054 	}
1055 
1056 	ipip_entry->ipipt = ipipt;
1057 	ipip_entry->ol_dev = ol_dev;
1058 
1059 	switch (ipip_ops->ul_proto) {
1060 	case MLXSW_SP_L3_PROTO_IPV4:
1061 		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1062 		break;
1063 	case MLXSW_SP_L3_PROTO_IPV6:
1064 		WARN_ON(1);
1065 		break;
1066 	}
1067 
1068 	return ipip_entry;
1069 
1070 err_ol_ipip_lb_create:
1071 	kfree(ipip_entry);
1072 	return ret;
1073 }
1074 
1075 static void
1076 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1077 {
1078 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1079 	kfree(ipip_entry);
1080 }
1081 
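/* Check whether @ipip_entry matches the given underlay protocol, local
 * (tunnel source) address and underlay table.
 */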
1082 static bool
1083 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1084 				  const enum mlxsw_sp_l3proto ul_proto,
1085 				  union mlxsw_sp_l3addr saddr,
1086 				  u32 ul_tb_id,
1087 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1088 {
1089 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1090 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1091 	union mlxsw_sp_l3addr tun_saddr;
1092 
1093 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1094 		return false;
1095 
1096 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1097 	return tun_ul_tb_id == ul_tb_id &&
1098 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1099 }
1100 
1101 static int
1102 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1103 			      struct mlxsw_sp_fib_entry *fib_entry,
1104 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1105 {
1106 	u32 tunnel_index;
1107 	int err;
1108 
1109 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
1110 	if (err)
1111 		return err;
1112 
1113 	ipip_entry->decap_fib_entry = fib_entry;
1114 	fib_entry->decap.ipip_entry = ipip_entry;
1115 	fib_entry->decap.tunnel_index = tunnel_index;
1116 	return 0;
1117 }
1118 
1119 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1120 					  struct mlxsw_sp_fib_entry *fib_entry)
1121 {
1122 	/* Unlink this FIB entry from the IPIP entry whose decap route it is. */
1123 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1124 	fib_entry->decap.ipip_entry = NULL;
1125 	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
1126 }
1127 
1128 static struct mlxsw_sp_fib_node *
1129 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1130 			 size_t addr_len, unsigned char prefix_len);
1131 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1132 				     struct mlxsw_sp_fib_entry *fib_entry);
1133 
1134 static void
1135 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1136 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1137 {
1138 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1139 
1140 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1141 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1142 
1143 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1144 }
1145 
1146 static void
1147 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1148 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1149 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1150 {
1151 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1152 					  ipip_entry))
1153 		return;
1154 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1155 
1156 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1157 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1158 }
1159 
1160 /* Given an IPIP entry, find the corresponding decap route. */
1161 static struct mlxsw_sp_fib_entry *
1162 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1163 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1164 {
1165 	struct mlxsw_sp_fib_node *fib_node;
1166 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1167 	struct mlxsw_sp_fib_entry *fib_entry;
1168 	unsigned char saddr_prefix_len;
1169 	union mlxsw_sp_l3addr saddr;
1170 	struct mlxsw_sp_fib *ul_fib;
1171 	struct mlxsw_sp_vr *ul_vr;
1172 	const void *saddrp;
1173 	size_t saddr_len;
1174 	u32 ul_tb_id;
1175 	u32 saddr4;
1176 
1177 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1178 
1179 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1180 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1181 	if (!ul_vr)
1182 		return NULL;
1183 
1184 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1185 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1186 					   ipip_entry->ol_dev);
1187 
1188 	switch (ipip_ops->ul_proto) {
1189 	case MLXSW_SP_L3_PROTO_IPV4:
1190 		saddr4 = be32_to_cpu(saddr.addr4);
1191 		saddrp = &saddr4;
1192 		saddr_len = 4;
1193 		saddr_prefix_len = 32;
1194 		break;
1195 	case MLXSW_SP_L3_PROTO_IPV6:
1196 		WARN_ON(1);
1197 		return NULL;
1198 	}
1199 
1200 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1201 					    saddr_prefix_len);
1202 	if (!fib_node || list_empty(&fib_node->entry_list))
1203 		return NULL;
1204 
1205 	fib_entry = list_first_entry(&fib_node->entry_list,
1206 				     struct mlxsw_sp_fib_entry, list);
1207 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1208 		return NULL;
1209 
1210 	return fib_entry;
1211 }
1212 
1213 static struct mlxsw_sp_ipip_entry *
1214 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1215 			   enum mlxsw_sp_ipip_type ipipt,
1216 			   struct net_device *ol_dev)
1217 {
1218 	struct mlxsw_sp_ipip_entry *ipip_entry;
1219 
1220 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1221 	if (IS_ERR(ipip_entry))
1222 		return ipip_entry;
1223 
1224 	list_add_tail(&ipip_entry->ipip_list_node,
1225 		      &mlxsw_sp->router->ipip_list);
1226 
1227 	return ipip_entry;
1228 }
1229 
1230 static void
1231 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1232 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1233 {
1234 	list_del(&ipip_entry->ipip_list_node);
1235 	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1236 }
1237 
1238 static bool
1239 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1240 				  const struct net_device *ul_dev,
1241 				  enum mlxsw_sp_l3proto ul_proto,
1242 				  union mlxsw_sp_l3addr ul_dip,
1243 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1244 {
1245 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1246 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1247 	struct net_device *ipip_ul_dev;
1248 
1249 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1250 		return false;
1251 
1252 	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1253 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1254 						 ul_tb_id, ipip_entry) &&
1255 	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1256 }
1257 
1258 /* Given decap parameters, find the corresponding IPIP entry. */
1259 static struct mlxsw_sp_ipip_entry *
1260 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1261 				  const struct net_device *ul_dev,
1262 				  enum mlxsw_sp_l3proto ul_proto,
1263 				  union mlxsw_sp_l3addr ul_dip)
1264 {
1265 	struct mlxsw_sp_ipip_entry *ipip_entry;
1266 
1267 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1268 			    ipip_list_node)
1269 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1270 						      ul_proto, ul_dip,
1271 						      ipip_entry))
1272 			return ipip_entry;
1273 
1274 	return NULL;
1275 }
1276 
1277 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1278 				      const struct net_device *dev,
1279 				      enum mlxsw_sp_ipip_type *p_type)
1280 {
1281 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1282 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1283 	enum mlxsw_sp_ipip_type ipipt;
1284 
1285 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1286 		ipip_ops = router->ipip_ops_arr[ipipt];
1287 		if (dev->type == ipip_ops->dev_type) {
1288 			if (p_type)
1289 				*p_type = ipipt;
1290 			return true;
1291 		}
1292 	}
1293 	return false;
1294 }
1295 
1296 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1297 				const struct net_device *dev)
1298 {
1299 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1300 }
1301 
1302 static struct mlxsw_sp_ipip_entry *
1303 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1304 				   const struct net_device *ol_dev)
1305 {
1306 	struct mlxsw_sp_ipip_entry *ipip_entry;
1307 
1308 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1309 			    ipip_list_node)
1310 		if (ipip_entry->ol_dev == ol_dev)
1311 			return ipip_entry;
1312 
1313 	return NULL;
1314 }
1315 
1316 static struct mlxsw_sp_ipip_entry *
1317 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1318 				   const struct net_device *ul_dev,
1319 				   struct mlxsw_sp_ipip_entry *start)
1320 {
1321 	struct mlxsw_sp_ipip_entry *ipip_entry;
1322 
1323 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1324 					ipip_list_node);
1325 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1326 				     ipip_list_node) {
1327 		struct net_device *ipip_ul_dev =
1328 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1329 
1330 		if (ipip_ul_dev == ul_dev)
1331 			return ipip_entry;
1332 	}
1333 
1334 	return NULL;
1335 }
1336 
1337 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1338 				const struct net_device *dev)
1339 {
1340 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1341 }
1342 
1343 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1344 						const struct net_device *ol_dev,
1345 						enum mlxsw_sp_ipip_type ipipt)
1346 {
1347 	const struct mlxsw_sp_ipip_ops *ops
1348 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1349 
1350 	/* For deciding whether decap should be offloaded, we don't care about
1351 	 * overlay protocol, so ask whether either one is supported.
1352 	 */
1353 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1354 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1355 }
1356 
1357 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1358 						struct net_device *ol_dev)
1359 {
1360 	struct mlxsw_sp_ipip_entry *ipip_entry;
1361 	enum mlxsw_sp_l3proto ul_proto;
1362 	enum mlxsw_sp_ipip_type ipipt;
1363 	union mlxsw_sp_l3addr saddr;
1364 	u32 ul_tb_id;
1365 
1366 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1367 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1368 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1369 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1370 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1371 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1372 							  saddr, ul_tb_id,
1373 							  NULL)) {
1374 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1375 								ol_dev);
1376 			if (IS_ERR(ipip_entry))
1377 				return PTR_ERR(ipip_entry);
1378 		}
1379 	}
1380 
1381 	return 0;
1382 }
1383 
1384 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1385 						   struct net_device *ol_dev)
1386 {
1387 	struct mlxsw_sp_ipip_entry *ipip_entry;
1388 
1389 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1390 	if (ipip_entry)
1391 		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1392 }
1393 
1394 static void
1395 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1396 				struct mlxsw_sp_ipip_entry *ipip_entry)
1397 {
1398 	struct mlxsw_sp_fib_entry *decap_fib_entry;
1399 
1400 	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1401 	if (decap_fib_entry)
1402 		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1403 						  decap_fib_entry);
1404 }
1405 
1406 static int
1407 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
1408 			struct mlxsw_sp_vr *ul_vr, bool enable)
1409 {
1410 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1411 	struct mlxsw_sp_rif *rif = &lb_rif->common;
1412 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1413 	char ritr_pl[MLXSW_REG_RITR_LEN];
1414 	u32 saddr4;
1415 
1416 	switch (lb_cf.ul_protocol) {
1417 	case MLXSW_SP_L3_PROTO_IPV4:
1418 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1419 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1420 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
1421 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1422 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1423 			    ul_vr->id, saddr4, lb_cf.okey);
1424 		break;
1425 
1426 	case MLXSW_SP_L3_PROTO_IPV6:
1427 		return -EAFNOSUPPORT;
1428 	}
1429 
1430 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1431 }
1432 
1433 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1434 						 struct net_device *ol_dev)
1435 {
1436 	struct mlxsw_sp_ipip_entry *ipip_entry;
1437 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1438 	struct mlxsw_sp_vr *ul_vr;
1439 	int err;
1440 
1441 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1442 	if (ipip_entry) {
1443 		lb_rif = ipip_entry->ol_lb;
1444 		ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
1445 		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
1446 		if (err)
1447 			return err;
1448 		lb_rif->common.mtu = ol_dev->mtu;
1449 	}
1450 
1452 	return 0;
1453 }
1454 
1455 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1456 						struct net_device *ol_dev)
1457 {
1458 	struct mlxsw_sp_ipip_entry *ipip_entry;
1459 
1460 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1461 	if (ipip_entry)
1462 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1463 }
1464 
1465 static void
1466 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1467 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1468 {
1469 	if (ipip_entry->decap_fib_entry)
1470 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1471 }
1472 
1473 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1474 						  struct net_device *ol_dev)
1475 {
1476 	struct mlxsw_sp_ipip_entry *ipip_entry;
1477 
1478 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1479 	if (ipip_entry)
1480 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1481 }
1482 
1483 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1484 					 struct mlxsw_sp_rif *old_rif,
1485 					 struct mlxsw_sp_rif *new_rif);
1486 static int
1487 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1488 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1489 				 bool keep_encap,
1490 				 struct netlink_ext_ack *extack)
1491 {
1492 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1493 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1494 
1495 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1496 						     ipip_entry->ipipt,
1497 						     ipip_entry->ol_dev,
1498 						     extack);
1499 	if (IS_ERR(new_lb_rif))
1500 		return PTR_ERR(new_lb_rif);
1501 	ipip_entry->ol_lb = new_lb_rif;
1502 
1503 	if (keep_encap)
1504 		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1505 					     &new_lb_rif->common);
1506 
1507 	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1508 
1509 	return 0;
1510 }
1511 
1512 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1513 					struct mlxsw_sp_rif *rif);
1514 
/**
 * __mlxsw_sp_ipip_entry_update_tunnel - Update the offload of an IPIP entry.
 * @mlxsw_sp: mlxsw_sp.
 * @ipip_entry: IPIP entry.
 * @recreate_loopback: Recreates the associated loopback RIF.
 * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
 *              relevant when recreate_loopback is true.
 * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
 *                   is only relevant when recreate_loopback is false.
 * @extack: extack.
 *
 * The decap offload is always updated, regardless of the flags above.
 */
1524 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1525 					struct mlxsw_sp_ipip_entry *ipip_entry,
1526 					bool recreate_loopback,
1527 					bool keep_encap,
1528 					bool update_nexthops,
1529 					struct netlink_ext_ack *extack)
1530 {
1531 	int err;
1532 
1533 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1534 	 * recreate it. That creates a window of opportunity where RALUE and
1535 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1536 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1537 	 * of RALUE, demote the decap route back.
1538 	 */
1539 	if (ipip_entry->decap_fib_entry)
1540 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1541 
1542 	if (recreate_loopback) {
1543 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1544 						       keep_encap, extack);
1545 		if (err)
1546 			return err;
1547 	} else if (update_nexthops) {
1548 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1549 					    &ipip_entry->ol_lb->common);
1550 	}
1551 
1552 	if (ipip_entry->ol_dev->flags & IFF_UP)
1553 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1554 
1555 	return 0;
1556 }
1557 
1558 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1559 						struct net_device *ol_dev,
1560 						struct netlink_ext_ack *extack)
1561 {
1562 	struct mlxsw_sp_ipip_entry *ipip_entry =
1563 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1564 	enum mlxsw_sp_l3proto ul_proto;
1565 	union mlxsw_sp_l3addr saddr;
1566 	u32 ul_tb_id;
1567 
1568 	if (!ipip_entry)
1569 		return 0;
1570 
1571 	/* For flat configuration cases, moving overlay to a different VRF might
1572 	 * cause local address conflict, and the conflicting tunnels need to be
1573 	 * demoted.
1574 	 */
1575 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1576 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1577 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1578 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1579 						 saddr, ul_tb_id,
1580 						 ipip_entry)) {
1581 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1582 		return 0;
1583 	}
1584 
1585 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1586 						   true, false, false, extack);
1587 }
1588 
1589 static int
1590 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1591 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1592 				     struct net_device *ul_dev,
1593 				     struct netlink_ext_ack *extack)
1594 {
1595 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1596 						   true, true, false, extack);
1597 }
1598 
1599 static int
1600 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1601 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1602 				    struct net_device *ul_dev)
1603 {
1604 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1605 						   false, false, true, NULL);
1606 }
1607 
1608 static int
1609 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1610 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1611 				      struct net_device *ul_dev)
1612 {
1613 	/* A down underlay device causes encapsulated packets to not be
1614 	 * forwarded, but decap still works. So refresh next hops without
1615 	 * touching anything else.
1616 	 */
1617 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1618 						   false, false, true, NULL);
1619 }
1620 
1621 static int
1622 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1623 					struct net_device *ol_dev,
1624 					struct netlink_ext_ack *extack)
1625 {
1626 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1627 	struct mlxsw_sp_ipip_entry *ipip_entry;
1628 	int err;
1629 
1630 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1631 	if (!ipip_entry)
1632 		/* A change might make a tunnel eligible for offloading, but
1633 		 * that is currently not implemented. What falls to slow path
1634 		 * stays there.
1635 		 */
1636 		return 0;
1637 
1638 	/* A change might make a tunnel not eligible for offloading. */
1639 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1640 						 ipip_entry->ipipt)) {
1641 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1642 		return 0;
1643 	}
1644 
1645 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1646 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1647 	return err;
1648 }
1649 
1650 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1651 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1652 {
1653 	struct net_device *ol_dev = ipip_entry->ol_dev;
1654 
1655 	if (ol_dev->flags & IFF_UP)
1656 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1657 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1658 }
1659 
1660 /* The configuration where several tunnels have the same local address in the
1661  * same underlay table needs special treatment in the HW. That is currently not
1662  * implemented in the driver. This function finds and demotes the first tunnel
1663  * with a given source address, except the one passed in the argument
1664  * `except'.
1665  */
1666 bool
1667 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1668 				     enum mlxsw_sp_l3proto ul_proto,
1669 				     union mlxsw_sp_l3addr saddr,
1670 				     u32 ul_tb_id,
1671 				     const struct mlxsw_sp_ipip_entry *except)
1672 {
1673 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1674 
1675 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1676 				 ipip_list_node) {
1677 		if (ipip_entry != except &&
1678 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1679 						      ul_tb_id, ipip_entry)) {
1680 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1681 			return true;
1682 		}
1683 	}
1684 
1685 	return false;
1686 }
1687 
1688 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1689 						     struct net_device *ul_dev)
1690 {
1691 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1692 
1693 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1694 				 ipip_list_node) {
1695 		struct net_device *ipip_ul_dev =
1696 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1697 
1698 		if (ipip_ul_dev == ul_dev)
1699 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1700 	}
1701 }
1702 
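/* Notifier dispatch for events on a tunnel (overlay) netdevice: tunnels are
 * set up on NETDEV_REGISTER when they can be offloaded, torn down on
 * NETDEV_UNREGISTER, and the decap route and loopback RIF are kept in sync
 * with the remaining events.
 */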
1703 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1704 				     struct net_device *ol_dev,
1705 				     unsigned long event,
1706 				     struct netdev_notifier_info *info)
1707 {
1708 	struct netdev_notifier_changeupper_info *chup;
1709 	struct netlink_ext_ack *extack;
1710 
1711 	switch (event) {
1712 	case NETDEV_REGISTER:
1713 		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1714 	case NETDEV_UNREGISTER:
1715 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1716 		return 0;
1717 	case NETDEV_UP:
1718 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1719 		return 0;
1720 	case NETDEV_DOWN:
1721 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1722 		return 0;
1723 	case NETDEV_CHANGEUPPER:
1724 		chup = container_of(info, typeof(*chup), info);
1725 		extack = info->extack;
1726 		if (netif_is_l3_master(chup->upper_dev))
1727 			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1728 								    ol_dev,
1729 								    extack);
1730 		return 0;
1731 	case NETDEV_CHANGE:
1732 		extack = info->extack;
1733 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1734 							       ol_dev, extack);
1735 	case NETDEV_CHANGEMTU:
1736 		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1737 	}
1738 	return 0;
1739 }
1740 
1741 static int
1742 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1743 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1744 				   struct net_device *ul_dev,
1745 				   unsigned long event,
1746 				   struct netdev_notifier_info *info)
1747 {
1748 	struct netdev_notifier_changeupper_info *chup;
1749 	struct netlink_ext_ack *extack;
1750 
1751 	switch (event) {
1752 	case NETDEV_CHANGEUPPER:
1753 		chup = container_of(info, typeof(*chup), info);
1754 		extack = info->extack;
1755 		if (netif_is_l3_master(chup->upper_dev))
1756 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1757 								    ipip_entry,
1758 								    ul_dev,
1759 								    extack);
1760 		break;
1761 
1762 	case NETDEV_UP:
1763 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1764 							   ul_dev);
1765 	case NETDEV_DOWN:
1766 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1767 							     ipip_entry,
1768 							     ul_dev);
1769 	}
1770 	return 0;
1771 }
1772 
1773 int
1774 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1775 				 struct net_device *ul_dev,
1776 				 unsigned long event,
1777 				 struct netdev_notifier_info *info)
1778 {
1779 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1780 	int err;
1781 
1782 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1783 								ul_dev,
1784 								ipip_entry))) {
1785 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1786 							 ul_dev, event, info);
1787 		if (err) {
1788 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1789 								 ul_dev);
1790 			return err;
1791 		}
1792 	}
1793 
1794 	return 0;
1795 }
1796 
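/* The driver shadows the kernel's neighbour tables: each offloaded neighbour
 * is tracked by a struct mlxsw_sp_neigh_entry, keyed by the kernel's
 * struct neighbour pointer and kept in the router's neigh_ht rhashtable.
 */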
1797 struct mlxsw_sp_neigh_key {
1798 	struct neighbour *n;
1799 };
1800 
1801 struct mlxsw_sp_neigh_entry {
1802 	struct list_head rif_list_node;
1803 	struct rhash_head ht_node;
1804 	struct mlxsw_sp_neigh_key key;
1805 	u16 rif;
1806 	bool connected;
1807 	unsigned char ha[ETH_ALEN];
1808 	struct list_head nexthop_list; /* list of nexthops using
1809 					* this neigh entry
1810 					*/
1811 	struct list_head nexthop_neighs_list_node;
1812 	unsigned int counter_index;
1813 	bool counter_valid;
1814 };
1815 
1816 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1817 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1818 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1819 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1820 };
1821 
1822 struct mlxsw_sp_neigh_entry *
1823 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1824 			struct mlxsw_sp_neigh_entry *neigh_entry)
1825 {
1826 	if (!neigh_entry) {
1827 		if (list_empty(&rif->neigh_list))
1828 			return NULL;
1829 		else
1830 			return list_first_entry(&rif->neigh_list,
1831 						typeof(*neigh_entry),
1832 						rif_list_node);
1833 	}
1834 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1835 		return NULL;
1836 	return list_next_entry(neigh_entry, rif_list_node);
1837 }
1838 
1839 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1840 {
1841 	return neigh_entry->key.n->tbl->family;
1842 }
1843 
1844 unsigned char *
1845 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1846 {
1847 	return neigh_entry->ha;
1848 }
1849 
1850 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1851 {
1852 	struct neighbour *n;
1853 
1854 	n = neigh_entry->key.n;
1855 	return ntohl(*((__be32 *) n->primary_key));
1856 }
1857 
1858 struct in6_addr *
1859 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1860 {
1861 	struct neighbour *n;
1862 
1863 	n = neigh_entry->key.n;
1864 	return (struct in6_addr *) &n->primary_key;
1865 }
1866 
1867 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1868 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1869 			       u64 *p_counter)
1870 {
1871 	if (!neigh_entry->counter_valid)
1872 		return -EINVAL;
1873 
1874 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1875 					 p_counter, NULL);
1876 }
1877 
1878 static struct mlxsw_sp_neigh_entry *
1879 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1880 			   u16 rif)
1881 {
1882 	struct mlxsw_sp_neigh_entry *neigh_entry;
1883 
1884 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1885 	if (!neigh_entry)
1886 		return NULL;
1887 
1888 	neigh_entry->key.n = n;
1889 	neigh_entry->rif = rif;
1890 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1891 
1892 	return neigh_entry;
1893 }
1894 
1895 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
1896 {
1897 	kfree(neigh_entry);
1898 }
1899 
1900 static int
1901 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1902 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1903 {
1904 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1905 				      &neigh_entry->ht_node,
1906 				      mlxsw_sp_neigh_ht_params);
1907 }
1908 
1909 static void
1910 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1911 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1912 {
1913 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1914 			       &neigh_entry->ht_node,
1915 			       mlxsw_sp_neigh_ht_params);
1916 }
1917 
1918 static bool
1919 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1920 				    struct mlxsw_sp_neigh_entry *neigh_entry)
1921 {
1922 	struct devlink *devlink;
1923 	const char *table_name;
1924 
1925 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1926 	case AF_INET:
1927 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1928 		break;
1929 	case AF_INET6:
1930 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1931 		break;
1932 	default:
1933 		WARN_ON(1);
1934 		return false;
1935 	}
1936 
1937 	devlink = priv_to_devlink(mlxsw_sp->core);
1938 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
1939 }
1940 
1941 static void
1942 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1943 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1944 {
1945 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1946 		return;
1947 
1948 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1949 		return;
1950 
1951 	neigh_entry->counter_valid = true;
1952 }
1953 
1954 static void
1955 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1956 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1957 {
1958 	if (!neigh_entry->counter_valid)
1959 		return;
1960 	mlxsw_sp_flow_counter_free(mlxsw_sp,
1961 				   neigh_entry->counter_index);
1962 	neigh_entry->counter_valid = false;
1963 }
1964 
1965 static struct mlxsw_sp_neigh_entry *
1966 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1967 {
1968 	struct mlxsw_sp_neigh_entry *neigh_entry;
1969 	struct mlxsw_sp_rif *rif;
1970 	int err;
1971 
1972 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1973 	if (!rif)
1974 		return ERR_PTR(-EINVAL);
1975 
1976 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1977 	if (!neigh_entry)
1978 		return ERR_PTR(-ENOMEM);
1979 
1980 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1981 	if (err)
1982 		goto err_neigh_entry_insert;
1983 
1984 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1985 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1986 
1987 	return neigh_entry;
1988 
1989 err_neigh_entry_insert:
1990 	mlxsw_sp_neigh_entry_free(neigh_entry);
1991 	return ERR_PTR(err);
1992 }
1993 
1994 static void
1995 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1996 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1997 {
1998 	list_del(&neigh_entry->rif_list_node);
1999 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2000 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2001 	mlxsw_sp_neigh_entry_free(neigh_entry);
2002 }
2003 
2004 static struct mlxsw_sp_neigh_entry *
2005 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2006 {
2007 	struct mlxsw_sp_neigh_key key;
2008 
2009 	key.n = n;
2010 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2011 				      &key, mlxsw_sp_neigh_ht_params);
2012 }
2013 
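/* The polling interval for the neighbour activity dump is derived from the
 * DELAY_PROBE_TIME of the kernel's neighbour tables; with IPv6 enabled, the
 * shorter of the ARP and ND values is used. The value is kept in
 * milliseconds.
 */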
2014 static void
2015 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2016 {
2017 	unsigned long interval;
2018 
2019 #if IS_ENABLED(CONFIG_IPV6)
2020 	interval = min_t(unsigned long,
2021 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2022 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2023 #else
2024 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2025 #endif
2026 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2027 }
2028 
2029 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2030 						   char *rauhtd_pl,
2031 						   int ent_index)
2032 {
2033 	struct net_device *dev;
2034 	struct neighbour *n;
2035 	__be32 dipn;
2036 	u32 dip;
2037 	u16 rif;
2038 
2039 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2040 
2041 	if (!mlxsw_sp->router->rifs[rif]) {
2042 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2043 		return;
2044 	}
2045 
2046 	dipn = htonl(dip);
2047 	dev = mlxsw_sp->router->rifs[rif]->dev;
2048 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2049 	if (!n)
2050 		return;
2051 
2052 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2053 	neigh_event_send(n, NULL);
2054 	neigh_release(n);
2055 }
2056 
2057 #if IS_ENABLED(CONFIG_IPV6)
2058 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2059 						   char *rauhtd_pl,
2060 						   int rec_index)
2061 {
2062 	struct net_device *dev;
2063 	struct neighbour *n;
2064 	struct in6_addr dip;
2065 	u16 rif;
2066 
2067 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2068 					 (char *) &dip);
2069 
2070 	if (!mlxsw_sp->router->rifs[rif]) {
2071 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2072 		return;
2073 	}
2074 
2075 	dev = mlxsw_sp->router->rifs[rif]->dev;
2076 	n = neigh_lookup(&nd_tbl, &dip, dev);
2077 	if (!n)
2078 		return;
2079 
2080 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2081 	neigh_event_send(n, NULL);
2082 	neigh_release(n);
2083 }
2084 #else
2085 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2086 						   char *rauhtd_pl,
2087 						   int rec_index)
2088 {
2089 }
2090 #endif
2091 
2092 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2093 						   char *rauhtd_pl,
2094 						   int rec_index)
2095 {
2096 	u8 num_entries;
2097 	int i;
2098 
2099 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2100 								rec_index);
2101 	/* Hardware starts counting at 0, so add 1. */
2102 	num_entries++;
2103 
2104 	/* Each record consists of several neighbour entries. */
2105 	for (i = 0; i < num_entries; i++) {
2106 		int ent_index;
2107 
2108 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2109 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2110 						       ent_index);
2111 	}
2112 
2114 
2115 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2116 						   char *rauhtd_pl,
2117 						   int rec_index)
2118 {
2119 	/* One record contains one entry. */
2120 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2121 					       rec_index);
2122 }
2123 
2124 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2125 					      char *rauhtd_pl, int rec_index)
2126 {
2127 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2128 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2129 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2130 						       rec_index);
2131 		break;
2132 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2133 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2134 						       rec_index);
2135 		break;
2136 	}
2137 }
2138 
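/* The RAUHTD response might have been truncated: it is considered full when
 * it holds the maximum number of records and the last record itself is full.
 * An IPv6 record always carries a single entry, while an IPv4 record carries
 * up to MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC entries. A full response means the
 * dump must be re-issued to fetch the remaining records.
 */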
2139 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2140 {
2141 	u8 num_rec, last_rec_index, num_entries;
2142 
2143 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2144 	last_rec_index = num_rec - 1;
2145 
2146 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2147 		return false;
2148 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2149 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2150 		return true;
2151 
2152 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2153 								last_rec_index);
2154 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2155 		return true;
2156 	return false;
2157 }
2158 
2159 static int
2160 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2161 				       char *rauhtd_pl,
2162 				       enum mlxsw_reg_rauhtd_type type)
2163 {
2164 	int i, num_rec;
2165 	int err;
2166 
2167 	/* Make sure the neighbour's netdev isn't removed in the
2168 	 * process.
2169 	 */
2170 	rtnl_lock();
2171 	do {
2172 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2173 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2174 				      rauhtd_pl);
2175 		if (err) {
2176 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2177 			break;
2178 		}
2179 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2180 		for (i = 0; i < num_rec; i++)
2181 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2182 							  i);
2183 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2184 	rtnl_unlock();
2185 
2186 	return err;
2187 }
2188 
2189 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2190 {
2191 	enum mlxsw_reg_rauhtd_type type;
2192 	char *rauhtd_pl;
2193 	int err;
2194 
2195 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2196 	if (!rauhtd_pl)
2197 		return -ENOMEM;
2198 
2199 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2200 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2201 	if (err)
2202 		goto out;
2203 
2204 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2205 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2206 out:
2207 	kfree(rauhtd_pl);
2208 	return err;
2209 }
2210 
2211 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2212 {
2213 	struct mlxsw_sp_neigh_entry *neigh_entry;
2214 
	/* Take RTNL mutex here to prevent the lists from changing. */
2216 	rtnl_lock();
2217 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2218 			    nexthop_neighs_list_node)
		/* If this neigh has nexthops, make the kernel think it is
		 * active regardless of the traffic.
		 */
2222 		neigh_event_send(neigh_entry->key.n, NULL);
2223 	rtnl_unlock();
2224 }
2225 
2226 static void
2227 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2228 {
2229 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2230 
2231 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2232 			       msecs_to_jiffies(interval));
2233 }
2234 
2235 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2236 {
2237 	struct mlxsw_sp_router *router;
2238 	int err;
2239 
2240 	router = container_of(work, struct mlxsw_sp_router,
2241 			      neighs_update.dw.work);
2242 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2243 	if (err)
		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
2245 
2246 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2247 
2248 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2249 }
2250 
2251 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2252 {
2253 	struct mlxsw_sp_neigh_entry *neigh_entry;
2254 	struct mlxsw_sp_router *router;
2255 
2256 	router = container_of(work, struct mlxsw_sp_router,
2257 			      nexthop_probe_dw.work);
	/* Iterate over the nexthop neighbours and send ARP on those that are
	 * unresolved. This solves a chicken-and-egg problem: a nexthop
	 * wouldn't get offloaded until its neighbour is resolved, but the
	 * neighbour would never get resolved as long as traffic flows in HW
	 * through a different nexthop.
	 *
	 * Take RTNL mutex here to prevent the lists from changing.
	 */
2266 	rtnl_lock();
2267 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2268 			    nexthop_neighs_list_node)
2269 		if (!neigh_entry->connected)
2270 			neigh_event_send(neigh_entry->key.n, NULL);
2271 	rtnl_unlock();
2272 
2273 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2274 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2275 }
2276 
2277 static void
2278 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2279 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2280 			      bool removing);
2281 
2282 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2283 {
2284 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2285 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2286 }
2287 
2288 static void
2289 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2290 				struct mlxsw_sp_neigh_entry *neigh_entry,
2291 				enum mlxsw_reg_rauht_op op)
2292 {
2293 	struct neighbour *n = neigh_entry->key.n;
2294 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2295 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2296 
2297 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2298 			      dip);
2299 	if (neigh_entry->counter_valid)
2300 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2301 					     neigh_entry->counter_index);
2302 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2303 }
2304 
2305 static void
2306 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2307 				struct mlxsw_sp_neigh_entry *neigh_entry,
2308 				enum mlxsw_reg_rauht_op op)
2309 {
2310 	struct neighbour *n = neigh_entry->key.n;
2311 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2312 	const char *dip = n->primary_key;
2313 
2314 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2315 			      dip);
2316 	if (neigh_entry->counter_valid)
2317 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2318 					     neigh_entry->counter_index);
2319 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2320 }
2321 
2322 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2323 {
2324 	struct neighbour *n = neigh_entry->key.n;
2325 
2326 	/* Packets with a link-local destination address are trapped
2327 	 * after LPM lookup and never reach the neighbour table, so
2328 	 * there is no need to program such neighbours to the device.
2329 	 */
2330 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2331 	    IPV6_ADDR_LINKLOCAL)
2332 		return true;
2333 	return false;
2334 }
2335 
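/* Reflect the neighbour entry into the device: program the {RIF, DIP} -> MAC
 * mapping into the RAUHT table when the neighbour becomes connected, and
 * remove it when it disconnects. Link-local IPv6 neighbours are never
 * programmed (see mlxsw_sp_neigh_ipv6_ignore() above).
 */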
2336 static void
2337 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2338 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2339 			    bool adding)
2340 {
2341 	if (!adding && !neigh_entry->connected)
2342 		return;
2343 	neigh_entry->connected = adding;
2344 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2345 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2346 						mlxsw_sp_rauht_op(adding));
2347 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2348 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2349 			return;
2350 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2351 						mlxsw_sp_rauht_op(adding));
2352 	} else {
2353 		WARN_ON_ONCE(1);
2354 	}
2355 }
2356 
2357 void
2358 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2359 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2360 				    bool adding)
2361 {
2362 	if (adding)
2363 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2364 	else
2365 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
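	/* Re-write the entry to the device so the change in counter
	 * binding takes effect.
	 */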
2366 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2367 }
2368 
2369 struct mlxsw_sp_netevent_work {
2370 	struct work_struct work;
2371 	struct mlxsw_sp *mlxsw_sp;
2372 	struct neighbour *n;
2373 };
2374 
2375 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2376 {
2377 	struct mlxsw_sp_netevent_work *net_work =
2378 		container_of(work, struct mlxsw_sp_netevent_work, work);
2379 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2380 	struct mlxsw_sp_neigh_entry *neigh_entry;
2381 	struct neighbour *n = net_work->n;
2382 	unsigned char ha[ETH_ALEN];
2383 	bool entry_connected;
2384 	u8 nud_state, dead;
2385 
2386 	/* If these parameters are changed after we release the lock,
2387 	 * then we are guaranteed to receive another event letting us
2388 	 * know about it.
2389 	 */
2390 	read_lock_bh(&n->lock);
2391 	memcpy(ha, n->ha, ETH_ALEN);
2392 	nud_state = n->nud_state;
2393 	dead = n->dead;
2394 	read_unlock_bh(&n->lock);
2395 
2396 	rtnl_lock();
2397 	mlxsw_sp_span_respin(mlxsw_sp);
2398 
2399 	entry_connected = nud_state & NUD_VALID && !dead;
2400 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2401 	if (!entry_connected && !neigh_entry)
2402 		goto out;
2403 	if (!neigh_entry) {
2404 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2405 		if (IS_ERR(neigh_entry))
2406 			goto out;
2407 	}
2408 
2409 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2410 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2411 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2412 
2413 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2414 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2415 
2416 out:
2417 	rtnl_unlock();
2418 	neigh_release(n);
2419 	kfree(net_work);
2420 }
2421 
2422 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2423 
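/* The multipath hash policy changed (e.g. via sysctl); re-program the
 * device's ECMP hash configuration from process context.
 */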
2424 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2425 {
2426 	struct mlxsw_sp_netevent_work *net_work =
2427 		container_of(work, struct mlxsw_sp_netevent_work, work);
2428 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2429 
2430 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2431 	kfree(net_work);
2432 }
2433 
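/* Netevent notifications are delivered in atomic context. Anything that
 * needs to sleep or take RTNL is therefore deferred to process context
 * through the work items above.
 */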
2434 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2435 					  unsigned long event, void *ptr)
2436 {
2437 	struct mlxsw_sp_netevent_work *net_work;
2438 	struct mlxsw_sp_port *mlxsw_sp_port;
2439 	struct mlxsw_sp_router *router;
2440 	struct mlxsw_sp *mlxsw_sp;
2441 	unsigned long interval;
2442 	struct neigh_parms *p;
2443 	struct neighbour *n;
2444 	struct net *net;
2445 
2446 	switch (event) {
2447 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2448 		p = ptr;
2449 
2450 		/* We don't care about changes in the default table. */
2451 		if (!p->dev || (p->tbl->family != AF_INET &&
2452 				p->tbl->family != AF_INET6))
2453 			return NOTIFY_DONE;
2454 
2455 		/* We are in atomic context and can't take RTNL mutex,
2456 		 * so use RCU variant to walk the device chain.
2457 		 */
2458 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2459 		if (!mlxsw_sp_port)
2460 			return NOTIFY_DONE;
2461 
2462 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2463 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2464 		mlxsw_sp->router->neighs_update.interval = interval;
2465 
2466 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2467 		break;
2468 	case NETEVENT_NEIGH_UPDATE:
2469 		n = ptr;
2470 
2471 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2472 			return NOTIFY_DONE;
2473 
2474 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2475 		if (!mlxsw_sp_port)
2476 			return NOTIFY_DONE;
2477 
2478 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2479 		if (!net_work) {
2480 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2481 			return NOTIFY_BAD;
2482 		}
2483 
2484 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2485 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2486 		net_work->n = n;
2487 
		/* Take a reference to ensure the neighbour won't be
		 * destroyed until we drop the reference in the delayed
		 * work.
		 */
2492 		neigh_clone(n);
2493 		mlxsw_core_schedule_work(&net_work->work);
2494 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2495 		break;
2496 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2497 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2498 		net = ptr;
2499 
2500 		if (!net_eq(net, &init_net))
2501 			return NOTIFY_DONE;
2502 
2503 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2504 		if (!net_work)
2505 			return NOTIFY_BAD;
2506 
2507 		router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2508 		INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
2509 		net_work->mlxsw_sp = router->mlxsw_sp;
2510 		mlxsw_core_schedule_work(&net_work->work);
2511 		break;
2512 	}
2513 
2514 	return NOTIFY_DONE;
2515 }
2516 
2517 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2518 {
2519 	int err;
2520 
2521 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2522 			      &mlxsw_sp_neigh_ht_params);
2523 	if (err)
2524 		return err;
2525 
2526 	/* Initialize the polling interval according to the default
2527 	 * table.
2528 	 */
2529 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2530 
	/* Create the delayed works for neighbour activity update and for
	 * probing of unresolved nexthops.
	 */
2532 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2533 			  mlxsw_sp_router_neighs_update_work);
2534 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2535 			  mlxsw_sp_router_probe_unresolved_nexthops);
2536 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2537 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2538 	return 0;
2539 }
2540 
2541 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2542 {
2543 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2544 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2545 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2546 }
2547 
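/* Called when a RIF goes away. Un-program all neighbours that were using the
 * RIF from the device and destroy their entries.
 */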
2548 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2549 					 struct mlxsw_sp_rif *rif)
2550 {
2551 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2552 
2553 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2554 				 rif_list_node) {
2555 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2556 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2557 	}
2558 }
2559 
2560 enum mlxsw_sp_nexthop_type {
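/* A nexthop is resolved either through a neighbour entry (Ethernet) or
 * through an IPIP tunnel entry, as reflected by the union in
 * struct mlxsw_sp_nexthop below.
 */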
2561 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2562 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2563 };
2564 
2565 struct mlxsw_sp_nexthop_key {
2566 	struct fib_nh *fib_nh;
2567 };
2568 
2569 struct mlxsw_sp_nexthop {
2570 	struct list_head neigh_list_node; /* member of neigh entry list */
2571 	struct list_head rif_list_node;
2572 	struct list_head router_list_node;
2573 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2574 						* this belongs to
2575 						*/
2576 	struct rhash_head ht_node;
2577 	struct mlxsw_sp_nexthop_key key;
2578 	unsigned char gw_addr[sizeof(struct in6_addr)];
2579 	int ifindex;
2580 	int nh_weight;
2581 	int norm_nh_weight;
2582 	int num_adj_entries;
2583 	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put into the KVD linear area of this
			      * group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into the
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that the MAC of this neigh should be
		      * updated in HW.
		      */
2593 	enum mlxsw_sp_nexthop_type type;
2594 	union {
2595 		struct mlxsw_sp_neigh_entry *neigh_entry;
2596 		struct mlxsw_sp_ipip_entry *ipip_entry;
2597 	};
2598 	unsigned int counter_index;
2599 	bool counter_valid;
2600 };
2601 
2602 struct mlxsw_sp_nexthop_group {
2603 	void *priv;
2604 	struct rhash_head ht_node;
2605 	struct list_head fib_list; /* list of fib entries that use this group */
2606 	struct neigh_table *neigh_tbl;
2607 	u8 adj_index_valid:1,
2608 	   gateway:1; /* routes using the group use a gateway */
2609 	u32 adj_index;
2610 	u16 ecmp_size;
2611 	u16 count;
2612 	int sum_norm_weight;
2613 	struct mlxsw_sp_nexthop nexthops[0];
2614 #define nh_rif	nexthops[0].rif
2615 };
2616 
2617 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2618 				    struct mlxsw_sp_nexthop *nh)
2619 {
2620 	struct devlink *devlink;
2621 
2622 	devlink = priv_to_devlink(mlxsw_sp->core);
2623 	if (!devlink_dpipe_table_counter_enabled(devlink,
2624 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2625 		return;
2626 
2627 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2628 		return;
2629 
2630 	nh->counter_valid = true;
2631 }
2632 
2633 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2634 				   struct mlxsw_sp_nexthop *nh)
2635 {
2636 	if (!nh->counter_valid)
2637 		return;
2638 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2639 	nh->counter_valid = false;
2640 }
2641 
2642 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2643 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2644 {
2645 	if (!nh->counter_valid)
2646 		return -EINVAL;
2647 
2648 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2649 					 p_counter, NULL);
2650 }
2651 
2652 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2653 					       struct mlxsw_sp_nexthop *nh)
2654 {
2655 	if (!nh) {
2656 		if (list_empty(&router->nexthop_list))
2657 			return NULL;
2658 		else
2659 			return list_first_entry(&router->nexthop_list,
2660 						typeof(*nh), router_list_node);
2661 	}
2662 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2663 		return NULL;
2664 	return list_next_entry(nh, router_list_node);
2665 }
2666 
2667 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2668 {
2669 	return nh->offloaded;
2670 }
2671 
2672 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2673 {
2674 	if (!nh->offloaded)
2675 		return NULL;
2676 	return nh->neigh_entry->ha;
2677 }
2678 
2679 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2680 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2681 {
2682 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2683 	u32 adj_hash_index = 0;
2684 	int i;
2685 
2686 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2687 		return -EINVAL;
2688 
2689 	*p_adj_index = nh_grp->adj_index;
2690 	*p_adj_size = nh_grp->ecmp_size;
2691 
2692 	for (i = 0; i < nh_grp->count; i++) {
2693 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2694 
2695 		if (nh_iter == nh)
2696 			break;
2697 		if (nh_iter->offloaded)
2698 			adj_hash_index += nh_iter->num_adj_entries;
2699 	}
2700 
2701 	*p_adj_hash_index = adj_hash_index;
2702 	return 0;
2703 }
2704 
2705 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2706 {
2707 	return nh->rif;
2708 }
2709 
2710 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2711 {
2712 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2713 	int i;
2714 
2715 	for (i = 0; i < nh_grp->count; i++) {
2716 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2717 
2718 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2719 			return true;
2720 	}
2721 	return false;
2722 }
2723 
2724 static struct fib_info *
2725 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2726 {
2727 	return nh_grp->priv;
2728 }
2729 
2730 struct mlxsw_sp_nexthop_group_cmp_arg {
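/* Nexthop groups are kept in a single rhashtable, but are keyed differently
 * per protocol: an IPv4 group is identified by its struct fib_info pointer,
 * while an IPv6 group is identified by its set of {ifindex, weight, gateway}
 * tuples, so that equivalent IPv6 routes can share one group.
 */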
2731 	enum mlxsw_sp_l3proto proto;
2732 	union {
2733 		struct fib_info *fi;
2734 		struct mlxsw_sp_fib6_entry *fib6_entry;
2735 	};
2736 };
2737 
2738 static bool
2739 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2740 				    const struct in6_addr *gw, int ifindex,
2741 				    int weight)
2742 {
2743 	int i;
2744 
2745 	for (i = 0; i < nh_grp->count; i++) {
2746 		const struct mlxsw_sp_nexthop *nh;
2747 
2748 		nh = &nh_grp->nexthops[i];
2749 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2750 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2751 			return true;
2752 	}
2753 
2754 	return false;
2755 }
2756 
2757 static bool
2758 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2759 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2760 {
2761 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2762 
2763 	if (nh_grp->count != fib6_entry->nrt6)
2764 		return false;
2765 
2766 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2767 		struct in6_addr *gw;
2768 		int ifindex, weight;
2769 
2770 		ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2771 		weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2772 		gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
2773 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2774 							 weight))
2775 			return false;
2776 	}
2777 
2778 	return true;
2779 }
2780 
2781 static int
2782 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2783 {
2784 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2785 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2786 
2787 	switch (cmp_arg->proto) {
2788 	case MLXSW_SP_L3_PROTO_IPV4:
2789 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2790 	case MLXSW_SP_L3_PROTO_IPV6:
2791 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2792 						    cmp_arg->fib6_entry);
2793 	default:
2794 		WARN_ON(1);
2795 		return 1;
2796 	}
2797 }
2798 
2799 static int
2800 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2801 {
2802 	return nh_grp->neigh_tbl->family;
2803 }
2804 
2805 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2806 {
2807 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2808 	const struct mlxsw_sp_nexthop *nh;
2809 	struct fib_info *fi;
2810 	unsigned int val;
2811 	int i;
2812 
2813 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2814 	case AF_INET:
2815 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2816 		return jhash(&fi, sizeof(fi), seed);
2817 	case AF_INET6:
2818 		val = nh_grp->count;
2819 		for (i = 0; i < nh_grp->count; i++) {
2820 			nh = &nh_grp->nexthops[i];
2821 			val ^= nh->ifindex;
2822 		}
2823 		return jhash(&val, sizeof(val), seed);
2824 	default:
2825 		WARN_ON(1);
2826 		return 0;
2827 	}
2828 }
2829 
2830 static u32
2831 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2832 {
2833 	unsigned int val = fib6_entry->nrt6;
2834 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2835 	struct net_device *dev;
2836 
2837 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2838 		dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
2839 		val ^= dev->ifindex;
2840 	}
2841 
2842 	return jhash(&val, sizeof(val), seed);
2843 }
2844 
2845 static u32
2846 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2847 {
2848 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2849 
2850 	switch (cmp_arg->proto) {
2851 	case MLXSW_SP_L3_PROTO_IPV4:
2852 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2853 	case MLXSW_SP_L3_PROTO_IPV6:
2854 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2855 	default:
2856 		WARN_ON(1);
2857 		return 0;
2858 	}
2859 }
2860 
2861 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2862 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2863 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
2864 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2865 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2866 };
2867 
2868 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2869 					 struct mlxsw_sp_nexthop_group *nh_grp)
2870 {
2871 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2872 	    !nh_grp->gateway)
2873 		return 0;
2874 
2875 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2876 				      &nh_grp->ht_node,
2877 				      mlxsw_sp_nexthop_group_ht_params);
2878 }
2879 
2880 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2881 					  struct mlxsw_sp_nexthop_group *nh_grp)
2882 {
2883 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2884 	    !nh_grp->gateway)
2885 		return;
2886 
2887 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2888 			       &nh_grp->ht_node,
2889 			       mlxsw_sp_nexthop_group_ht_params);
2890 }
2891 
2892 static struct mlxsw_sp_nexthop_group *
2893 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2894 			       struct fib_info *fi)
2895 {
2896 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2897 
2898 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2899 	cmp_arg.fi = fi;
2900 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2901 				      &cmp_arg,
2902 				      mlxsw_sp_nexthop_group_ht_params);
2903 }
2904 
2905 static struct mlxsw_sp_nexthop_group *
2906 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2907 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2908 {
2909 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2910 
2911 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2912 	cmp_arg.fib6_entry = fib6_entry;
2913 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2914 				      &cmp_arg,
2915 				      mlxsw_sp_nexthop_group_ht_params);
2916 }
2917 
2918 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2919 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2920 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2921 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
2922 };
2923 
2924 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2925 				   struct mlxsw_sp_nexthop *nh)
2926 {
2927 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2928 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2929 }
2930 
2931 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2932 				    struct mlxsw_sp_nexthop *nh)
2933 {
2934 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2935 			       mlxsw_sp_nexthop_ht_params);
2936 }
2937 
2938 static struct mlxsw_sp_nexthop *
2939 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2940 			struct mlxsw_sp_nexthop_key key)
2941 {
2942 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2943 				      mlxsw_sp_nexthop_ht_params);
2944 }
2945 
2946 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2947 					     const struct mlxsw_sp_fib *fib,
2948 					     u32 adj_index, u16 ecmp_size,
2949 					     u32 new_adj_index,
2950 					     u16 new_ecmp_size)
2951 {
2952 	char raleu_pl[MLXSW_REG_RALEU_LEN];
2953 
2954 	mlxsw_reg_raleu_pack(raleu_pl,
2955 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
2956 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
2957 			     new_ecmp_size);
2958 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2959 }
2960 
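/* The nexthop group was moved to a new adjacency index / ECMP size. Update
 * the route tables of all virtual routers with FIB entries that use the
 * group; consecutive entries that belong to the same FIB are only updated
 * once.
 */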
2961 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2962 					  struct mlxsw_sp_nexthop_group *nh_grp,
2963 					  u32 old_adj_index, u16 old_ecmp_size)
2964 {
2965 	struct mlxsw_sp_fib_entry *fib_entry;
2966 	struct mlxsw_sp_fib *fib = NULL;
2967 	int err;
2968 
2969 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2970 		if (fib == fib_entry->fib_node->fib)
2971 			continue;
2972 		fib = fib_entry->fib_node->fib;
2973 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2974 							old_adj_index,
2975 							old_ecmp_size,
2976 							nh_grp->adj_index,
2977 							nh_grp->ecmp_size);
2978 		if (err)
2979 			return err;
2980 	}
2981 	return 0;
2982 }
2983 
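/* Write a single adjacency entry: the RATR register binds the adjacency
 * index to the neighbour's RIF and MAC, optionally with a flow counter
 * attached.
 */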
2984 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2985 				     struct mlxsw_sp_nexthop *nh)
2986 {
2987 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2988 	char ratr_pl[MLXSW_REG_RATR_LEN];
2989 
2990 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2991 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
2992 			    adj_index, neigh_entry->rif);
2993 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2994 	if (nh->counter_valid)
2995 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2996 	else
2997 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2998 
2999 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3000 }
3001 
3002 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3003 			    struct mlxsw_sp_nexthop *nh)
3004 {
3005 	int i;
3006 
3007 	for (i = 0; i < nh->num_adj_entries; i++) {
3008 		int err;
3009 
3010 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3011 		if (err)
3012 			return err;
3013 	}
3014 
3015 	return 0;
3016 }
3017 
3018 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3019 					  u32 adj_index,
3020 					  struct mlxsw_sp_nexthop *nh)
3021 {
3022 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3023 
3024 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3025 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3026 }
3027 
3028 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3029 					u32 adj_index,
3030 					struct mlxsw_sp_nexthop *nh)
3031 {
3032 	int i;
3033 
3034 	for (i = 0; i < nh->num_adj_entries; i++) {
3035 		int err;
3036 
3037 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3038 						     nh);
3039 		if (err)
3040 			return err;
3041 	}
3042 
3043 	return 0;
3044 }
3045 
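/* Write the adjacency entries of the group into the KVD linear area.
 * Nexthops that should not be offloaded are skipped. When `reallocate' is
 * set (the group moved to a newly allocated area), all entries are
 * re-written, not only those marked for update.
 */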
3046 static int
3047 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3048 			      struct mlxsw_sp_nexthop_group *nh_grp,
3049 			      bool reallocate)
3050 {
3051 	u32 adj_index = nh_grp->adj_index; /* base */
3052 	struct mlxsw_sp_nexthop *nh;
3053 	int i;
3054 	int err;
3055 
3056 	for (i = 0; i < nh_grp->count; i++) {
3057 		nh = &nh_grp->nexthops[i];
3058 
3059 		if (!nh->should_offload) {
3060 			nh->offloaded = 0;
3061 			continue;
3062 		}
3063 
3064 		if (nh->update || reallocate) {
3065 			switch (nh->type) {
3066 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
3067 				err = mlxsw_sp_nexthop_update
3068 					    (mlxsw_sp, adj_index, nh);
3069 				break;
3070 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3071 				err = mlxsw_sp_nexthop_ipip_update
3072 					    (mlxsw_sp, adj_index, nh);
3073 				break;
3074 			}
3075 			if (err)
3076 				return err;
3077 			nh->update = 0;
3078 			nh->offloaded = 1;
3079 		}
3080 		adj_index += nh->num_adj_entries;
3081 	}
3082 	return 0;
3083 }
3084 
3085 static bool
3086 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3087 				 const struct mlxsw_sp_fib_entry *fib_entry);
3088 
3089 static int
3090 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3091 				    struct mlxsw_sp_nexthop_group *nh_grp)
3092 {
3093 	struct mlxsw_sp_fib_entry *fib_entry;
3094 	int err;
3095 
3096 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3097 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3098 						      fib_entry))
3099 			continue;
3100 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3101 		if (err)
3102 			return err;
3103 	}
3104 	return 0;
3105 }
3106 
3107 static void
3108 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3109 				   enum mlxsw_reg_ralue_op op, int err);
3110 
3111 static void
3112 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3113 {
3114 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3115 	struct mlxsw_sp_fib_entry *fib_entry;
3116 
3117 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3118 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3119 						      fib_entry))
3120 			continue;
3121 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3122 	}
3123 }
3124 
3125 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3126 {
3127 	/* Valid sizes for an adjacency group are:
3128 	 * 1-64, 512, 1024, 2048 and 4096.
3129 	 */
3130 	if (*p_adj_grp_size <= 64)
3131 		return;
3132 	else if (*p_adj_grp_size <= 512)
3133 		*p_adj_grp_size = 512;
3134 	else if (*p_adj_grp_size <= 1024)
3135 		*p_adj_grp_size = 1024;
3136 	else if (*p_adj_grp_size <= 2048)
3137 		*p_adj_grp_size = 2048;
3138 	else
3139 		*p_adj_grp_size = 4096;
3140 }
3141 
3142 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3143 					     unsigned int alloc_size)
3144 {
3145 	if (alloc_size >= 4096)
3146 		*p_adj_grp_size = 4096;
3147 	else if (alloc_size >= 2048)
3148 		*p_adj_grp_size = 2048;
3149 	else if (alloc_size >= 1024)
3150 		*p_adj_grp_size = 1024;
3151 	else if (alloc_size >= 512)
3152 		*p_adj_grp_size = 512;
3153 }
3154 
3155 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3156 				     u16 *p_adj_grp_size)
3157 {
3158 	unsigned int alloc_size;
3159 	int err;
3160 
3161 	/* Round up the requested group size to the next size supported
3162 	 * by the device and make sure the request can be satisfied.
3163 	 */
3164 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3165 	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
3166 					     &alloc_size);
3167 	if (err)
3168 		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as many of them as
	 * possible.
	 */
3173 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3174 
3175 	return 0;
3176 }
3177 
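/* Normalize the weights of the nexthops that should be offloaded by dividing
 * them by their greatest common divisor, and record the sum of the normalized
 * weights. For example, weights of 20 and 30 normalize to 2 and 3, giving a
 * minimal group size of 5 that still honors the requested ratio.
 */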
3178 static void
3179 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3180 {
3181 	int i, g = 0, sum_norm_weight = 0;
3182 	struct mlxsw_sp_nexthop *nh;
3183 
3184 	for (i = 0; i < nh_grp->count; i++) {
3185 		nh = &nh_grp->nexthops[i];
3186 
3187 		if (!nh->should_offload)
3188 			continue;
3189 		if (g > 0)
3190 			g = gcd(nh->nh_weight, g);
3191 		else
3192 			g = nh->nh_weight;
3193 	}
3194 
3195 	for (i = 0; i < nh_grp->count; i++) {
3196 		nh = &nh_grp->nexthops[i];
3197 
3198 		if (!nh->should_offload)
3199 			continue;
3200 		nh->norm_nh_weight = nh->nh_weight / g;
3201 		sum_norm_weight += nh->norm_nh_weight;
3202 	}
3203 
3204 	nh_grp->sum_norm_weight = sum_norm_weight;
3205 }
3206 
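/* Distribute the ecmp_size adjacency entries among the offloaded nexthops in
 * proportion to their normalized weights. Cumulative rounding guarantees the
 * per-nexthop counts sum up exactly to ecmp_size: e.g. normalized weights of
 * 2 and 3 with an ecmp_size of 512 yield 205 and 307 entries, respectively.
 */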
3207 static void
3208 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3209 {
3210 	int total = nh_grp->sum_norm_weight;
3211 	u16 ecmp_size = nh_grp->ecmp_size;
3212 	int i, weight = 0, lower_bound = 0;
3213 
3214 	for (i = 0; i < nh_grp->count; i++) {
3215 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3216 		int upper_bound;
3217 
3218 		if (!nh->should_offload)
3219 			continue;
3220 		weight += nh->norm_nh_weight;
3221 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3222 		nh->num_adj_entries = upper_bound - lower_bound;
3223 		lower_bound = upper_bound;
3224 	}
3225 }
3226 
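/* Re-apply a nexthop group to the device after a change. Groups without a
 * gateway do not use the adjacency table, so only their FIB entries are
 * refreshed. Otherwise the group is sized by the sum of the normalized
 * weights, a fresh KVD linear area is allocated and populated, and the FIB
 * entries are then either pointed at the new adjacency index or mass-updated
 * from the old one. Failure at any step degrades the group to a trap,
 * letting the kernel forward the traffic.
 */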
3227 static void
3228 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3229 			       struct mlxsw_sp_nexthop_group *nh_grp)
3230 {
3231 	u16 ecmp_size, old_ecmp_size;
3232 	struct mlxsw_sp_nexthop *nh;
3233 	bool offload_change = false;
3234 	u32 adj_index;
3235 	bool old_adj_index_valid;
3236 	u32 old_adj_index;
3237 	int i;
3238 	int err;
3239 
3240 	if (!nh_grp->gateway) {
3241 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3242 		return;
3243 	}
3244 
3245 	for (i = 0; i < nh_grp->count; i++) {
3246 		nh = &nh_grp->nexthops[i];
3247 
3248 		if (nh->should_offload != nh->offloaded) {
3249 			offload_change = true;
3250 			if (nh->should_offload)
3251 				nh->update = 1;
3252 		}
3253 	}
3254 	if (!offload_change) {
3255 		/* Nothing was added or removed, so no need to reallocate. Just
3256 		 * update MAC on existing adjacency indexes.
3257 		 */
3258 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3259 		if (err) {
3260 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3261 			goto set_trap;
3262 		}
3263 		return;
3264 	}
3265 	mlxsw_sp_nexthop_group_normalize(nh_grp);
3266 	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected, so just set the
		 * trap and let everything flow through the kernel.
		 */
3270 		goto set_trap;
3271 
3272 	ecmp_size = nh_grp->sum_norm_weight;
3273 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3274 	if (err)
3275 		/* No valid allocation size available. */
3276 		goto set_trap;
3277 
3278 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
3279 	if (err) {
		/* We ran out of KVD linear space, so just set the
		 * trap and let everything flow through the kernel.
		 */
3283 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3284 		goto set_trap;
3285 	}
3286 	old_adj_index_valid = nh_grp->adj_index_valid;
3287 	old_adj_index = nh_grp->adj_index;
3288 	old_ecmp_size = nh_grp->ecmp_size;
3289 	nh_grp->adj_index_valid = 1;
3290 	nh_grp->adj_index = adj_index;
3291 	nh_grp->ecmp_size = ecmp_size;
3292 	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3293 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3294 	if (err) {
3295 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3296 		goto set_trap;
3297 	}
3298 
3299 	if (!old_adj_index_valid) {
		/* The trap was set for the fib entries, so we have to call
		 * fib entry update to unset it and use the adjacency index.
		 */
3303 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3304 		if (err) {
3305 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3306 			goto set_trap;
3307 		}
3308 		return;
3309 	}
3310 
3311 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3312 					     old_adj_index, old_ecmp_size);
3313 	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
3314 	if (err) {
3315 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3316 		goto set_trap;
3317 	}
3318 
3319 	/* Offload state within the group changed, so update the flags. */
3320 	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3321 
3322 	return;
3323 
3324 set_trap:
3325 	old_adj_index_valid = nh_grp->adj_index_valid;
3326 	nh_grp->adj_index_valid = 0;
3327 	for (i = 0; i < nh_grp->count; i++) {
3328 		nh = &nh_grp->nexthops[i];
3329 		nh->offloaded = 0;
3330 	}
3331 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3332 	if (err)
3333 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3334 	if (old_adj_index_valid)
3335 		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
3336 }
3337 
3338 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3339 					    bool removing)
3340 {
3341 	if (!removing)
3342 		nh->should_offload = 1;
3343 	else
3344 		nh->should_offload = 0;
3345 	nh->update = 1;
3346 }
3347 
3348 static void
3349 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3350 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3351 			      bool removing)
3352 {
3353 	struct mlxsw_sp_nexthop *nh;
3354 
3355 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3356 			    neigh_list_node) {
3357 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3358 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3359 	}
3360 }
3361 
3362 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3363 				      struct mlxsw_sp_rif *rif)
3364 {
3365 	if (nh->rif)
3366 		return;
3367 
3368 	nh->rif = rif;
3369 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3370 }
3371 
3372 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3373 {
3374 	if (!nh->rif)
3375 		return;
3376 
3377 	list_del(&nh->rif_list_node);
3378 	nh->rif = NULL;
3379 }
3380 
3381 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3382 				       struct mlxsw_sp_nexthop *nh)
3383 {
3384 	struct mlxsw_sp_neigh_entry *neigh_entry;
3385 	struct neighbour *n;
3386 	u8 nud_state, dead;
3387 	int err;
3388 
3389 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3390 		return 0;
3391 
	/* Take a reference on the neigh here to ensure that it won't be
	 * destroyed before the nexthop entry is finished with it.
	 * The reference is taken either in neigh_lookup() or
	 * in neigh_create() in case n is not found.
	 */
3397 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3398 	if (!n) {
3399 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3400 				 nh->rif->dev);
3401 		if (IS_ERR(n))
3402 			return PTR_ERR(n);
3403 		neigh_event_send(n, NULL);
3404 	}
3405 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3406 	if (!neigh_entry) {
3407 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3408 		if (IS_ERR(neigh_entry)) {
3409 			err = -EINVAL;
3410 			goto err_neigh_entry_create;
3411 		}
3412 	}
3413 
	/* If this is the first nexthop connected to that neigh, add it to
	 * the nexthop_neighs_list.
	 */
3417 	if (list_empty(&neigh_entry->nexthop_list))
3418 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3419 			      &mlxsw_sp->router->nexthop_neighs_list);
3420 
3421 	nh->neigh_entry = neigh_entry;
3422 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3423 	read_lock_bh(&n->lock);
3424 	nud_state = n->nud_state;
3425 	dead = n->dead;
3426 	read_unlock_bh(&n->lock);
3427 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3428 
3429 	return 0;
3430 
3431 err_neigh_entry_create:
3432 	neigh_release(n);
3433 	return err;
3434 }
3435 
3436 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3437 					struct mlxsw_sp_nexthop *nh)
3438 {
3439 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3440 	struct neighbour *n;
3441 
3442 	if (!neigh_entry)
3443 		return;
3444 	n = neigh_entry->key.n;
3445 
3446 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3447 	list_del(&nh->neigh_list_node);
3448 	nh->neigh_entry = NULL;
3449 
	/* If this is the last nexthop connected to that neigh, remove it
	 * from the nexthop_neighs_list.
	 */
3453 	if (list_empty(&neigh_entry->nexthop_list))
3454 		list_del(&neigh_entry->nexthop_neighs_list_node);
3455 
3456 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3457 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3458 
3459 	neigh_release(n);
3460 }
3461 
3462 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3463 {
3464 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3465 
3466 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3467 }
3468 
3469 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3470 				       struct mlxsw_sp_nexthop *nh,
3471 				       struct mlxsw_sp_ipip_entry *ipip_entry)
3472 {
3473 	bool removing;
3474 
3475 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3476 		return;
3477 
3478 	nh->ipip_entry = ipip_entry;
3479 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3480 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3481 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3482 }
3483 
3484 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3485 				       struct mlxsw_sp_nexthop *nh)
3486 {
3487 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3488 
3489 	if (!ipip_entry)
3490 		return;
3491 
3492 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3493 	nh->ipip_entry = NULL;
3494 }
3495 
3496 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3497 					const struct fib_nh *fib_nh,
3498 					enum mlxsw_sp_ipip_type *p_ipipt)
3499 {
3500 	struct net_device *dev = fib_nh->nh_dev;
3501 
3502 	return dev &&
3503 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3504 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3505 }
3506 
3507 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3508 				       struct mlxsw_sp_nexthop *nh)
3509 {
3510 	switch (nh->type) {
3511 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3512 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3513 		mlxsw_sp_nexthop_rif_fini(nh);
3514 		break;
3515 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3516 		mlxsw_sp_nexthop_rif_fini(nh);
3517 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3518 		break;
3519 	}
3520 }
3521 
3522 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3523 				       struct mlxsw_sp_nexthop *nh,
3524 				       struct fib_nh *fib_nh)
3525 {
3526 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3527 	struct net_device *dev = fib_nh->nh_dev;
3528 	struct mlxsw_sp_ipip_entry *ipip_entry;
3529 	struct mlxsw_sp_rif *rif;
3530 	int err;
3531 
3532 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3533 	if (ipip_entry) {
3534 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3535 		if (ipip_ops->can_offload(mlxsw_sp, dev,
3536 					  MLXSW_SP_L3_PROTO_IPV4)) {
3537 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3538 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3539 			return 0;
3540 		}
3541 	}
3542 
3543 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3544 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3545 	if (!rif)
3546 		return 0;
3547 
3548 	mlxsw_sp_nexthop_rif_init(nh, rif);
3549 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3550 	if (err)
3551 		goto err_neigh_init;
3552 
3553 	return 0;
3554 
3555 err_neigh_init:
3556 	mlxsw_sp_nexthop_rif_fini(nh);
3557 	return err;
3558 }
3559 
3560 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3561 					struct mlxsw_sp_nexthop *nh)
3562 {
3563 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3564 }
3565 
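/* Initialize a nexthop from its kernel counterpart: record its weight
 * and gateway, insert it into the nexthop hash table and, unless the
 * nexthop is linkdown and the device is configured to ignore such
 * routes, resolve its type (Ethernet or IP-in-IP).
 */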
3566 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3567 				  struct mlxsw_sp_nexthop_group *nh_grp,
3568 				  struct mlxsw_sp_nexthop *nh,
3569 				  struct fib_nh *fib_nh)
3570 {
3571 	struct net_device *dev = fib_nh->nh_dev;
3572 	struct in_device *in_dev;
3573 	int err;
3574 
3575 	nh->nh_grp = nh_grp;
3576 	nh->key.fib_nh = fib_nh;
3577 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3578 	nh->nh_weight = fib_nh->nh_weight;
3579 #else
3580 	nh->nh_weight = 1;
3581 #endif
3582 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3583 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3584 	if (err)
3585 		return err;
3586 
3587 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3588 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3589 
3590 	if (!dev)
3591 		return 0;
3592 
3593 	in_dev = __in_dev_get_rtnl(dev);
3594 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3595 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3596 		return 0;
3597 
3598 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3599 	if (err)
3600 		goto err_nexthop_neigh_init;
3601 
3602 	return 0;
3603 
3604 err_nexthop_neigh_init:
3605 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3606 	return err;
3607 }
3608 
3609 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3610 				   struct mlxsw_sp_nexthop *nh)
3611 {
3612 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3613 	list_del(&nh->router_list_node);
3614 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3615 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3616 }
3617 
3618 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3619 				    unsigned long event, struct fib_nh *fib_nh)
3620 {
3621 	struct mlxsw_sp_nexthop_key key;
3622 	struct mlxsw_sp_nexthop *nh;
3623 
3624 	if (mlxsw_sp->router->aborted)
3625 		return;
3626 
3627 	key.fib_nh = fib_nh;
3628 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3629 	if (WARN_ON_ONCE(!nh))
3630 		return;
3631 
3632 	switch (event) {
3633 	case FIB_EVENT_NH_ADD:
3634 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3635 		break;
3636 	case FIB_EVENT_NH_DEL:
3637 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3638 		break;
3639 	}
3640 
3641 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3642 }
3643 
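/* Re-evaluate all nexthops using the RIF: Ethernet nexthops become
 * valid again, while IP-in-IP nexthops additionally depend on the
 * tunnel's underlay device being up.
 */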
3644 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3645 					struct mlxsw_sp_rif *rif)
3646 {
3647 	struct mlxsw_sp_nexthop *nh;
3648 	bool removing;
3649 
3650 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3651 		switch (nh->type) {
3652 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3653 			removing = false;
3654 			break;
3655 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3656 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3657 			break;
3658 		default:
3659 			WARN_ON(1);
3660 			continue;
3661 		}
3662 
3663 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3664 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3665 	}
3666 }
3667 
3668 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3669 					 struct mlxsw_sp_rif *old_rif,
3670 					 struct mlxsw_sp_rif *new_rif)
3671 {
3672 	struct mlxsw_sp_nexthop *nh;
3673 
3674 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3675 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3676 		nh->rif = new_rif;
3677 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3678 }
3679 
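/* The RIF is being destroyed. Detach all nexthops that used it and
 * refresh their groups, so that the affected routes are no longer
 * offloaded through it.
 */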
3680 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3681 					   struct mlxsw_sp_rif *rif)
3682 {
3683 	struct mlxsw_sp_nexthop *nh, *tmp;
3684 
3685 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3686 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3687 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3688 	}
3689 }
3690 
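/* A route is considered a gateway route if its first nexthop either has
 * an on-link gateway (nexthop scope is RT_SCOPE_LINK) or egresses
 * through an offloadable IP-in-IP device.
 */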
3691 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3692 				   const struct fib_info *fi)
3693 {
3694 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3695 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3696 }
3697 
3698 static struct mlxsw_sp_nexthop_group *
3699 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3700 {
3701 	struct mlxsw_sp_nexthop_group *nh_grp;
3702 	struct mlxsw_sp_nexthop *nh;
3703 	struct fib_nh *fib_nh;
3704 	size_t alloc_size;
3705 	int i;
3706 	int err;
3707 
3708 	alloc_size = sizeof(*nh_grp) +
3709 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3710 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3711 	if (!nh_grp)
3712 		return ERR_PTR(-ENOMEM);
3713 	nh_grp->priv = fi;
3714 	INIT_LIST_HEAD(&nh_grp->fib_list);
3715 	nh_grp->neigh_tbl = &arp_tbl;
3716 
3717 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3718 	nh_grp->count = fi->fib_nhs;
3719 	fib_info_hold(fi);
3720 	for (i = 0; i < nh_grp->count; i++) {
3721 		nh = &nh_grp->nexthops[i];
3722 		fib_nh = &fi->fib_nh[i];
3723 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3724 		if (err)
3725 			goto err_nexthop4_init;
3726 	}
3727 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3728 	if (err)
3729 		goto err_nexthop_group_insert;
3730 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3731 	return nh_grp;
3732 
3733 err_nexthop_group_insert:
3734 err_nexthop4_init:
3735 	for (i--; i >= 0; i--) {
3736 		nh = &nh_grp->nexthops[i];
3737 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3738 	}
3739 	fib_info_put(fi);
3740 	kfree(nh_grp);
3741 	return ERR_PTR(err);
3742 }
3743 
3744 static void
3745 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3746 				struct mlxsw_sp_nexthop_group *nh_grp)
3747 {
3748 	struct mlxsw_sp_nexthop *nh;
3749 	int i;
3750 
3751 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3752 	for (i = 0; i < nh_grp->count; i++) {
3753 		nh = &nh_grp->nexthops[i];
3754 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3755 	}
3756 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3757 	WARN_ON_ONCE(nh_grp->adj_index_valid);
3758 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3759 	kfree(nh_grp);
3760 }
3761 
3762 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3763 				       struct mlxsw_sp_fib_entry *fib_entry,
3764 				       struct fib_info *fi)
3765 {
3766 	struct mlxsw_sp_nexthop_group *nh_grp;
3767 
3768 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3769 	if (!nh_grp) {
3770 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3771 		if (IS_ERR(nh_grp))
3772 			return PTR_ERR(nh_grp);
3773 	}
3774 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3775 	fib_entry->nh_group = nh_grp;
3776 	return 0;
3777 }
3778 
3779 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3780 					struct mlxsw_sp_fib_entry *fib_entry)
3781 {
3782 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3783 
3784 	list_del(&fib_entry->nexthop_group_node);
3785 	if (!list_empty(&nh_grp->fib_list))
3786 		return;
3787 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3788 }
3789 
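/* IPv4 routes with a non-zero TOS are never offloaded, presumably
 * because the device's LPM lookup does not take the TOS into account.
 */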
3790 static bool
3791 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3792 {
3793 	struct mlxsw_sp_fib4_entry *fib4_entry;
3794 
3795 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3796 				  common);
3797 	return !fib4_entry->tos;
3798 }
3799 
3800 static bool
3801 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3802 {
3803 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3804 
3805 	switch (fib_entry->fib_node->fib->proto) {
3806 	case MLXSW_SP_L3_PROTO_IPV4:
3807 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3808 			return false;
3809 		break;
3810 	case MLXSW_SP_L3_PROTO_IPV6:
3811 		break;
3812 	}
3813 
3814 	switch (fib_entry->type) {
3815 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3816 		return !!nh_group->adj_index_valid;
3817 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3818 		return !!nh_group->nh_rif;
3819 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3820 		return true;
3821 	default:
3822 		return false;
3823 	}
3824 }
3825 
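/* Find the nexthop in the group that corresponds to the given IPv6
 * route, matching on the egress device and the gateway address.
 */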
3826 static struct mlxsw_sp_nexthop *
3827 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3828 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3829 {
3830 	int i;
3831 
3832 	for (i = 0; i < nh_grp->count; i++) {
3833 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3834 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
3835 
3836 		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3837 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3838 				    &rt->fib6_nh.nh_gw))
3839 			return nh;
3841 	}
3842 
3843 	return NULL;
3844 }
3845 
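/* Reflect the offload state back into the kernel's nexthop flags. Local
 * and decap entries do not use adjacency entries, so only the first
 * nexthop is marked; otherwise, exactly the nexthops that were written
 * to the adjacency table are marked.
 */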
3846 static void
3847 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3848 {
3849 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3850 	int i;
3851 
3852 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3853 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3854 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3855 		return;
3856 	}
3857 
3858 	for (i = 0; i < nh_grp->count; i++) {
3859 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3860 
3861 		if (nh->offloaded)
3862 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3863 		else
3864 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3865 	}
3866 }
3867 
3868 static void
3869 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3870 {
3871 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3872 	int i;
3873 
3874 	if (!list_is_singular(&nh_grp->fib_list))
3875 		return;
3876 
3877 	for (i = 0; i < nh_grp->count; i++) {
3878 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3879 
3880 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3881 	}
3882 }
3883 
3884 static void
3885 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3886 {
3887 	struct mlxsw_sp_fib6_entry *fib6_entry;
3888 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3889 
3890 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3891 				  common);
3892 
3893 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3894 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3895 				 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
3896 		return;
3897 	}
3898 
3899 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3900 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3901 		struct mlxsw_sp_nexthop *nh;
3902 
3903 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3904 		if (nh && nh->offloaded)
3905 			mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
3906 		else
3907 			mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3908 	}
3909 }
3910 
3911 static void
3912 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3913 {
3914 	struct mlxsw_sp_fib6_entry *fib6_entry;
3915 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3916 
3917 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3918 				  common);
3919 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3920 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
3921 
3922 		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3923 	}
3924 }
3925 
3926 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3927 {
3928 	switch (fib_entry->fib_node->fib->proto) {
3929 	case MLXSW_SP_L3_PROTO_IPV4:
3930 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
3931 		break;
3932 	case MLXSW_SP_L3_PROTO_IPV6:
3933 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
3934 		break;
3935 	}
3936 }
3937 
3938 static void
3939 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3940 {
3941 	switch (fib_entry->fib_node->fib->proto) {
3942 	case MLXSW_SP_L3_PROTO_IPV4:
3943 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3944 		break;
3945 	case MLXSW_SP_L3_PROTO_IPV6:
3946 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3947 		break;
3948 	}
3949 }
3950 
3951 static void
3952 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3953 				   enum mlxsw_reg_ralue_op op, int err)
3954 {
3955 	switch (op) {
3956 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3957 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3958 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3959 		if (err)
3960 			return;
3961 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3962 			mlxsw_sp_fib_entry_offload_set(fib_entry);
3963 		else
3964 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
3965 		return;
3966 	default:
3967 		return;
3968 	}
3969 }
3970 
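/* Pack the fields common to all RALUE operations: protocol, operation,
 * virtual router ID and the destination prefix.
 */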
3971 static void
3972 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3973 			      const struct mlxsw_sp_fib_entry *fib_entry,
3974 			      enum mlxsw_reg_ralue_op op)
3975 {
3976 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3977 	enum mlxsw_reg_ralxx_protocol proto;
3978 	u32 *p_dip;
3979 
3980 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3981 
3982 	switch (fib->proto) {
3983 	case MLXSW_SP_L3_PROTO_IPV4:
3984 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
3985 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3986 				      fib_entry->fib_node->key.prefix_len,
3987 				      *p_dip);
3988 		break;
3989 	case MLXSW_SP_L3_PROTO_IPV6:
3990 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3991 				      fib_entry->fib_node->key.prefix_len,
3992 				      fib_entry->fib_node->key.addr);
3993 		break;
3994 	}
3995 }
3996 
3997 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
3998 					struct mlxsw_sp_fib_entry *fib_entry,
3999 					enum mlxsw_reg_ralue_op op)
4000 {
4001 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4002 	enum mlxsw_reg_ralue_trap_action trap_action;
4003 	u16 trap_id = 0;
4004 	u32 adjacency_index = 0;
4005 	u16 ecmp_size = 0;
4006 
4007 	/* In case the nexthop group adjacency index is valid, use it
4008 	 * with the provided ECMP size. Otherwise, set up a trap and pass
4009 	 * the traffic to the kernel.
4010 	 */
4011 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4012 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4013 		adjacency_index = fib_entry->nh_group->adj_index;
4014 		ecmp_size = fib_entry->nh_group->ecmp_size;
4015 	} else {
4016 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4017 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4018 	}
4019 
4020 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4021 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4022 					adjacency_index, ecmp_size);
4023 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4024 }
4025 
4026 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4027 				       struct mlxsw_sp_fib_entry *fib_entry,
4028 				       enum mlxsw_reg_ralue_op op)
4029 {
4030 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4031 	enum mlxsw_reg_ralue_trap_action trap_action;
4032 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4033 	u16 trap_id = 0;
4034 	u16 rif_index = 0;
4035 
4036 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4037 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4038 		rif_index = rif->rif_index;
4039 	} else {
4040 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4041 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4042 	}
4043 
4044 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4045 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4046 				       rif_index);
4047 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4048 }
4049 
4050 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4051 				      struct mlxsw_sp_fib_entry *fib_entry,
4052 				      enum mlxsw_reg_ralue_op op)
4053 {
4054 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4055 
4056 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4057 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4058 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4059 }
4060 
4061 static int
4062 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4063 				 struct mlxsw_sp_fib_entry *fib_entry,
4064 				 enum mlxsw_reg_ralue_op op)
4065 {
4066 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4067 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4068 
4069 	if (WARN_ON(!ipip_entry))
4070 		return -EINVAL;
4071 
4072 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4073 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4074 				      fib_entry->decap.tunnel_index);
4075 }
4076 
4077 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4078 				   struct mlxsw_sp_fib_entry *fib_entry,
4079 				   enum mlxsw_reg_ralue_op op)
4080 {
4081 	switch (fib_entry->type) {
4082 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4083 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4084 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4085 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4086 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4087 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4088 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4089 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4090 							fib_entry, op);
4091 	}
4092 	return -EINVAL;
4093 }
4094 
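/* Perform the requested operation in the device and keep the kernel's
 * offload indication in sync with the result.
 */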
4095 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4096 				 struct mlxsw_sp_fib_entry *fib_entry,
4097 				 enum mlxsw_reg_ralue_op op)
4098 {
4099 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4100 
4101 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4102 
4103 	return err;
4104 }
4105 
4106 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4107 				     struct mlxsw_sp_fib_entry *fib_entry)
4108 {
4109 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4110 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4111 }
4112 
4113 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4114 				  struct mlxsw_sp_fib_entry *fib_entry)
4115 {
4116 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4117 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4118 }
4119 
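/* Map the kernel route type to a FIB entry type, which determines the
 * action taken by the device: tunnel decapsulation, trap towards the
 * CPU, or remote / local forwarding.
 */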
4120 static int
4121 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4122 			     const struct fib_entry_notifier_info *fen_info,
4123 			     struct mlxsw_sp_fib_entry *fib_entry)
4124 {
4125 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4126 	struct net_device *dev = fen_info->fi->fib_dev;
4127 	struct mlxsw_sp_ipip_entry *ipip_entry;
4128 	struct fib_info *fi = fen_info->fi;
4129 
4130 	switch (fen_info->type) {
4131 	case RTN_LOCAL:
4132 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4133 						 MLXSW_SP_L3_PROTO_IPV4, dip);
4134 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4135 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4136 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4137 							     fib_entry,
4138 							     ipip_entry);
4139 		}
4140 		/* fall through */
4141 	case RTN_BROADCAST:
4142 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4143 		return 0;
4144 	case RTN_UNREACHABLE: /* fall through */
4145 	case RTN_BLACKHOLE: /* fall through */
4146 	case RTN_PROHIBIT:
4147 		/* Packets hitting these routes need to be trapped, but at
4148 		 * a lower priority than packets directed at the host, so
4149 		 * use action type local instead of trap.
4150 		 */
4151 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4152 		return 0;
4153 	case RTN_UNICAST:
4154 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4155 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4156 		else
4157 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4158 		return 0;
4159 	default:
4160 		return -EINVAL;
4161 	}
4162 }
4163 
4164 static struct mlxsw_sp_fib4_entry *
4165 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4166 			   struct mlxsw_sp_fib_node *fib_node,
4167 			   const struct fib_entry_notifier_info *fen_info)
4168 {
4169 	struct mlxsw_sp_fib4_entry *fib4_entry;
4170 	struct mlxsw_sp_fib_entry *fib_entry;
4171 	int err;
4172 
4173 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4174 	if (!fib4_entry)
4175 		return ERR_PTR(-ENOMEM);
4176 	fib_entry = &fib4_entry->common;
4177 
4178 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4179 	if (err)
4180 		goto err_fib4_entry_type_set;
4181 
4182 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4183 	if (err)
4184 		goto err_nexthop4_group_get;
4185 
4186 	fib4_entry->prio = fen_info->fi->fib_priority;
4187 	fib4_entry->tb_id = fen_info->tb_id;
4188 	fib4_entry->type = fen_info->type;
4189 	fib4_entry->tos = fen_info->tos;
4190 
4191 	fib_entry->fib_node = fib_node;
4192 
4193 	return fib4_entry;
4194 
4195 err_nexthop4_group_get:
4196 err_fib4_entry_type_set:
4197 	kfree(fib4_entry);
4198 	return ERR_PTR(err);
4199 }
4200 
4201 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4202 					struct mlxsw_sp_fib4_entry *fib4_entry)
4203 {
4204 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4205 	kfree(fib4_entry);
4206 }
4207 
4208 static struct mlxsw_sp_fib4_entry *
4209 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4210 			   const struct fib_entry_notifier_info *fen_info)
4211 {
4212 	struct mlxsw_sp_fib4_entry *fib4_entry;
4213 	struct mlxsw_sp_fib_node *fib_node;
4214 	struct mlxsw_sp_fib *fib;
4215 	struct mlxsw_sp_vr *vr;
4216 
4217 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4218 	if (!vr)
4219 		return NULL;
4220 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4221 
4222 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4223 					    sizeof(fen_info->dst),
4224 					    fen_info->dst_len);
4225 	if (!fib_node)
4226 		return NULL;
4227 
4228 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4229 		if (fib4_entry->tb_id == fen_info->tb_id &&
4230 		    fib4_entry->tos == fen_info->tos &&
4231 		    fib4_entry->type == fen_info->type &&
4232 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4233 		    fen_info->fi) {
4234 			return fib4_entry;
4235 		}
4236 	}
4237 
4238 	return NULL;
4239 }
4240 
4241 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4242 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4243 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4244 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4245 	.automatic_shrinking = true,
4246 };
4247 
4248 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4249 				    struct mlxsw_sp_fib_node *fib_node)
4250 {
4251 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4252 				      mlxsw_sp_fib_ht_params);
4253 }
4254 
4255 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4256 				     struct mlxsw_sp_fib_node *fib_node)
4257 {
4258 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4259 			       mlxsw_sp_fib_ht_params);
4260 }
4261 
4262 static struct mlxsw_sp_fib_node *
4263 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4264 			 size_t addr_len, unsigned char prefix_len)
4265 {
4266 	struct mlxsw_sp_fib_key key;
4267 
4268 	memset(&key, 0, sizeof(key));
4269 	memcpy(key.addr, addr, addr_len);
4270 	key.prefix_len = prefix_len;
4271 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4272 }
4273 
4274 static struct mlxsw_sp_fib_node *
4275 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4276 			 size_t addr_len, unsigned char prefix_len)
4277 {
4278 	struct mlxsw_sp_fib_node *fib_node;
4279 
4280 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4281 	if (!fib_node)
4282 		return NULL;
4283 
4284 	INIT_LIST_HEAD(&fib_node->entry_list);
4285 	list_add(&fib_node->list, &fib->node_list);
4286 	memcpy(fib_node->key.addr, addr, addr_len);
4287 	fib_node->key.prefix_len = prefix_len;
4288 
4289 	return fib_node;
4290 }
4291 
4292 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4293 {
4294 	list_del(&fib_node->list);
4295 	WARN_ON(!list_empty(&fib_node->entry_list));
4296 	kfree(fib_node);
4297 }
4298 
4299 static bool
4300 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4301 				 const struct mlxsw_sp_fib_entry *fib_entry)
4302 {
4303 	return list_first_entry(&fib_node->entry_list,
4304 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4305 }
4306 
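/* Account for the node's prefix length in the LPM tree bound to the
 * FIB. If the prefix length is not in use yet, migrate the virtual
 * routers to a tree that includes it.
 */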
4307 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4308 				      struct mlxsw_sp_fib_node *fib_node)
4309 {
4310 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4311 	struct mlxsw_sp_fib *fib = fib_node->fib;
4312 	struct mlxsw_sp_lpm_tree *lpm_tree;
4313 	int err;
4314 
4315 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4316 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4317 		goto out;
4318 
4319 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4320 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4321 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4322 					 fib->proto);
4323 	if (IS_ERR(lpm_tree))
4324 		return PTR_ERR(lpm_tree);
4325 
4326 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4327 	if (err)
4328 		goto err_lpm_tree_replace;
4329 
4330 out:
4331 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4332 	return 0;
4333 
4334 err_lpm_tree_replace:
4335 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4336 	return err;
4337 }
4338 
4339 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4340 					 struct mlxsw_sp_fib_node *fib_node)
4341 {
4342 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4343 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4344 	struct mlxsw_sp_fib *fib = fib_node->fib;
4345 	int err;
4346 
4347 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4348 		return;
4349 	/* Try to construct a new LPM tree from the current prefix usage
4350 	 * minus the no-longer-used prefix length. If we fail, keep the
4351 	 * old tree.
4352 	 */
4352 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4353 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4354 				    fib_node->key.prefix_len);
4355 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4356 					 fib->proto);
4357 	if (IS_ERR(lpm_tree))
4358 		return;
4359 
4360 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4361 	if (err)
4362 		goto err_lpm_tree_replace;
4363 
4364 	return;
4365 
4366 err_lpm_tree_replace:
4367 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4368 }
4369 
4370 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4371 				  struct mlxsw_sp_fib_node *fib_node,
4372 				  struct mlxsw_sp_fib *fib)
4373 {
4374 	int err;
4375 
4376 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4377 	if (err)
4378 		return err;
4379 	fib_node->fib = fib;
4380 
4381 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4382 	if (err)
4383 		goto err_fib_lpm_tree_link;
4384 
4385 	return 0;
4386 
4387 err_fib_lpm_tree_link:
4388 	fib_node->fib = NULL;
4389 	mlxsw_sp_fib_node_remove(fib, fib_node);
4390 	return err;
4391 }
4392 
4393 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4394 				   struct mlxsw_sp_fib_node *fib_node)
4395 {
4396 	struct mlxsw_sp_fib *fib = fib_node->fib;
4397 
4398 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4399 	fib_node->fib = NULL;
4400 	mlxsw_sp_fib_node_remove(fib, fib_node);
4401 }
4402 
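/* Get a reference to the FIB node for the given prefix, creating it and
 * linking it to an LPM tree if it does not exist yet. The reference is
 * released by mlxsw_sp_fib_node_put() once the node's entry list is
 * empty.
 */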
4403 static struct mlxsw_sp_fib_node *
4404 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4405 		      size_t addr_len, unsigned char prefix_len,
4406 		      enum mlxsw_sp_l3proto proto)
4407 {
4408 	struct mlxsw_sp_fib_node *fib_node;
4409 	struct mlxsw_sp_fib *fib;
4410 	struct mlxsw_sp_vr *vr;
4411 	int err;
4412 
4413 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4414 	if (IS_ERR(vr))
4415 		return ERR_CAST(vr);
4416 	fib = mlxsw_sp_vr_fib(vr, proto);
4417 
4418 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4419 	if (fib_node)
4420 		return fib_node;
4421 
4422 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4423 	if (!fib_node) {
4424 		err = -ENOMEM;
4425 		goto err_fib_node_create;
4426 	}
4427 
4428 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4429 	if (err)
4430 		goto err_fib_node_init;
4431 
4432 	return fib_node;
4433 
4434 err_fib_node_init:
4435 	mlxsw_sp_fib_node_destroy(fib_node);
4436 err_fib_node_create:
4437 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4438 	return ERR_PTR(err);
4439 }
4440 
4441 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4442 				  struct mlxsw_sp_fib_node *fib_node)
4443 {
4444 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4445 
4446 	if (!list_empty(&fib_node->entry_list))
4447 		return;
4448 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4449 	mlxsw_sp_fib_node_destroy(fib_node);
4450 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4451 }
4452 
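/* The node's entry list is kept sorted by descending table ID and TOS
 * and ascending priority. Return the entry before which a new entry
 * with the given attributes should be inserted.
 */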
4453 static struct mlxsw_sp_fib4_entry *
4454 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4455 			      const struct mlxsw_sp_fib4_entry *new4_entry)
4456 {
4457 	struct mlxsw_sp_fib4_entry *fib4_entry;
4458 
4459 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4460 		if (fib4_entry->tb_id > new4_entry->tb_id)
4461 			continue;
4462 		if (fib4_entry->tb_id != new4_entry->tb_id)
4463 			break;
4464 		if (fib4_entry->tos > new4_entry->tos)
4465 			continue;
4466 		if (fib4_entry->prio >= new4_entry->prio ||
4467 		    fib4_entry->tos < new4_entry->tos)
4468 			return fib4_entry;
4469 	}
4470 
4471 	return NULL;
4472 }
4473 
4474 static int
4475 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4476 			       struct mlxsw_sp_fib4_entry *new4_entry)
4477 {
4478 	struct mlxsw_sp_fib_node *fib_node;
4479 
4480 	if (WARN_ON(!fib4_entry))
4481 		return -EINVAL;
4482 
4483 	fib_node = fib4_entry->common.fib_node;
4484 	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4485 				 common.list) {
4486 		if (fib4_entry->tb_id != new4_entry->tb_id ||
4487 		    fib4_entry->tos != new4_entry->tos ||
4488 		    fib4_entry->prio != new4_entry->prio)
4489 			break;
4490 	}
4491 
4492 	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4493 	return 0;
4494 }
4495 
4496 static int
4497 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4498 			       bool replace, bool append)
4499 {
4500 	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4501 	struct mlxsw_sp_fib4_entry *fib4_entry;
4502 
4503 	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4504 
4505 	if (append)
4506 		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4507 	if (replace && WARN_ON(!fib4_entry))
4508 		return -EINVAL;
4509 
4510 	/* Insert the new entry before the replaced one, so that we can
4511 	 * later remove the latter.
4512 	 */
4513 	if (fib4_entry) {
4514 		list_add_tail(&new4_entry->common.list,
4515 			      &fib4_entry->common.list);
4516 	} else {
4517 		struct mlxsw_sp_fib4_entry *last;
4518 
4519 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4520 			if (new4_entry->tb_id > last->tb_id)
4521 				break;
4522 			fib4_entry = last;
4523 		}
4524 
4525 		if (fib4_entry)
4526 			list_add(&new4_entry->common.list,
4527 				 &fib4_entry->common.list);
4528 		else
4529 			list_add(&new4_entry->common.list,
4530 				 &fib_node->entry_list);
4531 	}
4532 
4533 	return 0;
4534 }
4535 
4536 static void
4537 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4538 {
4539 	list_del(&fib4_entry->common.list);
4540 }
4541 
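/* Only the first entry on a node is programmed to the device, as it is
 * the one used for forwarding. Writing the new first entry overwrites
 * the previously programmed one, so no packets are lost during the
 * transition.
 */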
4542 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4543 				       struct mlxsw_sp_fib_entry *fib_entry)
4544 {
4545 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4546 
4547 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4548 		return 0;
4549 
4550 	/* To prevent packet loss, overwrite the previously offloaded
4551 	 * entry.
4552 	 */
4553 	if (!list_is_singular(&fib_node->entry_list)) {
4554 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4555 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4556 
4557 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4558 	}
4559 
4560 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4561 }
4562 
4563 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4564 					struct mlxsw_sp_fib_entry *fib_entry)
4565 {
4566 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4567 
4568 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4569 		return;
4570 
4571 	/* Promote the next entry by overwriting the deleted entry */
4572 	if (!list_is_singular(&fib_node->entry_list)) {
4573 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4574 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4575 
4576 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4577 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4578 		return;
4579 	}
4580 
4581 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4582 }
4583 
4584 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4585 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4586 					 bool replace, bool append)
4587 {
4588 	int err;
4589 
4590 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4591 	if (err)
4592 		return err;
4593 
4594 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4595 	if (err)
4596 		goto err_fib_node_entry_add;
4597 
4598 	return 0;
4599 
4600 err_fib_node_entry_add:
4601 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4602 	return err;
4603 }
4604 
4605 static void
4606 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4607 				struct mlxsw_sp_fib4_entry *fib4_entry)
4608 {
4609 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4610 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4611 
4612 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4613 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4614 }
4615 
4616 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4617 					struct mlxsw_sp_fib4_entry *fib4_entry,
4618 					bool replace)
4619 {
4620 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4621 	struct mlxsw_sp_fib4_entry *replaced;
4622 
4623 	if (!replace)
4624 		return;
4625 
4626 	/* We inserted the new entry before the replaced one */
4627 	replaced = list_next_entry(fib4_entry, common.list);
4628 
4629 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4630 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4631 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4632 }
4633 
4634 static int
4635 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4636 			 const struct fib_entry_notifier_info *fen_info,
4637 			 bool replace, bool append)
4638 {
4639 	struct mlxsw_sp_fib4_entry *fib4_entry;
4640 	struct mlxsw_sp_fib_node *fib_node;
4641 	int err;
4642 
4643 	if (mlxsw_sp->router->aborted)
4644 		return 0;
4645 
4646 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4647 					 &fen_info->dst, sizeof(fen_info->dst),
4648 					 fen_info->dst_len,
4649 					 MLXSW_SP_L3_PROTO_IPV4);
4650 	if (IS_ERR(fib_node)) {
4651 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4652 		return PTR_ERR(fib_node);
4653 	}
4654 
4655 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4656 	if (IS_ERR(fib4_entry)) {
4657 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4658 		err = PTR_ERR(fib4_entry);
4659 		goto err_fib4_entry_create;
4660 	}
4661 
4662 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4663 					    append);
4664 	if (err) {
4665 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4666 		goto err_fib4_node_entry_link;
4667 	}
4668 
4669 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4670 
4671 	return 0;
4672 
4673 err_fib4_node_entry_link:
4674 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4675 err_fib4_entry_create:
4676 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4677 	return err;
4678 }
4679 
4680 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4681 				     struct fib_entry_notifier_info *fen_info)
4682 {
4683 	struct mlxsw_sp_fib4_entry *fib4_entry;
4684 	struct mlxsw_sp_fib_node *fib_node;
4685 
4686 	if (mlxsw_sp->router->aborted)
4687 		return;
4688 
4689 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4690 	if (WARN_ON(!fib4_entry))
4691 		return;
4692 	fib_node = fib4_entry->common.fib_node;
4693 
4694 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4695 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4696 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4697 }
4698 
4699 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4700 {
4701 	/* Packets with a link-local destination IP arriving at the router
4702 	 * are trapped to the CPU, so there is no need to program specific
4703 	 * routes for them.
4704 	 */
4705 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4706 		return true;
4707 
4708 	/* Multicast routes aren't supported, so ignore them. Neighbour
4709 	 * Discovery packets are specifically trapped.
4710 	 */
4711 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4712 		return true;
4713 
4714 	/* Cloned routes are irrelevant in the forwarding path. */
4715 	if (rt->fib6_flags & RTF_CACHE)
4716 		return true;
4717 
4718 	return false;
4719 }
4720 
4721 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4722 {
4723 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4724 
4725 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4726 	if (!mlxsw_sp_rt6)
4727 		return ERR_PTR(-ENOMEM);
4728 
4729 	/* In case of route replacement, the replaced route is deleted
4730 	 * without notification. Take a reference to prevent accessing
4731 	 * freed memory.
4732 	 */
4733 	mlxsw_sp_rt6->rt = rt;
4734 	fib6_info_hold(rt);
4735 
4736 	return mlxsw_sp_rt6;
4737 }
4738 
4739 #if IS_ENABLED(CONFIG_IPV6)
4740 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4741 {
4742 	fib6_info_release(rt);
4743 }
4744 #else
4745 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4746 {
4747 }
4748 #endif
4749 
4750 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4751 {
4752 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4753 	kfree(mlxsw_sp_rt6);
4754 }
4755 
4756 static struct fib6_info *
4757 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4758 {
4759 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4760 				list)->rt;
4761 }
4762 
4763 static struct mlxsw_sp_fib6_entry *
4764 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4765 				 const struct fib6_info *nrt, bool append)
4766 {
4767 	struct mlxsw_sp_fib6_entry *fib6_entry;
4768 
4769 	if (!append)
4770 		return NULL;
4771 
4772 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4773 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4774 
4775 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4776 		 * virtual router.
4777 		 */
4778 		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4779 			continue;
4780 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4781 			break;
4782 		if (rt->fib6_metric < nrt->fib6_metric)
4783 			continue;
4784 		if (rt->fib6_metric == nrt->fib6_metric)
4785 			return fib6_entry;
4786 		if (rt->fib6_metric > nrt->fib6_metric)
4787 			break;
4788 	}
4789 
4790 	return NULL;
4791 }
4792 
4793 static struct mlxsw_sp_rt6 *
4794 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4795 			    const struct fib6_info *rt)
4796 {
4797 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4798 
4799 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4800 		if (mlxsw_sp_rt6->rt == rt)
4801 			return mlxsw_sp_rt6;
4802 	}
4803 
4804 	return NULL;
4805 }
4806 
4807 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4808 					const struct fib6_info *rt,
4809 					enum mlxsw_sp_ipip_type *ret)
4810 {
4811 	return rt->fib6_nh.nh_dev &&
4812 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
4813 }
4814 
4815 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4816 				       struct mlxsw_sp_nexthop_group *nh_grp,
4817 				       struct mlxsw_sp_nexthop *nh,
4818 				       const struct fib6_info *rt)
4819 {
4820 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4821 	struct mlxsw_sp_ipip_entry *ipip_entry;
4822 	struct net_device *dev = rt->fib6_nh.nh_dev;
4823 	struct mlxsw_sp_rif *rif;
4824 	int err;
4825 
4826 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4827 	if (ipip_entry) {
4828 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4829 		if (ipip_ops->can_offload(mlxsw_sp, dev,
4830 					  MLXSW_SP_L3_PROTO_IPV6)) {
4831 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4832 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4833 			return 0;
4834 		}
4835 	}
4836 
4837 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4838 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4839 	if (!rif)
4840 		return 0;
4841 	mlxsw_sp_nexthop_rif_init(nh, rif);
4842 
4843 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4844 	if (err)
4845 		goto err_nexthop_neigh_init;
4846 
4847 	return 0;
4848 
4849 err_nexthop_neigh_init:
4850 	mlxsw_sp_nexthop_rif_fini(nh);
4851 	return err;
4852 }
4853 
4854 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
4855 					struct mlxsw_sp_nexthop *nh)
4856 {
4857 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4858 }
4859 
4860 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4861 				  struct mlxsw_sp_nexthop_group *nh_grp,
4862 				  struct mlxsw_sp_nexthop *nh,
4863 				  const struct fib6_info *rt)
4864 {
4865 	struct net_device *dev = rt->fib6_nh.nh_dev;
4866 
4867 	nh->nh_grp = nh_grp;
4868 	nh->nh_weight = rt->fib6_nh.nh_weight;
4869 	memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
4870 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4871 
4872 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4873 
4874 	if (!dev)
4875 		return 0;
4876 	nh->ifindex = dev->ifindex;
4877 
4878 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4879 }
4880 
4881 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4882 				   struct mlxsw_sp_nexthop *nh)
4883 {
4884 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4885 	list_del(&nh->router_list_node);
4886 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4887 }
4888 
4889 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4890 				    const struct fib6_info *rt)
4891 {
4892 	return rt->fib6_flags & RTF_GATEWAY ||
4893 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4894 }
4895 
4896 static struct mlxsw_sp_nexthop_group *
4897 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4898 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4899 {
4900 	struct mlxsw_sp_nexthop_group *nh_grp;
4901 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4902 	struct mlxsw_sp_nexthop *nh;
4903 	size_t alloc_size;
4904 	int i;
4905 	int err;
4906 
4907 	alloc_size = sizeof(*nh_grp) +
4908 		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4909 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4910 	if (!nh_grp)
4911 		return ERR_PTR(-ENOMEM);
4912 	INIT_LIST_HEAD(&nh_grp->fib_list);
4913 #if IS_ENABLED(CONFIG_IPV6)
4914 	nh_grp->neigh_tbl = &nd_tbl;
4915 #endif
4916 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4917 					struct mlxsw_sp_rt6, list);
4918 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4919 	nh_grp->count = fib6_entry->nrt6;
4920 	for (i = 0; i < nh_grp->count; i++) {
4921 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
4922 
4923 		nh = &nh_grp->nexthops[i];
4924 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4925 		if (err)
4926 			goto err_nexthop6_init;
4927 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4928 	}
4929 
4930 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4931 	if (err)
4932 		goto err_nexthop_group_insert;
4933 
4934 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4935 	return nh_grp;
4936 
4937 err_nexthop_group_insert:
4938 err_nexthop6_init:
4939 	for (i--; i >= 0; i--) {
4940 		nh = &nh_grp->nexthops[i];
4941 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4942 	}
4943 	kfree(nh_grp);
4944 	return ERR_PTR(err);
4945 }
4946 
4947 static void
4948 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4949 				struct mlxsw_sp_nexthop_group *nh_grp)
4950 {
4951 	struct mlxsw_sp_nexthop *nh;
4952 	int i = nh_grp->count;
4953 
4954 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4955 	for (i--; i >= 0; i--) {
4956 		nh = &nh_grp->nexthops[i];
4957 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4958 	}
4959 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4960 	WARN_ON(nh_grp->adj_index_valid);
4961 	kfree(nh_grp);
4962 }
4963 
4964 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4965 				       struct mlxsw_sp_fib6_entry *fib6_entry)
4966 {
4967 	struct mlxsw_sp_nexthop_group *nh_grp;
4968 
4969 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4970 	if (!nh_grp) {
4971 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4972 		if (IS_ERR(nh_grp))
4973 			return PTR_ERR(nh_grp);
4974 	}
4975 
4976 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4977 		      &nh_grp->fib_list);
4978 	fib6_entry->common.nh_group = nh_grp;
4979 
4980 	return 0;
4981 }
4982 
4983 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4984 					struct mlxsw_sp_fib_entry *fib_entry)
4985 {
4986 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4987 
4988 	list_del(&fib_entry->nexthop_group_node);
4989 	if (!list_empty(&nh_grp->fib_list))
4990 		return;
4991 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4992 }
4993 
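/* The set of routes in the entry changed, so its current nexthop group
 * may no longer match. Move the entry to a matching group, update the
 * device so that the new adjacency index is used, and destroy the old
 * group if it is no longer needed.
 */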
4994 static int
4995 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
4996 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4997 {
4998 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
4999 	int err;
5000 
5001 	fib6_entry->common.nh_group = NULL;
5002 	list_del(&fib6_entry->common.nexthop_group_node);
5003 
5004 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5005 	if (err)
5006 		goto err_nexthop6_group_get;
5007 
5008 	/* If this entry is offloaded, the adjacency index currently
5009 	 * associated with it in the device's table is that of the old
5010 	 * group. Start using the new one instead.
5011 	 */
5012 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5013 	if (err)
5014 		goto err_fib_node_entry_add;
5015 
5016 	if (list_empty(&old_nh_grp->fib_list))
5017 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5018 
5019 	return 0;
5020 
5021 err_fib_node_entry_add:
5022 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5023 err_nexthop6_group_get:
5024 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5025 		      &old_nh_grp->fib_list);
5026 	fib6_entry->common.nh_group = old_nh_grp;
5027 	return err;
5028 }
5029 
5030 static int
5031 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5032 				struct mlxsw_sp_fib6_entry *fib6_entry,
5033 				struct fib6_info *rt)
5034 {
5035 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5036 	int err;
5037 
5038 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5039 	if (IS_ERR(mlxsw_sp_rt6))
5040 		return PTR_ERR(mlxsw_sp_rt6);
5041 
5042 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5043 	fib6_entry->nrt6++;
5044 
5045 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5046 	if (err)
5047 		goto err_nexthop6_group_update;
5048 
5049 	return 0;
5050 
5051 err_nexthop6_group_update:
5052 	fib6_entry->nrt6--;
5053 	list_del(&mlxsw_sp_rt6->list);
5054 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5055 	return err;
5056 }
5057 
5058 static void
5059 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5060 				struct mlxsw_sp_fib6_entry *fib6_entry,
5061 				struct fib6_info *rt)
5062 {
5063 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5064 
5065 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5066 	if (WARN_ON(!mlxsw_sp_rt6))
5067 		return;
5068 
5069 	fib6_entry->nrt6--;
5070 	list_del(&mlxsw_sp_rt6->list);
5071 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5072 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5073 }
5074 
5075 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5076 					 struct mlxsw_sp_fib_entry *fib_entry,
5077 					 const struct fib6_info *rt)
5078 {
5079 	/* Packets hitting RTF_REJECT routes need to be discarded by the
5080 	 * stack. We can rely on their destination device not having a
5081 	 * RIF (it's the loopback device) and can thus use action type
5082 	 * local, which will cause them to be trapped with a lower
5083 	 * priority than packets that need to be locally received.
5084 	 */
5085 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5086 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5087 	else if (rt->fib6_flags & RTF_REJECT)
5088 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5089 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5090 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5091 	else
5092 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5093 }
5094 
5095 static void
5096 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5097 {
5098 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5099 
5100 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5101 				 list) {
5102 		fib6_entry->nrt6--;
5103 		list_del(&mlxsw_sp_rt6->list);
5104 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5105 	}
5106 }
5107 
5108 static struct mlxsw_sp_fib6_entry *
5109 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5110 			   struct mlxsw_sp_fib_node *fib_node,
5111 			   struct fib6_info *rt)
5112 {
5113 	struct mlxsw_sp_fib6_entry *fib6_entry;
5114 	struct mlxsw_sp_fib_entry *fib_entry;
5115 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5116 	int err;
5117 
5118 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5119 	if (!fib6_entry)
5120 		return ERR_PTR(-ENOMEM);
5121 	fib_entry = &fib6_entry->common;
5122 
5123 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5124 	if (IS_ERR(mlxsw_sp_rt6)) {
5125 		err = PTR_ERR(mlxsw_sp_rt6);
5126 		goto err_rt6_create;
5127 	}
5128 
5129 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5130 
5131 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5132 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5133 	fib6_entry->nrt6 = 1;
5134 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5135 	if (err)
5136 		goto err_nexthop6_group_get;
5137 
5138 	fib_entry->fib_node = fib_node;
5139 
5140 	return fib6_entry;
5141 
5142 err_nexthop6_group_get:
5143 	list_del(&mlxsw_sp_rt6->list);
5144 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5145 err_rt6_create:
5146 	kfree(fib6_entry);
5147 	return ERR_PTR(err);
5148 }
5149 
5150 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5151 					struct mlxsw_sp_fib6_entry *fib6_entry)
5152 {
5153 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5154 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5155 	WARN_ON(fib6_entry->nrt6);
5156 	kfree(fib6_entry);
5157 }
5158 
5159 static struct mlxsw_sp_fib6_entry *
5160 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5161 			      const struct fib6_info *nrt, bool replace)
5162 {
5163 	struct mlxsw_sp_fib6_entry *fib6_entry;
5164 
5165 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5166 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5167 
5168 		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5169 			continue;
5170 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5171 			break;
5172 		if (replace && rt->fib6_metric == nrt->fib6_metric)
5173 			return fib6_entry;
5174 		if (rt->fib6_metric > nrt->fib6_metric)
5175 			return fib6_entry;
5176 	}
5177 
5178 	return NULL;
5179 }
5180 
5181 static int
5182 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5183 			       bool replace)
5184 {
5185 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5186 	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5187 	struct mlxsw_sp_fib6_entry *fib6_entry;
5188 
5189 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5190 
5191 	if (replace && WARN_ON(!fib6_entry))
5192 		return -EINVAL;
5193 
5194 	if (fib6_entry) {
5195 		list_add_tail(&new6_entry->common.list,
5196 			      &fib6_entry->common.list);
5197 	} else {
5198 		struct mlxsw_sp_fib6_entry *last;
5199 
5200 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
5201 			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5202 
5203 			if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5204 				break;
5205 			fib6_entry = last;
5206 		}
5207 
5208 		if (fib6_entry)
5209 			list_add(&new6_entry->common.list,
5210 				 &fib6_entry->common.list);
5211 		else
5212 			list_add(&new6_entry->common.list,
5213 				 &fib_node->entry_list);
5214 	}
5215 
5216 	return 0;
5217 }
5218 
5219 static void
5220 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5221 {
5222 	list_del(&fib6_entry->common.list);
5223 }
5224 
5225 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5226 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5227 					 bool replace)
5228 {
5229 	int err;
5230 
5231 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5232 	if (err)
5233 		return err;
5234 
5235 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5236 	if (err)
5237 		goto err_fib_node_entry_add;
5238 
5239 	return 0;
5240 
5241 err_fib_node_entry_add:
5242 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5243 	return err;
5244 }
5245 
5246 static void
5247 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5248 				struct mlxsw_sp_fib6_entry *fib6_entry)
5249 {
5250 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5251 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5252 }
5253 
5254 static struct mlxsw_sp_fib6_entry *
5255 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5256 			   const struct fib6_info *rt)
5257 {
5258 	struct mlxsw_sp_fib6_entry *fib6_entry;
5259 	struct mlxsw_sp_fib_node *fib_node;
5260 	struct mlxsw_sp_fib *fib;
5261 	struct mlxsw_sp_vr *vr;
5262 
5263 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5264 	if (!vr)
5265 		return NULL;
5266 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5267 
5268 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5269 					    sizeof(rt->fib6_dst.addr),
5270 					    rt->fib6_dst.plen);
5271 	if (!fib_node)
5272 		return NULL;
5273 
5274 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5275 		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5276 
5277 		if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5278 		    rt->fib6_metric == iter_rt->fib6_metric &&
5279 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5280 			return fib6_entry;
5281 	}
5282 
5283 	return NULL;
5284 }
5285 
5286 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5287 					struct mlxsw_sp_fib6_entry *fib6_entry,
5288 					bool replace)
5289 {
5290 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5291 	struct mlxsw_sp_fib6_entry *replaced;
5292 
5293 	if (!replace)
5294 		return;
5295 
5296 	replaced = list_next_entry(fib6_entry, common.list);
5297 
5298 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5299 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5300 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5301 }
5302 
5303 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5304 				    struct fib6_info *rt, bool replace,
5305 				    bool append)
5306 {
5307 	struct mlxsw_sp_fib6_entry *fib6_entry;
5308 	struct mlxsw_sp_fib_node *fib_node;
5309 	int err;
5310 
5311 	if (mlxsw_sp->router->aborted)
5312 		return 0;
5313 
5314 	if (rt->fib6_src.plen)
5315 		return -EINVAL;
5316 
5317 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5318 		return 0;
5319 
5320 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5321 					 &rt->fib6_dst.addr,
5322 					 sizeof(rt->fib6_dst.addr),
5323 					 rt->fib6_dst.plen,
5324 					 MLXSW_SP_L3_PROTO_IPV6);
5325 	if (IS_ERR(fib_node))
5326 		return PTR_ERR(fib_node);
5327 
5328 	/* Before creating a new entry, try to append the route to an
5329 	 * existing multipath entry.
5330 	 */
5331 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, append);
5332 	if (fib6_entry) {
5333 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5334 		if (err)
5335 			goto err_fib6_entry_nexthop_add;
5336 		return 0;
5337 	}
5338 
5339 	/* We received an append event, yet did not find any route to
5340 	 * append to.
5341 	 */
5342 	if (WARN_ON(append)) {
5343 		err = -EINVAL;
5344 		goto err_fib6_entry_append;
5345 	}
5346 
5347 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5348 	if (IS_ERR(fib6_entry)) {
5349 		err = PTR_ERR(fib6_entry);
5350 		goto err_fib6_entry_create;
5351 	}
5352 
5353 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5354 	if (err)
5355 		goto err_fib6_node_entry_link;
5356 
5357 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5358 
5359 	return 0;
5360 
5361 err_fib6_node_entry_link:
5362 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5363 err_fib6_entry_create:
5364 err_fib6_entry_append:
5365 err_fib6_entry_nexthop_add:
5366 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5367 	return err;
5368 }
5369 
5370 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5371 				     struct fib6_info *rt)
5372 {
5373 	struct mlxsw_sp_fib6_entry *fib6_entry;
5374 	struct mlxsw_sp_fib_node *fib_node;
5375 
5376 	if (mlxsw_sp->router->aborted)
5377 		return;
5378 
5379 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5380 		return;
5381 
5382 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5383 	if (WARN_ON(!fib6_entry))
5384 		return;
5385 
5386 	/* If the route is part of a multipath entry, but is not the last
5387 	 * one to be removed, then only shrink its nexthop group.
5388 	 */
5389 	if (!list_is_singular(&fib6_entry->rt6_list)) {
5390 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5391 		return;
5392 	}
5393 
5394 	fib_node = fib6_entry->common.fib_node;
5395 
5396 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5397 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5398 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5399 }
5400 
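/* Make each virtual router trap packets to the CPU instead of forwarding
 * them: create a new LPM tree with the given ID, bind every virtual router
 * to it and install a default route whose action is ip2me. The kernel then
 * takes over forwarding for the traffic the device no longer offloads.
 */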
5401 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5402 					    enum mlxsw_reg_ralxx_protocol proto,
5403 					    u8 tree_id)
5404 {
5405 	char ralta_pl[MLXSW_REG_RALTA_LEN];
5406 	char ralst_pl[MLXSW_REG_RALST_LEN];
5407 	int i, err;
5408 
5409 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5410 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5411 	if (err)
5412 		return err;
5413 
5414 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5415 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5416 	if (err)
5417 		return err;
5418 
5419 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5420 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5421 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5422 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5423 
5424 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5425 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5426 				      raltb_pl);
5427 		if (err)
5428 			return err;
5429 
5430 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5431 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5432 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5433 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5434 				      ralue_pl);
5435 		if (err)
5436 			return err;
5437 	}
5438 
5439 	return 0;
5440 }
5441 
5442 static struct mlxsw_sp_mr_table *
5443 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5444 {
5445 	if (family == RTNL_FAMILY_IPMR)
5446 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5447 	else
5448 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5449 }
5450 
5451 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5452 				     struct mfc_entry_notifier_info *men_info,
5453 				     bool replace)
5454 {
5455 	struct mlxsw_sp_mr_table *mrt;
5456 	struct mlxsw_sp_vr *vr;
5457 
5458 	if (mlxsw_sp->router->aborted)
5459 		return 0;
5460 
5461 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5462 	if (IS_ERR(vr))
5463 		return PTR_ERR(vr);
5464 
5465 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5466 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5467 }
5468 
5469 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5470 				      struct mfc_entry_notifier_info *men_info)
5471 {
5472 	struct mlxsw_sp_mr_table *mrt;
5473 	struct mlxsw_sp_vr *vr;
5474 
5475 	if (mlxsw_sp->router->aborted)
5476 		return;
5477 
5478 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5479 	if (WARN_ON(!vr))
5480 		return;
5481 
5482 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5483 	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5484 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5485 }
5486 
5487 static int
5488 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5489 			      struct vif_entry_notifier_info *ven_info)
5490 {
5491 	struct mlxsw_sp_mr_table *mrt;
5492 	struct mlxsw_sp_rif *rif;
5493 	struct mlxsw_sp_vr *vr;
5494 
5495 	if (mlxsw_sp->router->aborted)
5496 		return 0;
5497 
5498 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5499 	if (IS_ERR(vr))
5500 		return PTR_ERR(vr);
5501 
5502 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5503 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5504 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5505 				   ven_info->vif_index,
5506 				   ven_info->vif_flags, rif);
5507 }
5508 
5509 static void
5510 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5511 			      struct vif_entry_notifier_info *ven_info)
5512 {
5513 	struct mlxsw_sp_mr_table *mrt;
5514 	struct mlxsw_sp_vr *vr;
5515 
5516 	if (mlxsw_sp->router->aborted)
5517 		return;
5518 
5519 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5520 	if (WARN_ON(!vr))
5521 		return;
5522 
5523 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5524 	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5525 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5526 }
5527 
5528 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5529 {
5530 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5531 	int err;
5532 
5533 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5534 					       MLXSW_SP_LPM_TREE_MIN);
5535 	if (err)
5536 		return err;
5537 
5538 	/* The multicast router code does not need an abort trap, as by
5539 	 * default packets that do not match any routes are trapped to the CPU.
5540 	 */
5541 
5542 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5543 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5544 						MLXSW_SP_LPM_TREE_MIN + 1);
5545 }
5546 
5547 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5548 				     struct mlxsw_sp_fib_node *fib_node)
5549 {
5550 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5551 
5552 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5553 				 common.list) {
5554 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5555 
5556 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5557 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5558 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5559 		/* Break when entry list is empty and node was freed.
5560 		 * Otherwise, we'll access freed memory in the next
5561 		 * iteration.
5562 		 */
5563 		if (do_break)
5564 			break;
5565 	}
5566 }
5567 
5568 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5569 				     struct mlxsw_sp_fib_node *fib_node)
5570 {
5571 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5572 
5573 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5574 				 common.list) {
5575 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5576 
5577 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5578 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5579 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5580 		if (do_break)
5581 			break;
5582 	}
5583 }
5584 
5585 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5586 				    struct mlxsw_sp_fib_node *fib_node)
5587 {
5588 	switch (fib_node->fib->proto) {
5589 	case MLXSW_SP_L3_PROTO_IPV4:
5590 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5591 		break;
5592 	case MLXSW_SP_L3_PROTO_IPV6:
5593 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5594 		break;
5595 	}
5596 }
5597 
5598 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5599 				  struct mlxsw_sp_vr *vr,
5600 				  enum mlxsw_sp_l3proto proto)
5601 {
5602 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5603 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5604 
5605 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5606 		bool do_break = &tmp->list == &fib->node_list;
5607 
5608 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5609 		if (do_break)
5610 			break;
5611 	}
5612 }
5613 
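/* Flush all offloaded routes from the device; used when aborting FIB
 * offload. For every active virtual router, flush the multicast tables and
 * the IPv4 FIB, and then flush the IPv6 FIB if the virtual router is still
 * in use.
 */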
5614 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5615 {
5616 	int i, j;
5617 
5618 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5619 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5620 
5621 		if (!mlxsw_sp_vr_is_used(vr))
5622 			continue;
5623 
5624 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5625 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5626 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5627 
5628 		/* If the virtual router was only used for IPv4, then it is
5629 		 * no longer in use.
5630 		 */
5631 		if (!mlxsw_sp_vr_is_used(vr))
5632 			continue;
5633 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5634 	}
5635 }
5636 
5637 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5638 {
5639 	int err;
5640 
5641 	if (mlxsw_sp->router->aborted)
5642 		return;
5643 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5644 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5645 	mlxsw_sp->router->aborted = true;
5646 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5647 	if (err)
5648 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5649 }
5650 
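/* FIB notifications are delivered in an atomic context, so the relevant
 * parts of the notification are copied into this work item and processed
 * later, in process context and under RTNL.
 */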
5651 struct mlxsw_sp_fib_event_work {
5652 	struct work_struct work;
5653 	union {
5654 		struct fib6_entry_notifier_info fen6_info;
5655 		struct fib_entry_notifier_info fen_info;
5656 		struct fib_rule_notifier_info fr_info;
5657 		struct fib_nh_notifier_info fnh_info;
5658 		struct mfc_entry_notifier_info men_info;
5659 		struct vif_entry_notifier_info ven_info;
5660 	};
5661 	struct mlxsw_sp *mlxsw_sp;
5662 	unsigned long event;
5663 };
5664 
5665 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5666 {
5667 	struct mlxsw_sp_fib_event_work *fib_work =
5668 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5669 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5670 	bool replace, append;
5671 	int err;
5672 
5673 	/* Protect internal structures from changes */
5674 	rtnl_lock();
5675 	mlxsw_sp_span_respin(mlxsw_sp);
5676 
5677 	switch (fib_work->event) {
5678 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5679 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5680 	case FIB_EVENT_ENTRY_ADD:
5681 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5682 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5683 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5684 					       replace, append);
5685 		if (err)
5686 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5687 		fib_info_put(fib_work->fen_info.fi);
5688 		break;
5689 	case FIB_EVENT_ENTRY_DEL:
5690 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5691 		fib_info_put(fib_work->fen_info.fi);
5692 		break;
5693 	case FIB_EVENT_RULE_ADD:
5694 		/* If we get here, a rule was added that we do not support,
5695 		 * so abort FIB offloading.
5696 		 */
5697 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5698 		break;
5699 	case FIB_EVENT_NH_ADD: /* fall through */
5700 	case FIB_EVENT_NH_DEL:
5701 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5702 					fib_work->fnh_info.fib_nh);
5703 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5704 		break;
5705 	}
5706 	rtnl_unlock();
5707 	kfree(fib_work);
5708 }
5709 
5710 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5711 {
5712 	struct mlxsw_sp_fib_event_work *fib_work =
5713 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5714 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5715 	bool replace, append;
5716 	int err;
5717 
5718 	rtnl_lock();
5719 	mlxsw_sp_span_respin(mlxsw_sp);
5720 
5721 	switch (fib_work->event) {
5722 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5723 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5724 	case FIB_EVENT_ENTRY_ADD:
5725 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5726 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5727 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5728 					       fib_work->fen6_info.rt, replace,
5729 					       append);
5730 		if (err)
5731 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5732 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5733 		break;
5734 	case FIB_EVENT_ENTRY_DEL:
5735 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5736 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5737 		break;
5738 	case FIB_EVENT_RULE_ADD:
5739 		/* If we get here, a rule was added that we do not support,
5740 		 * so abort FIB offloading.
5741 		 */
5742 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5743 		break;
5744 	}
5745 	rtnl_unlock();
5746 	kfree(fib_work);
5747 }
5748 
5749 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5750 {
5751 	struct mlxsw_sp_fib_event_work *fib_work =
5752 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5753 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5754 	bool replace;
5755 	int err;
5756 
5757 	rtnl_lock();
5758 	switch (fib_work->event) {
5759 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5760 	case FIB_EVENT_ENTRY_ADD:
5761 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5762 
5763 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5764 						replace);
5765 		if (err)
5766 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5767 		mr_cache_put(fib_work->men_info.mfc);
5768 		break;
5769 	case FIB_EVENT_ENTRY_DEL:
5770 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5771 		mr_cache_put(fib_work->men_info.mfc);
5772 		break;
5773 	case FIB_EVENT_VIF_ADD:
5774 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5775 						    &fib_work->ven_info);
5776 		if (err)
5777 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5778 		dev_put(fib_work->ven_info.dev);
5779 		break;
5780 	case FIB_EVENT_VIF_DEL:
5781 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5782 					      &fib_work->ven_info);
5783 		dev_put(fib_work->ven_info.dev);
5784 		break;
5785 	case FIB_EVENT_RULE_ADD:
5786 		/* If we get here, a rule was added that we do not support,
5787 		 * so abort FIB offloading.
5788 		 */
5789 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5790 		break;
5791 	}
5792 	rtnl_unlock();
5793 	kfree(fib_work);
5794 }
5795 
5796 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5797 				       struct fib_notifier_info *info)
5798 {
5799 	struct fib_entry_notifier_info *fen_info;
5800 	struct fib_nh_notifier_info *fnh_info;
5801 
5802 	switch (fib_work->event) {
5803 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5804 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5805 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5806 	case FIB_EVENT_ENTRY_DEL:
5807 		fen_info = container_of(info, struct fib_entry_notifier_info,
5808 					info);
5809 		fib_work->fen_info = *fen_info;
5810 		/* Take a reference on the fib_info to prevent it from being
5811 		 * freed while the work is queued. Release it afterwards.
5812 		 */
5813 		fib_info_hold(fib_work->fen_info.fi);
5814 		break;
5815 	case FIB_EVENT_NH_ADD: /* fall through */
5816 	case FIB_EVENT_NH_DEL:
5817 		fnh_info = container_of(info, struct fib_nh_notifier_info,
5818 					info);
5819 		fib_work->fnh_info = *fnh_info;
5820 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5821 		break;
5822 	}
5823 }
5824 
5825 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5826 				       struct fib_notifier_info *info)
5827 {
5828 	struct fib6_entry_notifier_info *fen6_info;
5829 
5830 	switch (fib_work->event) {
5831 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5832 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5833 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5834 	case FIB_EVENT_ENTRY_DEL:
5835 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
5836 					 info);
5837 		fib_work->fen6_info = *fen6_info;
5838 		fib6_info_hold(fib_work->fen6_info.rt);
5839 		break;
5840 	}
5841 }
5842 
5843 static void
5844 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5845 			    struct fib_notifier_info *info)
5846 {
5847 	switch (fib_work->event) {
5848 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5849 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5850 	case FIB_EVENT_ENTRY_DEL:
5851 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5852 		mr_cache_hold(fib_work->men_info.mfc);
5853 		break;
5854 	case FIB_EVENT_VIF_ADD: /* fall through */
5855 	case FIB_EVENT_VIF_DEL:
5856 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5857 		dev_hold(fib_work->ven_info.dev);
5858 		break;
5859 	}
5860 }
5861 
5862 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5863 					  struct fib_notifier_info *info,
5864 					  struct mlxsw_sp *mlxsw_sp)
5865 {
5866 	struct netlink_ext_ack *extack = info->extack;
5867 	struct fib_rule_notifier_info *fr_info;
5868 	struct fib_rule *rule;
5869 	int err = 0;
5870 
5871 	/* Nothing to do at the moment. */
5872 	if (event == FIB_EVENT_RULE_DEL)
5873 		return 0;
5874 
5875 	if (mlxsw_sp->router->aborted)
5876 		return 0;
5877 
5878 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
5879 	rule = fr_info->rule;
5880 
5881 	switch (info->family) {
5882 	case AF_INET:
5883 		if (!fib4_rule_default(rule) && !rule->l3mdev)
5884 			err = -EOPNOTSUPP;
5885 		break;
5886 	case AF_INET6:
5887 		if (!fib6_rule_default(rule) && !rule->l3mdev)
5888 			err = -EOPNOTSUPP;
5889 		break;
5890 	case RTNL_FAMILY_IPMR:
5891 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
5892 			err = -EOPNOTSUPP;
5893 		break;
5894 	case RTNL_FAMILY_IP6MR:
5895 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
5896 			err = -EOPNOTSUPP;
5897 		break;
5898 	}
5899 
5900 	if (err < 0)
5901 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
5902 
5903 	return err;
5904 }
5905 
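/* Entry point for FIB notifications. Events for unsupported address
 * families or for network namespaces other than the initial one are
 * ignored. Rule events are validated synchronously, so that unsupported
 * rules can be vetoed through extack; without extack the event is instead
 * deferred to the work item, which aborts FIB offloading. All other events
 * are copied into a per-family work item and handled in process context.
 */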
5906 /* Called with rcu_read_lock() */
5907 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5908 				     unsigned long event, void *ptr)
5909 {
5910 	struct mlxsw_sp_fib_event_work *fib_work;
5911 	struct fib_notifier_info *info = ptr;
5912 	struct mlxsw_sp_router *router;
5913 	int err;
5914 
5915 	if (!net_eq(info->net, &init_net) ||
5916 	    (info->family != AF_INET && info->family != AF_INET6 &&
5917 	     info->family != RTNL_FAMILY_IPMR &&
5918 	     info->family != RTNL_FAMILY_IP6MR))
5919 		return NOTIFY_DONE;
5920 
5921 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5922 
5923 	switch (event) {
5924 	case FIB_EVENT_RULE_ADD: /* fall through */
5925 	case FIB_EVENT_RULE_DEL:
5926 		err = mlxsw_sp_router_fib_rule_event(event, info,
5927 						     router->mlxsw_sp);
5928 		if (!err || info->extack)
5929 			return notifier_from_errno(err);
5930 		break;
5931 	case FIB_EVENT_ENTRY_ADD:
5932 		if (router->aborted) {
5933 			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
5934 			return notifier_from_errno(-EINVAL);
5935 		}
5936 		break;
5937 	}
5938 
5939 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5940 	if (WARN_ON(!fib_work))
5941 		return NOTIFY_BAD;
5942 
5943 	fib_work->mlxsw_sp = router->mlxsw_sp;
5944 	fib_work->event = event;
5945 
5946 	switch (info->family) {
5947 	case AF_INET:
5948 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5949 		mlxsw_sp_router_fib4_event(fib_work, info);
5950 		break;
5951 	case AF_INET6:
5952 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5953 		mlxsw_sp_router_fib6_event(fib_work, info);
5954 		break;
5955 	case RTNL_FAMILY_IP6MR:
5956 	case RTNL_FAMILY_IPMR:
5957 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5958 		mlxsw_sp_router_fibmr_event(fib_work, info);
5959 		break;
5960 	}
5961 
5962 	mlxsw_core_schedule_work(&fib_work->work);
5963 
5964 	return NOTIFY_DONE;
5965 }
5966 
5967 struct mlxsw_sp_rif *
5968 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5969 			 const struct net_device *dev)
5970 {
5971 	int i;
5972 
5973 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5974 		if (mlxsw_sp->router->rifs[i] &&
5975 		    mlxsw_sp->router->rifs[i]->dev == dev)
5976 			return mlxsw_sp->router->rifs[i];
5977 
5978 	return NULL;
5979 }
5980 
5981 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5982 {
5983 	char ritr_pl[MLXSW_REG_RITR_LEN];
5984 	int err;
5985 
5986 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5987 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5988 	if (WARN_ON_ONCE(err))
5989 		return err;
5990 
5991 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
5992 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5993 }
5994 
5995 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
5996 					  struct mlxsw_sp_rif *rif)
5997 {
5998 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
5999 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6000 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6001 }
6002 
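/* Decide whether an address event should change the RIF configuration. On
 * NETDEV_UP, a RIF should only be created if the netdev does not already
 * have one. On NETDEV_DOWN, the RIF should only be destroyed once the
 * netdev has neither IPv4 nor IPv6 addresses left and is not an L3 slave,
 * as the RIFs of L3 slaves are managed through VRF events.
 */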
6003 static bool
6004 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6005 			   unsigned long event)
6006 {
6007 	struct inet6_dev *inet6_dev;
6008 	bool addr_list_empty = true;
6009 	struct in_device *idev;
6010 
6011 	switch (event) {
6012 	case NETDEV_UP:
6013 		return rif == NULL;
6014 	case NETDEV_DOWN:
6015 		idev = __in_dev_get_rtnl(dev);
6016 		if (idev && idev->ifa_list)
6017 			addr_list_empty = false;
6018 
6019 		inet6_dev = __in6_dev_get(dev);
6020 		if (addr_list_empty && inet6_dev &&
6021 		    !list_empty(&inet6_dev->addr_list))
6022 			addr_list_empty = false;
6023 
6024 		if (rif && addr_list_empty &&
6025 		    !netif_is_l3_slave(rif->dev))
6026 			return true;
6027 		/* It is possible we already removed the RIF ourselves
6028 		 * if it was assigned to a netdev that is now a bridge
6029 		 * or LAG slave.
6030 		 */
6031 		return false;
6032 	}
6033 
6034 	return false;
6035 }
6036 
6037 static enum mlxsw_sp_rif_type
6038 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6039 		      const struct net_device *dev)
6040 {
6041 	enum mlxsw_sp_fid_type type;
6042 
6043 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6044 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
6045 
6046 	/* Otherwise RIF type is derived from the type of the underlying FID. */
6047 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6048 		type = MLXSW_SP_FID_TYPE_8021Q;
6049 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6050 		type = MLXSW_SP_FID_TYPE_8021Q;
6051 	else if (netif_is_bridge_master(dev))
6052 		type = MLXSW_SP_FID_TYPE_8021D;
6053 	else
6054 		type = MLXSW_SP_FID_TYPE_RFID;
6055 
6056 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6057 }
6058 
6059 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6060 {
6061 	int i;
6062 
6063 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6064 		if (!mlxsw_sp->router->rifs[i]) {
6065 			*p_rif_index = i;
6066 			return 0;
6067 		}
6068 	}
6069 
6070 	return -ENOBUFS;
6071 }
6072 
6073 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6074 					       u16 vr_id,
6075 					       struct net_device *l3_dev)
6076 {
6077 	struct mlxsw_sp_rif *rif;
6078 
6079 	rif = kzalloc(rif_size, GFP_KERNEL);
6080 	if (!rif)
6081 		return NULL;
6082 
6083 	INIT_LIST_HEAD(&rif->nexthop_list);
6084 	INIT_LIST_HEAD(&rif->neigh_list);
6085 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
6086 	rif->mtu = l3_dev->mtu;
6087 	rif->vr_id = vr_id;
6088 	rif->dev = l3_dev;
6089 	rif->rif_index = rif_index;
6090 
6091 	return rif;
6092 }
6093 
6094 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6095 					   u16 rif_index)
6096 {
6097 	return mlxsw_sp->router->rifs[rif_index];
6098 }
6099 
6100 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6101 {
6102 	return rif->rif_index;
6103 }
6104 
6105 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6106 {
6107 	return lb_rif->common.rif_index;
6108 }
6109 
6110 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6111 {
6112 	return lb_rif->ul_vr_id;
6113 }
6114 
6115 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6116 {
6117 	return rif->dev->ifindex;
6118 }
6119 
6120 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6121 {
6122 	return rif->dev;
6123 }
6124 
6125 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6126 {
6127 	return rif->fid;
6128 }
6129 
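/* Create a RIF for a netdev: derive the RIF type and its operations from
 * the netdev, bind the relevant virtual router, allocate a free RIF index,
 * take a reference on the backing FID if the RIF type has one, and finally
 * program the device and register the RIF with the multicast routing
 * tables.
 */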
6130 static struct mlxsw_sp_rif *
6131 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6132 		    const struct mlxsw_sp_rif_params *params,
6133 		    struct netlink_ext_ack *extack)
6134 {
6135 	u32 tb_id = l3mdev_fib_table(params->dev);
6136 	const struct mlxsw_sp_rif_ops *ops;
6137 	struct mlxsw_sp_fid *fid = NULL;
6138 	enum mlxsw_sp_rif_type type;
6139 	struct mlxsw_sp_rif *rif;
6140 	struct mlxsw_sp_vr *vr;
6141 	u16 rif_index;
6142 	int i, err;
6143 
6144 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6145 	ops = mlxsw_sp->router->rif_ops_arr[type];
6146 
6147 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6148 	if (IS_ERR(vr))
6149 		return ERR_CAST(vr);
6150 	vr->rif_count++;
6151 
6152 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6153 	if (err) {
6154 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6155 		goto err_rif_index_alloc;
6156 	}
6157 
6158 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6159 	if (!rif) {
6160 		err = -ENOMEM;
6161 		goto err_rif_alloc;
6162 	}
6163 	rif->mlxsw_sp = mlxsw_sp;
6164 	rif->ops = ops;
6165 
6166 	if (ops->fid_get) {
6167 		fid = ops->fid_get(rif, extack);
6168 		if (IS_ERR(fid)) {
6169 			err = PTR_ERR(fid);
6170 			goto err_fid_get;
6171 		}
6172 		rif->fid = fid;
6173 	}
6174 
6175 	if (ops->setup)
6176 		ops->setup(rif, params);
6177 
6178 	err = ops->configure(rif);
6179 	if (err)
6180 		goto err_configure;
6181 
6182 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6183 		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6184 		if (err)
6185 			goto err_mr_rif_add;
6186 	}
6187 
6188 	mlxsw_sp_rif_counters_alloc(rif);
6189 	mlxsw_sp->router->rifs[rif_index] = rif;
6190 
6191 	return rif;
6192 
6193 err_mr_rif_add:
6194 	for (i--; i >= 0; i--)
6195 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6196 	ops->deconfigure(rif);
6197 err_configure:
6198 	if (fid)
6199 		mlxsw_sp_fid_put(fid);
6200 err_fid_get:
6201 	kfree(rif);
6202 err_rif_alloc:
6203 err_rif_index_alloc:
6204 	vr->rif_count--;
6205 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6206 	return ERR_PTR(err);
6207 }
6208 
6209 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6210 {
6211 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6212 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6213 	struct mlxsw_sp_fid *fid = rif->fid;
6214 	struct mlxsw_sp_vr *vr;
6215 	int i;
6216 
6217 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6218 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6219 
6220 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6221 	mlxsw_sp_rif_counters_free(rif);
6222 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6223 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6224 	ops->deconfigure(rif);
6225 	if (fid)
6226 		/* Loopback RIFs are not associated with a FID. */
6227 		mlxsw_sp_fid_put(fid);
6228 	kfree(rif);
6229 	vr->rif_count--;
6230 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6231 }
6232 
6233 static void
6234 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6235 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6236 {
6237 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6238 
6239 	params->vid = mlxsw_sp_port_vlan->vid;
6240 	params->lag = mlxsw_sp_port->lagged;
6241 	if (params->lag)
6242 		params->lag_id = mlxsw_sp_port->lag_id;
6243 	else
6244 		params->system_port = mlxsw_sp_port->local_port;
6245 }
6246 
6247 static int
6248 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6249 			       struct net_device *l3_dev,
6250 			       struct netlink_ext_ack *extack)
6251 {
6252 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6253 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6254 	u16 vid = mlxsw_sp_port_vlan->vid;
6255 	struct mlxsw_sp_rif *rif;
6256 	struct mlxsw_sp_fid *fid;
6257 	int err;
6258 
6259 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6260 	if (!rif) {
6261 		struct mlxsw_sp_rif_params params = {
6262 			.dev = l3_dev,
6263 		};
6264 
6265 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6266 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6267 		if (IS_ERR(rif))
6268 			return PTR_ERR(rif);
6269 	}
6270 
6271 	/* FID was already created, just take a reference */
6272 	fid = rif->ops->fid_get(rif, extack);
	if (IS_ERR(fid))
		return PTR_ERR(fid);
6273 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6274 	if (err)
6275 		goto err_fid_port_vid_map;
6276 
6277 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6278 	if (err)
6279 		goto err_port_vid_learning_set;
6280 
6281 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6282 					BR_STATE_FORWARDING);
6283 	if (err)
6284 		goto err_port_vid_stp_set;
6285 
6286 	mlxsw_sp_port_vlan->fid = fid;
6287 
6288 	return 0;
6289 
6290 err_port_vid_stp_set:
6291 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6292 err_port_vid_learning_set:
6293 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6294 err_fid_port_vid_map:
6295 	mlxsw_sp_fid_put(fid);
6296 	return err;
6297 }
6298 
6299 void
6300 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6301 {
6302 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6303 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6304 	u16 vid = mlxsw_sp_port_vlan->vid;
6305 
6306 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6307 		return;
6308 
6309 	mlxsw_sp_port_vlan->fid = NULL;
6310 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6311 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6312 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6313 	/* If the router port holds the last reference on the rFID, then
6314 	 * the associated sub-port RIF will be destroyed.
6315 	 */
6316 	mlxsw_sp_fid_put(fid);
6317 }
6318 
6319 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6320 					     struct net_device *port_dev,
6321 					     unsigned long event, u16 vid,
6322 					     struct netlink_ext_ack *extack)
6323 {
6324 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6325 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6326 
6327 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6328 	if (WARN_ON(!mlxsw_sp_port_vlan))
6329 		return -EINVAL;
6330 
6331 	switch (event) {
6332 	case NETDEV_UP:
6333 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6334 						      l3_dev, extack);
6335 	case NETDEV_DOWN:
6336 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6337 		break;
6338 	}
6339 
6340 	return 0;
6341 }
6342 
6343 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6344 					unsigned long event,
6345 					struct netlink_ext_ack *extack)
6346 {
6347 	if (netif_is_bridge_port(port_dev) ||
6348 	    netif_is_lag_port(port_dev) ||
6349 	    netif_is_ovs_port(port_dev))
6350 		return 0;
6351 
6352 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6353 						 extack);
6354 }
6355 
6356 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6357 					 struct net_device *lag_dev,
6358 					 unsigned long event, u16 vid,
6359 					 struct netlink_ext_ack *extack)
6360 {
6361 	struct net_device *port_dev;
6362 	struct list_head *iter;
6363 	int err;
6364 
6365 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6366 		if (mlxsw_sp_port_dev_check(port_dev)) {
6367 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6368 								port_dev,
6369 								event, vid,
6370 								extack);
6371 			if (err)
6372 				return err;
6373 		}
6374 	}
6375 
6376 	return 0;
6377 }
6378 
6379 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6380 				       unsigned long event,
6381 				       struct netlink_ext_ack *extack)
6382 {
6383 	if (netif_is_bridge_port(lag_dev))
6384 		return 0;
6385 
6386 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
6387 					     extack);
6388 }
6389 
6390 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6391 					  unsigned long event,
6392 					  struct netlink_ext_ack *extack)
6393 {
6394 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6395 	struct mlxsw_sp_rif_params params = {
6396 		.dev = l3_dev,
6397 	};
6398 	struct mlxsw_sp_rif *rif;
6399 
6400 	switch (event) {
6401 	case NETDEV_UP:
6402 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6403 		if (IS_ERR(rif))
6404 			return PTR_ERR(rif);
6405 		break;
6406 	case NETDEV_DOWN:
6407 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6408 		mlxsw_sp_rif_destroy(rif);
6409 		break;
6410 	}
6411 
6412 	return 0;
6413 }
6414 
6415 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6416 					unsigned long event,
6417 					struct netlink_ext_ack *extack)
6418 {
6419 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6420 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6421 
6422 	if (netif_is_bridge_port(vlan_dev))
6423 		return 0;
6424 
6425 	if (mlxsw_sp_port_dev_check(real_dev))
6426 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6427 							 event, vid, extack);
6428 	else if (netif_is_lag_master(real_dev))
6429 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6430 						     vid, extack);
6431 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6432 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6433 
6434 	return 0;
6435 }
6436 
6437 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
6438 				     unsigned long event,
6439 				     struct netlink_ext_ack *extack)
6440 {
6441 	if (mlxsw_sp_port_dev_check(dev))
6442 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6443 	else if (netif_is_lag_master(dev))
6444 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6445 	else if (netif_is_bridge_master(dev))
6446 		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
6447 	else if (is_vlan_dev(dev))
6448 		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
6449 	else
6450 		return 0;
6451 }
6452 
6453 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6454 			    unsigned long event, void *ptr)
6455 {
6456 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6457 	struct net_device *dev = ifa->ifa_dev->dev;
6458 	struct mlxsw_sp *mlxsw_sp;
6459 	struct mlxsw_sp_rif *rif;
6460 	int err = 0;
6461 
6462 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6463 	if (event == NETDEV_UP)
6464 		goto out;
6465 
6466 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6467 	if (!mlxsw_sp)
6468 		goto out;
6469 
6470 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6471 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6472 		goto out;
6473 
6474 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6475 out:
6476 	return notifier_from_errno(err);
6477 }
6478 
6479 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6480 				  unsigned long event, void *ptr)
6481 {
6482 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6483 	struct net_device *dev = ivi->ivi_dev->dev;
6484 	struct mlxsw_sp *mlxsw_sp;
6485 	struct mlxsw_sp_rif *rif;
6486 	int err = 0;
6487 
6488 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6489 	if (!mlxsw_sp)
6490 		goto out;
6491 
6492 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6493 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6494 		goto out;
6495 
6496 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6497 out:
6498 	return notifier_from_errno(err);
6499 }
6500 
6501 struct mlxsw_sp_inet6addr_event_work {
6502 	struct work_struct work;
6503 	struct net_device *dev;
6504 	unsigned long event;
6505 };
6506 
6507 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6508 {
6509 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6510 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6511 	struct net_device *dev = inet6addr_work->dev;
6512 	unsigned long event = inet6addr_work->event;
6513 	struct mlxsw_sp *mlxsw_sp;
6514 	struct mlxsw_sp_rif *rif;
6515 
6516 	rtnl_lock();
6517 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6518 	if (!mlxsw_sp)
6519 		goto out;
6520 
6521 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6522 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6523 		goto out;
6524 
6525 	__mlxsw_sp_inetaddr_event(dev, event, NULL);
6526 out:
6527 	rtnl_unlock();
6528 	dev_put(dev);
6529 	kfree(inet6addr_work);
6530 }
6531 
6532 /* Called with rcu_read_lock() */
6533 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6534 			     unsigned long event, void *ptr)
6535 {
6536 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6537 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6538 	struct net_device *dev = if6->idev->dev;
6539 
6540 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6541 	if (event == NETDEV_UP)
6542 		return NOTIFY_DONE;
6543 
6544 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6545 		return NOTIFY_DONE;
6546 
6547 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6548 	if (!inet6addr_work)
6549 		return NOTIFY_BAD;
6550 
6551 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6552 	inet6addr_work->dev = dev;
6553 	inet6addr_work->event = event;
6554 	dev_hold(dev);
6555 	mlxsw_core_schedule_work(&inet6addr_work->work);
6556 
6557 	return NOTIFY_DONE;
6558 }
6559 
6560 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6561 				   unsigned long event, void *ptr)
6562 {
6563 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6564 	struct net_device *dev = i6vi->i6vi_dev->dev;
6565 	struct mlxsw_sp *mlxsw_sp;
6566 	struct mlxsw_sp_rif *rif;
6567 	int err = 0;
6568 
6569 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6570 	if (!mlxsw_sp)
6571 		goto out;
6572 
6573 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6574 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6575 		goto out;
6576 
6577 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6578 out:
6579 	return notifier_from_errno(err);
6580 }
6581 
6582 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6583 			     const char *mac, int mtu)
6584 {
6585 	char ritr_pl[MLXSW_REG_RITR_LEN];
6586 	int err;
6587 
6588 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6589 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6590 	if (err)
6591 		return err;
6592 
6593 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6594 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6595 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6596 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6597 }
6598 
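/* Update a RIF following a change in the MAC address or MTU of its netdev:
 * remove the FDB entry directing the old MAC to the router, edit the RIF
 * with the new parameters, install an FDB entry for the new MAC and update
 * the MTU recorded in the multicast routing tables, rolling back on
 * failure.
 */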
6599 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6600 {
6601 	struct mlxsw_sp *mlxsw_sp;
6602 	struct mlxsw_sp_rif *rif;
6603 	u16 fid_index;
6604 	int err;
6605 
6606 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6607 	if (!mlxsw_sp)
6608 		return 0;
6609 
6610 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6611 	if (!rif)
6612 		return 0;
6613 	fid_index = mlxsw_sp_fid_index(rif->fid);
6614 
6615 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6616 	if (err)
6617 		return err;
6618 
6619 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6620 				dev->mtu);
6621 	if (err)
6622 		goto err_rif_edit;
6623 
6624 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6625 	if (err)
6626 		goto err_rif_fdb_op;
6627 
6628 	if (rif->mtu != dev->mtu) {
6629 		struct mlxsw_sp_vr *vr;
6630 		int i;
6631 
6632 		/* The RIF is relevant only to its mr_table instance since,
6633 		 * unlike in unicast routing, a RIF cannot be shared between
6634 		 * several multicast routing tables.
6635 		 */
6636 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
6637 		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6638 			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
6639 						   rif, dev->mtu);
6640 	}
6641 
6642 	ether_addr_copy(rif->addr, dev->dev_addr);
6643 	rif->mtu = dev->mtu;
6644 
6645 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6646 
6647 	return 0;
6648 
6649 err_rif_fdb_op:
6650 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6651 err_rif_edit:
6652 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6653 	return err;
6654 }
6655 
6656 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6657 				  struct net_device *l3_dev,
6658 				  struct netlink_ext_ack *extack)
6659 {
6660 	struct mlxsw_sp_rif *rif;
6661 
6662 	/* If the netdev is already associated with a RIF, then destroy
6663 	 * the RIF and create a new one with the new virtual router ID.
6664 	 */
6665 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6666 	if (rif)
6667 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6668 
6669 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6670 }
6671 
6672 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6673 				    struct net_device *l3_dev)
6674 {
6675 	struct mlxsw_sp_rif *rif;
6676 
6677 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6678 	if (!rif)
6679 		return;
6680 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6681 }
6682 
6683 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6684 				 struct netdev_notifier_changeupper_info *info)
6685 {
6686 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6687 	int err = 0;
6688 
6689 	if (!mlxsw_sp)
6690 		return 0;
6691 
6692 	switch (event) {
6693 	case NETDEV_PRECHANGEUPPER:
6694 		return 0;
6695 	case NETDEV_CHANGEUPPER:
6696 		if (info->linking) {
6697 			struct netlink_ext_ack *extack;
6698 
6699 			extack = netdev_notifier_info_to_extack(&info->info);
6700 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6701 		} else {
6702 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6703 		}
6704 		break;
6705 	}
6706 
6707 	return err;
6708 }
6709 
6710 static struct mlxsw_sp_rif_subport *
6711 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6712 {
6713 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6714 }
6715 
6716 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6717 				       const struct mlxsw_sp_rif_params *params)
6718 {
6719 	struct mlxsw_sp_rif_subport *rif_subport;
6720 
6721 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6722 	rif_subport->vid = params->vid;
6723 	rif_subport->lag = params->lag;
6724 	if (params->lag)
6725 		rif_subport->lag_id = params->lag_id;
6726 	else
6727 		rif_subport->system_port = params->system_port;
6728 }
6729 
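/* Write the RITR register for a sub-port RIF, encoding the VLAN together
 * with either the LAG ID or the system port, depending on whether the
 * underlying port is a LAG member.
 */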
6730 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6731 {
6732 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6733 	struct mlxsw_sp_rif_subport *rif_subport;
6734 	char ritr_pl[MLXSW_REG_RITR_LEN];
6735 
6736 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6737 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6738 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
6739 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6740 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6741 				  rif_subport->lag ? rif_subport->lag_id :
6742 						     rif_subport->system_port,
6743 				  rif_subport->vid);
6744 
6745 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6746 }
6747 
6748 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6749 {
6750 	int err;
6751 
6752 	err = mlxsw_sp_rif_subport_op(rif, true);
6753 	if (err)
6754 		return err;
6755 
6756 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6757 				  mlxsw_sp_fid_index(rif->fid), true);
6758 	if (err)
6759 		goto err_rif_fdb_op;
6760 
6761 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6762 	return 0;
6763 
6764 err_rif_fdb_op:
6765 	mlxsw_sp_rif_subport_op(rif, false);
6766 	return err;
6767 }
6768 
6769 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6770 {
6771 	struct mlxsw_sp_fid *fid = rif->fid;
6772 
6773 	mlxsw_sp_fid_rif_set(fid, NULL);
6774 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6775 			    mlxsw_sp_fid_index(fid), false);
6776 	mlxsw_sp_rif_subport_op(rif, false);
6777 }
6778 
6779 static struct mlxsw_sp_fid *
6780 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
6781 			     struct netlink_ext_ack *extack)
6782 {
6783 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6784 }
6785 
6786 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
6787 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
6788 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
6789 	.setup			= mlxsw_sp_rif_subport_setup,
6790 	.configure		= mlxsw_sp_rif_subport_configure,
6791 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
6792 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
6793 };
6794 
6795 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6796 				    enum mlxsw_reg_ritr_if_type type,
6797 				    u16 vid_fid, bool enable)
6798 {
6799 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6800 	char ritr_pl[MLXSW_REG_RITR_LEN];
6801 
6802 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6803 			    rif->dev->mtu);
6804 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6805 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6806 
6807 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6808 }
6809 
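/* The "router port" is a virtual port one above the maximal number of
 * ports supported by the device. It is used as the member to which traffic
 * that should reach the router is flooded.
 */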
6810 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6811 {
6812 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6813 }
6814 
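/* Program a VLAN RIF: create the RITR VLAN interface, flood multicast and
 * broadcast traffic in the FID to the router port and install an FDB entry
 * directing packets with the RIF's MAC to the router, unwinding in reverse
 * order on failure.
 */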
6815 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6816 {
6817 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6818 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6819 	int err;
6820 
6821 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6822 	if (err)
6823 		return err;
6824 
6825 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6826 				     mlxsw_sp_router_port(mlxsw_sp), true);
6827 	if (err)
6828 		goto err_fid_mc_flood_set;
6829 
6830 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6831 				     mlxsw_sp_router_port(mlxsw_sp), true);
6832 	if (err)
6833 		goto err_fid_bc_flood_set;
6834 
6835 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6836 				  mlxsw_sp_fid_index(rif->fid), true);
6837 	if (err)
6838 		goto err_rif_fdb_op;
6839 
6840 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6841 	return 0;
6842 
6843 err_rif_fdb_op:
6844 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6845 			       mlxsw_sp_router_port(mlxsw_sp), false);
6846 err_fid_bc_flood_set:
6847 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6848 			       mlxsw_sp_router_port(mlxsw_sp), false);
6849 err_fid_mc_flood_set:
6850 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6851 	return err;
6852 }
6853 
6854 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
6855 {
6856 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6857 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6858 	struct mlxsw_sp_fid *fid = rif->fid;
6859 
6860 	mlxsw_sp_fid_rif_set(fid, NULL);
6861 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6862 			    mlxsw_sp_fid_index(fid), false);
6863 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6864 			       mlxsw_sp_router_port(mlxsw_sp), false);
6865 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6866 			       mlxsw_sp_router_port(mlxsw_sp), false);
6867 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6868 }
6869 
6870 static struct mlxsw_sp_fid *
6871 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
6872 			  struct netlink_ext_ack *extack)
6873 {
6874 	u16 vid;
6875 	int err;
6876 
6877 	if (is_vlan_dev(rif->dev)) {
6878 		vid = vlan_dev_vlan_id(rif->dev);
6879 	} else {
6880 		err = br_vlan_get_pvid(rif->dev, &vid);
6881 		if (!err && !vid)
6882 			err = -EINVAL;
6883 		if (err) {
6884 			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
6885 			return ERR_PTR(err);
6886 		}
6887 	}
6888 
6889 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
6890 }
6891 
6892 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
6893 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
6894 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6895 	.configure		= mlxsw_sp_rif_vlan_configure,
6896 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
6897 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
6898 };
6899 
6900 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
6901 {
6902 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6903 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6904 	int err;
6905 
6906 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
6907 				       true);
6908 	if (err)
6909 		return err;
6910 
6911 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6912 				     mlxsw_sp_router_port(mlxsw_sp), true);
6913 	if (err)
6914 		goto err_fid_mc_flood_set;
6915 
6916 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6917 				     mlxsw_sp_router_port(mlxsw_sp), true);
6918 	if (err)
6919 		goto err_fid_bc_flood_set;
6920 
6921 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6922 				  mlxsw_sp_fid_index(rif->fid), true);
6923 	if (err)
6924 		goto err_rif_fdb_op;
6925 
6926 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6927 	return 0;
6928 
6929 err_rif_fdb_op:
6930 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6931 			       mlxsw_sp_router_port(mlxsw_sp), false);
6932 err_fid_bc_flood_set:
6933 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6934 			       mlxsw_sp_router_port(mlxsw_sp), false);
6935 err_fid_mc_flood_set:
6936 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6937 	return err;
6938 }
6939 
6940 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
6941 {
6942 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6943 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6944 	struct mlxsw_sp_fid *fid = rif->fid;
6945 
6946 	mlxsw_sp_fid_rif_set(fid, NULL);
6947 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6948 			    mlxsw_sp_fid_index(fid), false);
6949 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6950 			       mlxsw_sp_router_port(mlxsw_sp), false);
6951 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6952 			       mlxsw_sp_router_port(mlxsw_sp), false);
6953 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6954 }
6955 
6956 static struct mlxsw_sp_fid *
6957 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
6958 			 struct netlink_ext_ack *extack)
6959 {
6960 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
6961 }
6962 
6963 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
6964 	.type			= MLXSW_SP_RIF_TYPE_FID,
6965 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6966 	.configure		= mlxsw_sp_rif_fid_configure,
6967 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
6968 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
6969 };
6970 
6971 static struct mlxsw_sp_rif_ipip_lb *
6972 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
6973 {
6974 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
6975 }
6976 
6977 static void
6978 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6979 			   const struct mlxsw_sp_rif_params *params)
6980 {
6981 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6982 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
6983 
6984 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6985 				 common);
6986 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6987 	rif_lb->lb_config = params_lb->lb_config;
6988 }
6989 
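/* A loopback RIF used for tunnel decapsulation is bound to the virtual
 * router of the tunnel's underlay table: take a reference on the underlay
 * virtual router, program the loopback and record the virtual router for
 * later teardown.
 */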
6990 static int
6991 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
6992 {
6993 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6994 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
6995 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6996 	struct mlxsw_sp_vr *ul_vr;
6997 	int err;
6998 
6999 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7000 	if (IS_ERR(ul_vr))
7001 		return PTR_ERR(ul_vr);
7002 
7003 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
7004 	if (err)
7005 		goto err_loopback_op;
7006 
7007 	lb_rif->ul_vr_id = ul_vr->id;
7008 	++ul_vr->rif_count;
7009 	return 0;
7010 
7011 err_loopback_op:
7012 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7013 	return err;
7014 }
7015 
7016 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7017 {
7018 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7019 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7020 	struct mlxsw_sp_vr *ul_vr;
7021 
7022 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7023 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
7024 
7025 	--ul_vr->rif_count;
7026 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7027 }
7028 
7029 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
7030 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7031 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7032 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7033 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
7034 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
7035 };
7036 
7037 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
7038 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7039 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
7040 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7041 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
7042 };
7043 
7044 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7045 {
7046 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7047 
7048 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
7049 					 sizeof(struct mlxsw_sp_rif *),
7050 					 GFP_KERNEL);
7051 	if (!mlxsw_sp->router->rifs)
7052 		return -ENOMEM;
7053 
7054 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7055 
7056 	return 0;
7057 }
7058 
7059 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7060 {
7061 	int i;
7062 
7063 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7064 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7065 
7066 	kfree(mlxsw_sp->router->rifs);
7067 }
7068 
7069 static int
7070 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7071 {
7072 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7073 
7074 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7075 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7076 }
7077 
7078 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7079 {
7080 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7081 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7082 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7083 }
7084 
7085 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7086 {
7087 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7088 }
7089 
7090 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7091 {
7092 	struct mlxsw_sp_router *router;
7093 
7094 	/* Flush pending FIB notifications and then flush the device's
7095 	 * table before requesting another dump. The FIB notification
7096 	 * block is unregistered, so no need to take RTNL.
7097 	 */
7098 	mlxsw_core_flush_owq();
7099 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7100 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7101 }
7102 
#ifdef CONFIG_IP_ROUTE_MULTIPATH
static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
{
	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
}

static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
{
	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
}

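/* fib_multipath_hash_policy 0 selects an L3 hash (source and destination
 * IP only); policy 1 additionally mixes in the IP protocol and the
 * TCP/UDP source and destination ports.
 */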
static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
{
	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;

	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
	if (only_l3)
		return;
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
}

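/* For IPv6 the next header field is hashed unconditionally. In L3-only
 * mode the flow label stands in for the L4 ports; otherwise the TCP/UDP
 * source and destination ports are hashed directly.
 */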
static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
	bool only_l3 = !ip6_multipath_hash_policy(&init_net);

	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
	if (only_l3) {
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
	} else {
		mlxsw_sp_mp_hash_header_set(recr2_pl,
					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
	}
}

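/* Seed the ECMP hash with random bytes so that identical devices do not
 * all compute the same hash, which would polarize traffic across a
 * multi-stage topology.
 */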
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	char recr2_pl[MLXSW_REG_RECR2_LEN];
	u32 seed;

	get_random_bytes(&seed, sizeof(seed));
	mlxsw_reg_recr2_pack(recr2_pl, seed);
	mlxsw_sp_mp4_hash_init(recr2_pl);
	mlxsw_sp_mp6_hash_init(recr2_pl);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
}
#else
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
#endif

static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
	char rdpm_pl[MLXSW_REG_RDPM_LEN];
	unsigned int i;

	MLXSW_REG_ZERO(rdpm, rdpm_pl);

	/* The HW determines switch priority from the DSCP bits alone, while
	 * the kernel derives it from the full ToS byte. To compensate for
	 * the mismatch, program each DSCP value with the priority the
	 * kernel would compute for the corresponding ToS, i.e. the DSCP
	 * value shifted past the 2 least-significant ECN bits.
	 */
	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}

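/* RGCR globally enables the IPv4 and IPv6 routers and caps the number of
 * router interfaces at the MAX_RIFS resource. The usp bit makes the
 * device recalculate switch priority and packet color for routed
 * packets; the DSCP-to-priority mapping programmed in
 * mlxsw_sp_dscp_init() is presumably what that recalculation consults.
 */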
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;
	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);

	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}

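/* Initialization order matters here: the FIB notifier is registered last,
 * once every router data structure is ready, because registration replays
 * the kernel's existing FIB entries into the notifier. The error path
 * unwinds in exact reverse order.
 */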
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_router *router;
	int err;

	router = kzalloc(sizeof(*router), GFP_KERNEL);
	if (!router)
		return -ENOMEM;
	mlxsw_sp->router = router;
	router->mlxsw_sp = mlxsw_sp;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		goto err_router_init;

	err = mlxsw_sp_rifs_init(mlxsw_sp);
	if (err)
		goto err_rifs_init;

	err = mlxsw_sp_ipips_init(mlxsw_sp);
	if (err)
		goto err_ipips_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
	err = mlxsw_sp_lpm_init(mlxsw_sp);
	if (err)
		goto err_lpm_init;

	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
	if (err)
		goto err_mr_init;

	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->router->netevent_nb.notifier_call =
		mlxsw_sp_router_netevent_event;
	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	if (err)
		goto err_register_netevent_notifier;

	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
	if (err)
		goto err_mp_hash_init;

	err = mlxsw_sp_dscp_init(mlxsw_sp);
	if (err)
		goto err_dscp_init;

	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
err_dscp_init:
err_mp_hash_init:
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_mr_fini(mlxsw_sp);
err_mr_init:
	mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
	mlxsw_sp_ipips_fini(mlxsw_sp);
err_ipips_init:
	mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
	kfree(mlxsw_sp->router);
	return err;
}

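/* Tear everything down in the exact reverse order of mlxsw_sp_router_init().
 */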
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_mr_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
	mlxsw_sp_ipips_fini(mlxsw_sp);
	mlxsw_sp_rifs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
	kfree(mlxsw_sp->router);
}