/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
#include <linux/random.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/fib_notifier.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_ipip.h"
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"

struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;

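/* Per-ASIC router state, hanging off struct mlxsw_sp. It owns the RIF
 * array, the virtual routers, the neighbour and nexthop hashtables, the
 * pool of LPM trees and the list of offloaded IPIP tunnels, as well as the
 * delayed works that drive periodic neighbour activity updates and
 * unresolved nexthop probing.
 */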
struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;
	struct mlxsw_sp_vr *vrs;
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		struct mlxsw_sp_lpm_tree *trees;
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};

struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};

struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};

static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
			   enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return &rif->counter_egress;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return &rif->counter_ingress;
	}
	return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return rif->counter_egress_valid;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return rif->counter_ingress_valid;
	}
	return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir,
			       bool valid)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		rif->counter_egress_valid = valid;
		break;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		rif->counter_ingress_valid = valid;
		break;
	}
}

static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				     unsigned int counter_index, bool enable,
				     enum mlxsw_sp_rif_counter_dir dir)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	bool is_egress = false;
	int err;

	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
		is_egress = true;
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
				    is_egress);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

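/* Read the current value of a RIF counter. The counter must first have
 * been allocated for the given direction, otherwise -EINVAL is returned.
 * A sketch of a caller (variable names assumed):
 *
 *	u64 cnt;
 *
 *	if (!mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif,
 *					    MLXSW_SP_RIF_COUNTER_EGRESS, &cnt))
 *		pr_debug("good unicast packets: %llu\n", cnt);
 */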
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *rif,
				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	bool valid;
	int err;

	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
	if (!valid)
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_NOP);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;
	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
	return 0;
}

static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}

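/* Allocating a RIF counter is a three-step sequence: take a counter from
 * the RIF sub-pool, clear it via RICNT so no stale hardware state is
 * reported, and only then bind it to the RIF in the requested direction
 * via RITR. On failure the counter is returned to the pool.
 */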
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}

void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}

static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct devlink *devlink;

	devlink = priv_to_devlink(mlxsw_sp->core);
	if (!devlink_dpipe_table_counter_enabled(devlink,
						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
		return;
	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;

	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
			 const struct net_device *dev);

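/* One bit per possible prefix length. IPv6 allows lengths 0..128, hence
 * sizeof(struct in6_addr) * BITS_PER_BYTE + 1 == 129 bits; IPv4 simply
 * uses the low 33 of them (lengths 0..32).
 */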
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };

	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
};

struct mlxsw_sp_nexthop_group;
struct mlxsw_sp_fib;

struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct rt6_info *rt;
};

struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	struct mlxsw_sp_prefix_usage prefix_usage;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
	enum mlxsw_sp_l3proto proto;
};

struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr4_table;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib *fib;
	int err;

	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	return fib;

err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
	WARN_ON(!list_empty(&fib->node_list));
	WARN_ON(fib->lpm_tree);
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count == 0)
			return lpm_tree;
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

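/* Program the tree's layout via RALST. The tree is degenerated into a
 * chain: the longest used prefix length becomes the root bin, and each
 * bin's left child is the next shorter used length; right children are
 * never used, and prefix length zero is never given a bin of its own.
 */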
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage))
			return lpm_tree;
	}
	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}

static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}

#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */

static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	return 0;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
	kfree(mlxsw_sp->router->lpm.trees);
}

static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_is_used(vr))
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib, u8 tree_id)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     tree_id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

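/* E.g. both RT_TABLE_LOCAL (255) and RT_TABLE_DEFAULT (253) are mapped to
 * RT_TABLE_MAIN (254), so routes from all three kernel tables end up in a
 * single virtual router.
 */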
static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main, default and local tables into one */
	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
					    enum mlxsw_sp_l3proto proto)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return vr->fib4;
	case MLXSW_SP_L3_PROTO_IPV6:
		return vr->fib6;
	}
	return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr->fib4))
		return ERR_CAST(vr->fib4);
	vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(vr->fib6)) {
		err = PTR_ERR(vr->fib6);
		goto err_fib6_create;
	}
	vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
						 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr->mr4_table)) {
		err = PTR_ERR(vr->mr4_table);
		goto err_mr_table_create;
	}
	vr->tb_id = tb_id;
	return vr;

err_mr_table_create:
	mlxsw_sp_fib_destroy(vr->fib6);
	vr->fib6 = NULL;
err_fib6_create:
	mlxsw_sp_fib_destroy(vr->fib4);
	vr->fib4 = NULL;
	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr4_table);
	vr->mr4_table = NULL;
	mlxsw_sp_fib_destroy(vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(vr->fib4);
	vr->fib4 = NULL;
}

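/* mlxsw_sp_vr_get() and mlxsw_sp_vr_put() behave like a get/put pair, but
 * without an explicit reference count: a VR is created on first use for a
 * given table ID, and mlxsw_sp_vr_put() only destroys it once no RIFs, no
 * FIB nodes and no multicast routes reference it anymore.
 */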
static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
					   struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list) &&
	    mlxsw_sp_mr_table_empty(vr->mr4_table))
		mlxsw_sp_vr_destroy(vr);
}

static bool
mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
				    enum mlxsw_sp_l3proto proto, u8 tree_id)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);

	if (!mlxsw_sp_vr_is_used(vr))
		return false;
	if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
		return true;
	return false;
}

static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}

static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib *fib,
					 struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	enum mlxsw_sp_l3proto proto = fib->proto;
	u8 old_id, new_id = new_tree->id;
	struct mlxsw_sp_vr *vr;
	int i, err;

	if (!old_tree)
		goto no_replace;
	old_id = old_tree->id;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
			continue;
		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
						   mlxsw_sp_vr_fib(vr, proto),
						   new_tree);
		if (err)
			goto err_tree_replace;
	}

	return 0;

err_tree_replace:
	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
	return err;

no_replace:
	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err) {
		mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
		fib->lpm_tree = NULL;
		return err;
	}
	return 0;
}

static void
mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
		      enum mlxsw_sp_l3proto proto,
		      struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
		unsigned char prefix;

		if (!mlxsw_sp_vr_is_used(vr))
			continue;
		mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
			mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
	}
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
					GFP_KERNEL);
	if (!mlxsw_sp->router->vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}

static struct net_device *
__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
{
	struct ip_tunnel *tun = netdev_priv(ol_dev);
	struct net *net = dev_net(ol_dev);

	return __dev_get_by_index(net, tun->parms.link);
}

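/* Resolve the table where a tunnel's underlay (encapsulated) packets are
 * routed: the L3 domain of the tunnel's bound link if there is one,
 * otherwise that of the tunnel device itself, falling back to the main
 * table in either case.
 */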
u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
{
	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);

	if (d)
		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
	else
		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack);
static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);

static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
				enum mlxsw_sp_ipip_type ipipt,
				struct net_device *ol_dev,
				struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_params_ipip_lb lb_params;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_rif *rif;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
		.common.dev = ol_dev,
		.common.lag = false,
		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
	};

	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
	if (IS_ERR(rif))
		return ERR_CAST(rif);
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;

	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;
	ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);

	return ipip_entry;

err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}

static void
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
				  const enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr saddr,
				  u32 ul_tb_id,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	union mlxsw_sp_l3addr tun_saddr;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
	return tun_ul_tb_id == ul_tb_id &&
	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
}

static int
mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_fib_entry *fib_entry,
			      struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tunnel_index;
	int err;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
	if (err)
		return err;

	ipip_entry->decap_fib_entry = fib_entry;
	fib_entry->decap.ipip_entry = ipip_entry;
	fib_entry->decap.tunnel_index = tunnel_index;
	return 0;
}

static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry *fib_entry)
{
	/* Unlink this FIB entry from the IPIP entry whose decap entry it is. */
	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
	fib_entry->decap.ipip_entry = NULL;
	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len);
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

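/* A FIB entry covering a tunnel's local address starts out as a TRAP
 * entry. When an offloaded tunnel matches it, the entry is "promoted" to
 * IPIP_DECAP so matching packets are decapsulated in hardware, and it is
 * "demoted" back to TRAP when the tunnel goes away or its loopback RIF is
 * recreated.
 */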
static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_node *fib_node;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_fib_entry *fib_entry;
	unsigned char saddr_prefix_len;
	union mlxsw_sp_l3addr saddr;
	struct mlxsw_sp_fib *ul_fib;
	struct mlxsw_sp_vr *ul_vr;
	const void *saddrp;
	size_t saddr_len;
	u32 ul_tb_id;
	u32 saddr4;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];

	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
	if (!ul_vr)
		return NULL;

	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
					   ipip_entry->ol_dev);

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(saddr.addr4);
		saddrp = &saddr4;
		saddr_len = 4;
		saddr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
					    saddr_prefix_len);
	if (!fib_node || list_empty(&fib_node->entry_list))
		return NULL;

	fib_entry = list_first_entry(&fib_node->entry_list,
				     struct mlxsw_sp_fib_entry, list);
	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		return NULL;

	return fib_entry;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
			   enum mlxsw_sp_ipip_type ipipt,
			   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
	if (IS_ERR(ipip_entry))
		return ipip_entry;

	list_add_tail(&ipip_entry->ipip_list_node,
		      &mlxsw_sp->router->ipip_list);

	return ipip_entry;
}

static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	struct net_device *ipip_ul_dev;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
						 ul_tb_id, ipip_entry) &&
	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
}

/* Given decap parameters, find the corresponding IPIP entry. */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
						      ul_proto, ul_dip,
						      ipip_entry))
			return ipip_entry;

	return NULL;
}

static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
				      const struct net_device *dev,
				      enum mlxsw_sp_ipip_type *p_type)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	enum mlxsw_sp_ipip_type ipipt;

	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
		ipip_ops = router->ipip_ops_arr[ipipt];
		if (dev->type == ipip_ops->dev_type) {
			if (p_type)
				*p_type = ipipt;
			return true;
		}
	}
	return false;
}

bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (ipip_entry->ol_dev == ol_dev)
			return ipip_entry;

	return NULL;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}

bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
}

static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
						const struct net_device *ol_dev,
						enum mlxsw_sp_ipip_type ipipt)
{
	const struct mlxsw_sp_ipip_ops *ops
		= mlxsw_sp->router->ipip_ops_arr[ipipt];

	/* For deciding whether decap should be offloaded, we don't care about
	 * overlay protocol, so ask whether either one is supported.
	 */
	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
}

static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_l3proto ul_proto;
	enum mlxsw_sp_ipip_type ipipt;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
							  saddr, ul_tb_id,
							  NULL)) {
			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
								ol_dev);
			if (IS_ERR(ipip_entry))
				return PTR_ERR(ipip_entry);
		}
	}

	return 0;
}

static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (decap_fib_entry)
		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
						  decap_fib_entry);
}

static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif);
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}

static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif);

/* Update the offload related to an IPIP entry. This always updates decap, and
 * in addition to that it also:
 * @recreate_loopback: recreates the associated loopback RIF
 * @keep_encap: updates next hops that use the tunnel netdevice. This is only
 *              relevant when recreate_loopback is true.
 * @update_nexthops: updates next hops, keeping the current loopback RIF. This
 *                   is only relevant when recreate_loopback is false.
 */
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry,
					bool recreate_loopback,
					bool keep_encap,
					bool update_nexthops,
					struct netlink_ext_ack *extack)
{
	int err;

	/* RIFs can't be edited, so to update loopback, we need to destroy and
	 * recreate it. That creates a window of opportunity where RALUE and
	 * RATR registers end up referencing a RIF that's already gone. RATRs
	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
	 * of RALUE, demote the decap route back.
	 */
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);

	if (recreate_loopback) {
		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
						       keep_encap, extack);
		if (err)
			return err;
	} else if (update_nexthops) {
		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
					    &ipip_entry->ol_lb->common);
	}

	if (ipip_entry->ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);

	return 0;
}

static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev,
						struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	if (!ipip_entry)
		return 0;

	/* For flat configuration cases, moving overlay to a different VRF might
	 * cause local address conflict, and the conflicting tunnels need to be
	 * demoted.
	 */
	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
						 saddr, ul_tb_id,
						 ipip_entry)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, false, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     struct netlink_ext_ack *extack)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
					struct net_device *ol_dev,
					struct netlink_ext_ack *extack)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		/* A change might make a tunnel eligible for offloading, but
		 * that is currently not implemented. What falls to slow path
		 * stays there.
		 */
		return 0;

	/* A change might make a tunnel not eligible for offloading. */
	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
						 ipip_entry->ipipt)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
	return err;
}

void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct net_device *ol_dev = ipip_entry->ol_dev;

	if (ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

/* The configuration where several tunnels have the same local address in the
 * same underlay table needs special treatment in the HW. That is currently not
 * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in via the argument
 * `except'.
 */
bool
mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
				     enum mlxsw_sp_l3proto ul_proto,
				     union mlxsw_sp_l3addr saddr,
				     u32 ul_tb_id,
				     const struct mlxsw_sp_ipip_entry *except)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		if (ipip_entry != except &&
		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
						      ul_tb_id, ipip_entry)) {
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
			return true;
		}
	}

	return false;
}

static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
						     struct net_device *ul_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
	}
}

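/* Entry point for netdevice events on tunnel (overlay) devices: REGISTER
 * creates the IPIP entry if the tunnel can be offloaded, UNREGISTER tears
 * it down, UP/DOWN promote/demote the decap route, moving the device into
 * a VRF revalidates the local address, and CHANGE revalidates whether the
 * tunnel is still eligible for offloading.
 */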
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *ol_dev,
				     unsigned long event,
				     struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_REGISTER:
		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
	case NETDEV_UNREGISTER:
		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_UP:
		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_DOWN:
		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
								    ol_dev,
								    extack);
		return 0;
	case NETDEV_CHANGE:
		extack = info->extack;
		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
							       ol_dev, extack);
	}
	return 0;
}

static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_ipip_entry *ipip_entry,
				   struct net_device *ul_dev,
				   unsigned long event,
				   struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
								    ipip_entry,
								    ul_dev,
								    extack);
		break;

	case NETDEV_UP:
		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
							   ul_dev);
	case NETDEV_DOWN:
		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
							     ipip_entry,
							     ul_dev);
	}
	return 0;
}

int
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *ul_dev,
				 unsigned long event,
				 struct netdev_notifier_info *info)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	int err;

	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
								ul_dev,
								ipip_entry))) {
		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
							 ul_dev, event, info);
		if (err) {
			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
								 ul_dev);
			return err;
		}
	}

	return 0;
}

struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;
	struct rhash_head ht_node;
	struct mlxsw_sp_neigh_key key;
	u16 rif;
	bool connected;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index;
	bool counter_valid;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};

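/* Iterate over the neighbour entries hanging off a RIF. Passing NULL as
 * @neigh_entry yields the first entry; NULL is returned past the last one.
 * A typical walk (sketch) therefore looks like:
 *
 *	for (neigh_entry = mlxsw_sp_rif_neigh_next(rif, NULL); neigh_entry;
 *	     neigh_entry = mlxsw_sp_rif_neigh_next(rif, neigh_entry))
 *		...;
 */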
struct mlxsw_sp_neigh_entry *
mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
			struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry) {
		if (list_empty(&rif->neigh_list))
			return NULL;
		else
			return list_first_entry(&rif->neigh_list,
						typeof(*neigh_entry),
						rif_list_node);
	}
	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
		return NULL;
	return list_next_entry(neigh_entry, rif_list_node);
}

int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->key.n->tbl->family;
}

unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->ha;
}

u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return ntohl(*((__be32 *) n->primary_key));
}

struct in6_addr *
mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return (struct in6_addr *) &n->primary_key;
}

int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_neigh_entry *neigh_entry,
			       u64 *p_counter)
{
	if (!neigh_entry->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
					 p_counter, NULL);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
			   u16 rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
	if (!neigh_entry)
		return NULL;

	neigh_entry->key.n = n;
	neigh_entry->rif = rif;
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);

	return neigh_entry;
}

static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}

static bool
mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct devlink *devlink;
	const char *table_name;

	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
	case AF_INET:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
		break;
	case AF_INET6:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
		break;
	default:
		WARN_ON(1);
		return false;
	}

	devlink = priv_to_devlink(mlxsw_sp->core);
	return devlink_dpipe_table_counter_enabled(devlink, table_name);
}

static void
mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
		return;

	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
		return;

	neigh_entry->counter_valid = true;
}

static void
mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry->counter_valid)
		return;
	mlxsw_sp_flow_counter_free(mlxsw_sp,
				   neigh_entry->counter_index);
	neigh_entry->counter_valid = false;
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
1893 {
1894 	list_del(&neigh_entry->rif_list_node);
1895 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
1896 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
1897 	mlxsw_sp_neigh_entry_free(neigh_entry);
1898 }
1899 
1900 static struct mlxsw_sp_neigh_entry *
1901 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1902 {
1903 	struct mlxsw_sp_neigh_key key;
1904 
1905 	key.n = n;
1906 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1907 				      &key, mlxsw_sp_neigh_ht_params);
1908 }
1909 
1910 static void
1911 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
1912 {
1913 	unsigned long interval;
1914 
1915 #if IS_ENABLED(CONFIG_IPV6)
1916 	interval = min_t(unsigned long,
1917 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
1918 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
1919 #else
1920 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
1921 #endif
1922 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
1923 }
1924 
1925 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1926 						   char *rauhtd_pl,
1927 						   int ent_index)
1928 {
1929 	struct net_device *dev;
1930 	struct neighbour *n;
1931 	__be32 dipn;
1932 	u32 dip;
1933 	u16 rif;
1934 
1935 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
1936 
1937 	if (!mlxsw_sp->router->rifs[rif]) {
1938 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1939 		return;
1940 	}
1941 
1942 	dipn = htonl(dip);
1943 	dev = mlxsw_sp->router->rifs[rif]->dev;
1944 	n = neigh_lookup(&arp_tbl, &dipn, dev);
1945 	if (!n)
1946 		return;
1947 
1948 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
1949 	neigh_event_send(n, NULL);
1950 	neigh_release(n);
1951 }
1952 
1953 #if IS_ENABLED(CONFIG_IPV6)
1954 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1955 						   char *rauhtd_pl,
1956 						   int rec_index)
1957 {
1958 	struct net_device *dev;
1959 	struct neighbour *n;
1960 	struct in6_addr dip;
1961 	u16 rif;
1962 
1963 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
1964 					 (char *) &dip);
1965 
1966 	if (!mlxsw_sp->router->rifs[rif]) {
1967 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1968 		return;
1969 	}
1970 
1971 	dev = mlxsw_sp->router->rifs[rif]->dev;
1972 	n = neigh_lookup(&nd_tbl, &dip, dev);
1973 	if (!n)
1974 		return;
1975 
1976 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
1977 	neigh_event_send(n, NULL);
1978 	neigh_release(n);
1979 }
1980 #else
1981 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1982 						   char *rauhtd_pl,
1983 						   int rec_index)
1984 {
1985 }
1986 #endif
1987 
1988 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1989 						   char *rauhtd_pl,
1990 						   int rec_index)
1991 {
1992 	u8 num_entries;
1993 	int i;
1994 
1995 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
1996 								rec_index);
1997 	/* Hardware starts counting at 0, so add 1. */
1998 	num_entries++;
1999 
2000 	/* Each record consists of several neighbour entries. */
2001 	for (i = 0; i < num_entries; i++) {
2002 		int ent_index;
2003 
2004 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2005 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2006 						       ent_index);
2007 	}
2009 }
2010 
2011 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2012 						   char *rauhtd_pl,
2013 						   int rec_index)
2014 {
2015 	/* One record contains one entry. */
2016 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2017 					       rec_index);
2018 }
2019 
2020 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2021 					      char *rauhtd_pl, int rec_index)
2022 {
2023 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2024 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2025 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2026 						       rec_index);
2027 		break;
2028 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2029 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2030 						       rec_index);
2031 		break;
2032 	}
2033 }
2034 
2035 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2036 {
2037 	u8 num_rec, last_rec_index, num_entries;
2038 
2039 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2040 	last_rec_index = num_rec - 1;
2041 
2042 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2043 		return false;
2044 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2045 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2046 		return true;
2047 
2048 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2049 								last_rec_index);
2050 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2051 		return true;
2052 	return false;
2053 }
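
/* Note (editorial): the check above is a heuristic for "the response
 * may have been truncated". A dump with fewer than
 * MLXSW_REG_RAUHTD_REC_MAX_NUM records cannot be full. At capacity, the
 * last record decides: an IPv6 record always carries exactly one entry,
 * so more entries may remain, while an IPv4 record only suggests
 * truncation when all MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC of its entry
 * slots are used.
 */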
2054 
2055 static int
2056 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2057 				       char *rauhtd_pl,
2058 				       enum mlxsw_reg_rauhtd_type type)
2059 {
2060 	int i, num_rec;
2061 	int err;
2062 
2063 	/* Make sure the neighbour's netdev isn't removed in the
2064 	 * process.
2065 	 */
2066 	rtnl_lock();
2067 	do {
2068 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2069 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2070 				      rauhtd_pl);
2071 		if (err) {
2072 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2073 			break;
2074 		}
2075 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2076 		for (i = 0; i < num_rec; i++)
2077 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2078 							  i);
2079 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2080 	rtnl_unlock();
2081 
2082 	return err;
2083 }
2084 
2085 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2086 {
2087 	enum mlxsw_reg_rauhtd_type type;
2088 	char *rauhtd_pl;
2089 	int err;
2090 
2091 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2092 	if (!rauhtd_pl)
2093 		return -ENOMEM;
2094 
2095 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2096 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2097 	if (err)
2098 		goto out;
2099 
2100 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2101 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2102 out:
2103 	kfree(rauhtd_pl);
2104 	return err;
2105 }
2106 
2107 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2108 {
2109 	struct mlxsw_sp_neigh_entry *neigh_entry;
2110 
2111 	/* Take the RTNL mutex here to prevent the lists from changing. */
2112 	rtnl_lock();
2113 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2114 			    nexthop_neighs_list_node)
2115 		/* If this neigh has nexthops, make the kernel think it is
2116 		 * active regardless of traffic.
2117 		 */
2118 		neigh_event_send(neigh_entry->key.n, NULL);
2119 	rtnl_unlock();
2120 }
2121 
2122 static void
2123 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2124 {
2125 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2126 
2127 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2128 			       msecs_to_jiffies(interval));
2129 }
2130 
2131 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2132 {
2133 	struct mlxsw_sp_router *router;
2134 	int err;
2135 
2136 	router = container_of(work, struct mlxsw_sp_router,
2137 			      neighs_update.dw.work);
2138 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2139 	if (err)
2140 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
2141 
2142 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2143 
2144 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2145 }
2146 
2147 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2148 {
2149 	struct mlxsw_sp_neigh_entry *neigh_entry;
2150 	struct mlxsw_sp_router *router;
2151 
2152 	router = container_of(work, struct mlxsw_sp_router,
2153 			      nexthop_probe_dw.work);
2154 	/* Iterate over the nexthop neighbours and send ARP probes to the
2155 	 * unresolved ones. This solves the chicken-and-egg problem where a
2156 	 * nexthop would not be offloaded until its neighbour is resolved,
2157 	 * but the neighbour would never be resolved as long as traffic
2158 	 * flows in hardware via a different nexthop.
2159 	 *
2160 	 * Take the RTNL mutex here to prevent the lists from changing.
2161 	 */
2162 	rtnl_lock();
2163 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2164 			    nexthop_neighs_list_node)
2165 		if (!neigh_entry->connected)
2166 			neigh_event_send(neigh_entry->key.n, NULL);
2167 	rtnl_unlock();
2168 
2169 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2170 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2171 }
2172 
2173 static void
2174 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2175 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2176 			      bool removing);
2177 
2178 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2179 {
2180 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2181 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2182 }
2183 
2184 static void
2185 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2186 				struct mlxsw_sp_neigh_entry *neigh_entry,
2187 				enum mlxsw_reg_rauht_op op)
2188 {
2189 	struct neighbour *n = neigh_entry->key.n;
2190 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2191 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2192 
2193 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2194 			      dip);
2195 	if (neigh_entry->counter_valid)
2196 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2197 					     neigh_entry->counter_index);
2198 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2199 }
2200 
2201 static void
2202 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2203 				struct mlxsw_sp_neigh_entry *neigh_entry,
2204 				enum mlxsw_reg_rauht_op op)
2205 {
2206 	struct neighbour *n = neigh_entry->key.n;
2207 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2208 	const char *dip = n->primary_key;
2209 
2210 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2211 			      dip);
2212 	if (neigh_entry->counter_valid)
2213 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2214 					     neigh_entry->counter_index);
2215 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2216 }
2217 
2218 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2219 {
2220 	struct neighbour *n = neigh_entry->key.n;
2221 
2222 	/* Packets with a link-local destination address are trapped
2223 	 * after LPM lookup and never reach the neighbour table, so
2224 	 * there is no need to program such neighbours to the device.
2225 	 */
2226 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2227 	    IPV6_ADDR_LINKLOCAL)
2228 		return true;
2229 	return false;
2230 }
2231 
2232 static void
2233 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2234 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2235 			    bool adding)
2236 {
2237 	if (!adding && !neigh_entry->connected)
2238 		return;
2239 	neigh_entry->connected = adding;
2240 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2241 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2242 						mlxsw_sp_rauht_op(adding));
2243 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2244 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2245 			return;
2246 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2247 						mlxsw_sp_rauht_op(adding));
2248 	} else {
2249 		WARN_ON_ONCE(1);
2250 	}
2251 }
2252 
2253 void
2254 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2255 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2256 				    bool adding)
2257 {
2258 	if (adding)
2259 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2260 	else
2261 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2262 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2263 }
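
/* Note (editorial): the final update with adding == true rewrites the
 * host entry so that the new counter binding (or its removal) takes
 * effect in the device.
 */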
2264 
2265 struct mlxsw_sp_netevent_work {
2266 	struct work_struct work;
2267 	struct mlxsw_sp *mlxsw_sp;
2268 	struct neighbour *n;
2269 };
2270 
2271 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2272 {
2273 	struct mlxsw_sp_netevent_work *net_work =
2274 		container_of(work, struct mlxsw_sp_netevent_work, work);
2275 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2276 	struct mlxsw_sp_neigh_entry *neigh_entry;
2277 	struct neighbour *n = net_work->n;
2278 	unsigned char ha[ETH_ALEN];
2279 	bool entry_connected;
2280 	u8 nud_state, dead;
2281 
2282 	/* If these parameters are changed after we release the lock,
2283 	 * then we are guaranteed to receive another event letting us
2284 	 * know about it.
2285 	 */
2286 	read_lock_bh(&n->lock);
2287 	memcpy(ha, n->ha, ETH_ALEN);
2288 	nud_state = n->nud_state;
2289 	dead = n->dead;
2290 	read_unlock_bh(&n->lock);
2291 
2292 	rtnl_lock();
2293 	entry_connected = nud_state & NUD_VALID && !dead;
2294 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2295 	if (!entry_connected && !neigh_entry)
2296 		goto out;
2297 	if (!neigh_entry) {
2298 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2299 		if (IS_ERR(neigh_entry))
2300 			goto out;
2301 	}
2302 
2303 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2304 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2305 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2306 
2307 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2308 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2309 
2310 out:
2311 	rtnl_unlock();
2312 	neigh_release(n);
2313 	kfree(net_work);
2314 }
2315 
2316 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2317 
2318 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2319 {
2320 	struct mlxsw_sp_netevent_work *net_work =
2321 		container_of(work, struct mlxsw_sp_netevent_work, work);
2322 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2323 
2324 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2325 	kfree(net_work);
2326 }
2327 
2328 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2329 					  unsigned long event, void *ptr)
2330 {
2331 	struct mlxsw_sp_netevent_work *net_work;
2332 	struct mlxsw_sp_port *mlxsw_sp_port;
2333 	struct mlxsw_sp_router *router;
2334 	struct mlxsw_sp *mlxsw_sp;
2335 	unsigned long interval;
2336 	struct neigh_parms *p;
2337 	struct neighbour *n;
2338 	struct net *net;
2339 
2340 	switch (event) {
2341 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2342 		p = ptr;
2343 
2344 		/* We don't care about changes in the default table. */
2345 		if (!p->dev || (p->tbl->family != AF_INET &&
2346 				p->tbl->family != AF_INET6))
2347 			return NOTIFY_DONE;
2348 
2349 		/* We are in atomic context and can't take the RTNL mutex,
2350 		 * so use the RCU variant to walk the device chain.
2351 		 */
2352 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2353 		if (!mlxsw_sp_port)
2354 			return NOTIFY_DONE;
2355 
2356 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2357 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2358 		mlxsw_sp->router->neighs_update.interval = interval;
2359 
2360 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2361 		break;
2362 	case NETEVENT_NEIGH_UPDATE:
2363 		n = ptr;
2364 
2365 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2366 			return NOTIFY_DONE;
2367 
2368 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2369 		if (!mlxsw_sp_port)
2370 			return NOTIFY_DONE;
2371 
2372 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2373 		if (!net_work) {
2374 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2375 			return NOTIFY_BAD;
2376 		}
2377 
2378 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2379 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2380 		net_work->n = n;
2381 
2382 		/* Take a reference to ensure the neighbour won't be
2383 		 * destroyed until we drop the reference in the work
2384 		 * item.
2385 		 */
2386 		neigh_clone(n);
2387 		mlxsw_core_schedule_work(&net_work->work);
2388 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2389 		break;
2390 	case NETEVENT_MULTIPATH_HASH_UPDATE:
2391 		net = ptr;
2392 
2393 		if (!net_eq(net, &init_net))
2394 			return NOTIFY_DONE;
2395 
2396 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2397 		if (!net_work)
2398 			return NOTIFY_BAD;
2399 
2400 		router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2401 		INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
2402 		net_work->mlxsw_sp = router->mlxsw_sp;
2403 		mlxsw_core_schedule_work(&net_work->work);
2404 		break;
2405 	}
2406 
2407 	return NOTIFY_DONE;
2408 }
2409 
2410 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2411 {
2412 	int err;
2413 
2414 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2415 			      &mlxsw_sp_neigh_ht_params);
2416 	if (err)
2417 		return err;
2418 
2419 	/* Initialize the polling interval according to the default
2420 	 * table.
2421 	 */
2422 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2423 
2424 	/* Create the delayed works for neighbour activity updates and nexthop probing. */
2425 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2426 			  mlxsw_sp_router_neighs_update_work);
2427 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2428 			  mlxsw_sp_router_probe_unresolved_nexthops);
2429 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2430 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2431 	return 0;
2432 }
2433 
2434 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2435 {
2436 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2437 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2438 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2439 }
2440 
2441 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2442 					 struct mlxsw_sp_rif *rif)
2443 {
2444 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2445 
2446 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2447 				 rif_list_node) {
2448 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2449 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2450 	}
2451 }
2452 
2453 enum mlxsw_sp_nexthop_type {
2454 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2455 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2456 };
2457 
2458 struct mlxsw_sp_nexthop_key {
2459 	struct fib_nh *fib_nh;
2460 };
2461 
2462 struct mlxsw_sp_nexthop {
2463 	struct list_head neigh_list_node; /* member of neigh entry list */
2464 	struct list_head rif_list_node;
2465 	struct list_head router_list_node;
2466 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2467 						* this belongs to
2468 						*/
2469 	struct rhash_head ht_node;
2470 	struct mlxsw_sp_nexthop_key key;
2471 	unsigned char gw_addr[sizeof(struct in6_addr)];
2472 	int ifindex;
2473 	int nh_weight;
2474 	int norm_nh_weight;
2475 	int num_adj_entries;
2476 	struct mlxsw_sp_rif *rif;
2477 	u8 should_offload:1, /* set indicates this neigh is connected and
2478 			      * should be put into the KVD linear area of this group.
2479 			      */
2480 	   offloaded:1, /* set in case the neigh is actually put into the
2481 			 * KVD linear area of this group.
2482 			 */
2483 	   update:1; /* set indicates that the MAC of this neigh should
2484 		      * be updated in the HW.
2485 		      */
2486 	enum mlxsw_sp_nexthop_type type;
2487 	union {
2488 		struct mlxsw_sp_neigh_entry *neigh_entry;
2489 		struct mlxsw_sp_ipip_entry *ipip_entry;
2490 	};
2491 	unsigned int counter_index;
2492 	bool counter_valid;
2493 };
2494 
2495 struct mlxsw_sp_nexthop_group {
2496 	void *priv;
2497 	struct rhash_head ht_node;
2498 	struct list_head fib_list; /* list of fib entries that use this group */
2499 	struct neigh_table *neigh_tbl;
2500 	u8 adj_index_valid:1,
2501 	   gateway:1; /* routes using the group use a gateway */
2502 	u32 adj_index;
2503 	u16 ecmp_size;
2504 	u16 count;
2505 	int sum_norm_weight;
2506 	struct mlxsw_sp_nexthop nexthops[0];
2507 #define nh_rif	nexthops[0].rif
2508 };
2509 
2510 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2511 				    struct mlxsw_sp_nexthop *nh)
2512 {
2513 	struct devlink *devlink;
2514 
2515 	devlink = priv_to_devlink(mlxsw_sp->core);
2516 	if (!devlink_dpipe_table_counter_enabled(devlink,
2517 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2518 		return;
2519 
2520 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2521 		return;
2522 
2523 	nh->counter_valid = true;
2524 }
2525 
2526 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2527 				   struct mlxsw_sp_nexthop *nh)
2528 {
2529 	if (!nh->counter_valid)
2530 		return;
2531 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2532 	nh->counter_valid = false;
2533 }
2534 
2535 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2536 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2537 {
2538 	if (!nh->counter_valid)
2539 		return -EINVAL;
2540 
2541 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2542 					 p_counter, NULL);
2543 }
2544 
2545 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2546 					       struct mlxsw_sp_nexthop *nh)
2547 {
2548 	if (!nh) {
2549 		if (list_empty(&router->nexthop_list))
2550 			return NULL;
2551 		else
2552 			return list_first_entry(&router->nexthop_list,
2553 						typeof(*nh), router_list_node);
2554 	}
2555 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2556 		return NULL;
2557 	return list_next_entry(nh, router_list_node);
2558 }
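
/* Usage sketch (editorial, not part of the driver): together with
 * mlxsw_sp_nexthop_counter_get() above, this iterator lets a caller
 * walk all nexthops and read their activity counters:
 *
 *	struct mlxsw_sp_nexthop *nh = NULL;
 *	u64 packets;
 *
 *	while ((nh = mlxsw_sp_nexthop_next(router, nh)))
 *		if (!mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &packets))
 *			report(nh, packets);
 *
 * where report() is a hypothetical consumer; the counter read fails
 * with -EINVAL for nexthops that never had a counter allocated.
 */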
2559 
2560 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2561 {
2562 	return nh->offloaded;
2563 }
2564 
2565 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2566 {
2567 	if (!nh->offloaded)
2568 		return NULL;
2569 	return nh->neigh_entry->ha;
2570 }
2571 
2572 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2573 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2574 {
2575 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2576 	u32 adj_hash_index = 0;
2577 	int i;
2578 
2579 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2580 		return -EINVAL;
2581 
2582 	*p_adj_index = nh_grp->adj_index;
2583 	*p_adj_size = nh_grp->ecmp_size;
2584 
2585 	for (i = 0; i < nh_grp->count; i++) {
2586 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2587 
2588 		if (nh_iter == nh)
2589 			break;
2590 		if (nh_iter->offloaded)
2591 			adj_hash_index += nh_iter->num_adj_entries;
2592 	}
2593 
2594 	*p_adj_hash_index = adj_hash_index;
2595 	return 0;
2596 }
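
/* Worked example (editorial): in a group with adj_index 1000,
 * ecmp_size 6 and three offloaded nexthops occupying 1, 2 and 3
 * adjacency entries respectively, the third nexthop is reported with
 * *p_adj_index == 1000, *p_adj_size == 6 and *p_adj_hash_index == 3,
 * i.e. the sum of the entries of the offloaded nexthops before it.
 */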
2597 
2598 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2599 {
2600 	return nh->rif;
2601 }
2602 
2603 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2604 {
2605 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2606 	int i;
2607 
2608 	for (i = 0; i < nh_grp->count; i++) {
2609 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2610 
2611 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2612 			return true;
2613 	}
2614 	return false;
2615 }
2616 
2617 static struct fib_info *
2618 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2619 {
2620 	return nh_grp->priv;
2621 }
2622 
2623 struct mlxsw_sp_nexthop_group_cmp_arg {
2624 	enum mlxsw_sp_l3proto proto;
2625 	union {
2626 		struct fib_info *fi;
2627 		struct mlxsw_sp_fib6_entry *fib6_entry;
2628 	};
2629 };
2630 
2631 static bool
2632 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2633 				    const struct in6_addr *gw, int ifindex)
2634 {
2635 	int i;
2636 
2637 	for (i = 0; i < nh_grp->count; i++) {
2638 		const struct mlxsw_sp_nexthop *nh;
2639 
2640 		nh = &nh_grp->nexthops[i];
2641 		if (nh->ifindex == ifindex &&
2642 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2643 			return true;
2644 	}
2645 
2646 	return false;
2647 }
2648 
2649 static bool
2650 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2651 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2652 {
2653 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2654 
2655 	if (nh_grp->count != fib6_entry->nrt6)
2656 		return false;
2657 
2658 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2659 		struct in6_addr *gw;
2660 		int ifindex;
2661 
2662 		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
2663 		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
2664 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex))
2665 			return false;
2666 	}
2667 
2668 	return true;
2669 }
2670 
2671 static int
2672 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2673 {
2674 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2675 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2676 
2677 	switch (cmp_arg->proto) {
2678 	case MLXSW_SP_L3_PROTO_IPV4:
2679 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2680 	case MLXSW_SP_L3_PROTO_IPV6:
2681 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2682 						    cmp_arg->fib6_entry);
2683 	default:
2684 		WARN_ON(1);
2685 		return 1;
2686 	}
2687 }
2688 
2689 static int
2690 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2691 {
2692 	return nh_grp->neigh_tbl->family;
2693 }
2694 
2695 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2696 {
2697 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2698 	const struct mlxsw_sp_nexthop *nh;
2699 	struct fib_info *fi;
2700 	unsigned int val;
2701 	int i;
2702 
2703 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2704 	case AF_INET:
2705 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2706 		return jhash(&fi, sizeof(fi), seed);
2707 	case AF_INET6:
2708 		val = nh_grp->count;
2709 		for (i = 0; i < nh_grp->count; i++) {
2710 			nh = &nh_grp->nexthops[i];
2711 			val ^= nh->ifindex;
2712 		}
2713 		return jhash(&val, sizeof(val), seed);
2714 	default:
2715 		WARN_ON(1);
2716 		return 0;
2717 	}
2718 }
2719 
2720 static u32
2721 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2722 {
2723 	unsigned int val = fib6_entry->nrt6;
2724 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2725 	struct net_device *dev;
2726 
2727 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2728 		dev = mlxsw_sp_rt6->rt->dst.dev;
2729 		val ^= dev->ifindex;
2730 	}
2731 
2732 	return jhash(&val, sizeof(val), seed);
2733 }
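
/* Note (editorial): both IPv6 hash functions fold the nexthop count
 * and the XOR of the ifindexes into a single value, so the hash is
 * insensitive to nexthop order. Groups over the same set of devices
 * hash identically and are then disambiguated by
 * mlxsw_sp_nexthop6_group_cmp() through the obj_cmpfn below.
 */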
2734 
2735 static u32
2736 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2737 {
2738 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2739 
2740 	switch (cmp_arg->proto) {
2741 	case MLXSW_SP_L3_PROTO_IPV4:
2742 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2743 	case MLXSW_SP_L3_PROTO_IPV6:
2744 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2745 	default:
2746 		WARN_ON(1);
2747 		return 0;
2748 	}
2749 }
2750 
2751 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2752 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2753 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
2754 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2755 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2756 };
2757 
2758 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2759 					 struct mlxsw_sp_nexthop_group *nh_grp)
2760 {
2761 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2762 	    !nh_grp->gateway)
2763 		return 0;
2764 
2765 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2766 				      &nh_grp->ht_node,
2767 				      mlxsw_sp_nexthop_group_ht_params);
2768 }
2769 
2770 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2771 					  struct mlxsw_sp_nexthop_group *nh_grp)
2772 {
2773 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2774 	    !nh_grp->gateway)
2775 		return;
2776 
2777 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2778 			       &nh_grp->ht_node,
2779 			       mlxsw_sp_nexthop_group_ht_params);
2780 }
2781 
2782 static struct mlxsw_sp_nexthop_group *
2783 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2784 			       struct fib_info *fi)
2785 {
2786 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2787 
2788 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2789 	cmp_arg.fi = fi;
2790 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2791 				      &cmp_arg,
2792 				      mlxsw_sp_nexthop_group_ht_params);
2793 }
2794 
2795 static struct mlxsw_sp_nexthop_group *
2796 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2797 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2798 {
2799 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2800 
2801 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2802 	cmp_arg.fib6_entry = fib6_entry;
2803 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2804 				      &cmp_arg,
2805 				      mlxsw_sp_nexthop_group_ht_params);
2806 }
2807 
2808 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2809 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2810 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2811 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
2812 };
2813 
2814 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2815 				   struct mlxsw_sp_nexthop *nh)
2816 {
2817 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2818 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2819 }
2820 
2821 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2822 				    struct mlxsw_sp_nexthop *nh)
2823 {
2824 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2825 			       mlxsw_sp_nexthop_ht_params);
2826 }
2827 
2828 static struct mlxsw_sp_nexthop *
2829 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2830 			struct mlxsw_sp_nexthop_key key)
2831 {
2832 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2833 				      mlxsw_sp_nexthop_ht_params);
2834 }
2835 
2836 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2837 					     const struct mlxsw_sp_fib *fib,
2838 					     u32 adj_index, u16 ecmp_size,
2839 					     u32 new_adj_index,
2840 					     u16 new_ecmp_size)
2841 {
2842 	char raleu_pl[MLXSW_REG_RALEU_LEN];
2843 
2844 	mlxsw_reg_raleu_pack(raleu_pl,
2845 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
2846 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
2847 			     new_ecmp_size);
2848 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2849 }
2850 
2851 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2852 					  struct mlxsw_sp_nexthop_group *nh_grp,
2853 					  u32 old_adj_index, u16 old_ecmp_size)
2854 {
2855 	struct mlxsw_sp_fib_entry *fib_entry;
2856 	struct mlxsw_sp_fib *fib = NULL;
2857 	int err;
2858 
2859 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2860 		if (fib == fib_entry->fib_node->fib)
2861 			continue;
2862 		fib = fib_entry->fib_node->fib;
2863 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2864 							old_adj_index,
2865 							old_ecmp_size,
2866 							nh_grp->adj_index,
2867 							nh_grp->ecmp_size);
2868 		if (err)
2869 			return err;
2870 	}
2871 	return 0;
2872 }
2873 
2874 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2875 				     struct mlxsw_sp_nexthop *nh)
2876 {
2877 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2878 	char ratr_pl[MLXSW_REG_RATR_LEN];
2879 
2880 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2881 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
2882 			    adj_index, neigh_entry->rif);
2883 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2884 	if (nh->counter_valid)
2885 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2886 	else
2887 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2888 
2889 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2890 }
2891 
2892 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2893 			    struct mlxsw_sp_nexthop *nh)
2894 {
2895 	int i;
2896 
2897 	for (i = 0; i < nh->num_adj_entries; i++) {
2898 		int err;
2899 
2900 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2901 		if (err)
2902 			return err;
2903 	}
2904 
2905 	return 0;
2906 }
2907 
2908 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2909 					  u32 adj_index,
2910 					  struct mlxsw_sp_nexthop *nh)
2911 {
2912 	const struct mlxsw_sp_ipip_ops *ipip_ops;
2913 
2914 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
2915 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
2916 }
2917 
2918 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2919 					u32 adj_index,
2920 					struct mlxsw_sp_nexthop *nh)
2921 {
2922 	int i;
2923 
2924 	for (i = 0; i < nh->num_adj_entries; i++) {
2925 		int err;
2926 
2927 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2928 						     nh);
2929 		if (err)
2930 			return err;
2931 	}
2932 
2933 	return 0;
2934 }
2935 
2936 static int
2937 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
2938 			      struct mlxsw_sp_nexthop_group *nh_grp,
2939 			      bool reallocate)
2940 {
2941 	u32 adj_index = nh_grp->adj_index; /* base */
2942 	struct mlxsw_sp_nexthop *nh;
2943 	int i;
2944 	int err;
2945 
2946 	for (i = 0; i < nh_grp->count; i++) {
2947 		nh = &nh_grp->nexthops[i];
2948 
2949 		if (!nh->should_offload) {
2950 			nh->offloaded = 0;
2951 			continue;
2952 		}
2953 
2954 		if (nh->update || reallocate) {
2955 			switch (nh->type) {
2956 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
2957 				err = mlxsw_sp_nexthop_update
2958 					    (mlxsw_sp, adj_index, nh);
2959 				break;
2960 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
2961 				err = mlxsw_sp_nexthop_ipip_update
2962 					    (mlxsw_sp, adj_index, nh);
2963 				break;
2964 			}
2965 			if (err)
2966 				return err;
2967 			nh->update = 0;
2968 			nh->offloaded = 1;
2969 		}
2970 		adj_index += nh->num_adj_entries;
2971 	}
2972 	return 0;
2973 }
2974 
2975 static bool
2976 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2977 				 const struct mlxsw_sp_fib_entry *fib_entry);
2978 
2979 static int
2980 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
2981 				    struct mlxsw_sp_nexthop_group *nh_grp)
2982 {
2983 	struct mlxsw_sp_fib_entry *fib_entry;
2984 	int err;
2985 
2986 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2987 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
2988 						      fib_entry))
2989 			continue;
2990 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2991 		if (err)
2992 			return err;
2993 	}
2994 	return 0;
2995 }
2996 
2997 static void
2998 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
2999 				   enum mlxsw_reg_ralue_op op, int err);
3000 
3001 static void
3002 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3003 {
3004 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3005 	struct mlxsw_sp_fib_entry *fib_entry;
3006 
3007 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3008 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3009 						      fib_entry))
3010 			continue;
3011 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3012 	}
3013 }
3014 
3015 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3016 {
3017 	/* Valid sizes for an adjacency group are:
3018 	 * 1-64, 512, 1024, 2048 and 4096.
3019 	 */
3020 	if (*p_adj_grp_size <= 64)
3021 		return;
3022 	else if (*p_adj_grp_size <= 512)
3023 		*p_adj_grp_size = 512;
3024 	else if (*p_adj_grp_size <= 1024)
3025 		*p_adj_grp_size = 1024;
3026 	else if (*p_adj_grp_size <= 2048)
3027 		*p_adj_grp_size = 2048;
3028 	else
3029 		*p_adj_grp_size = 4096;
3030 }
3031 
3032 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3033 					     unsigned int alloc_size)
3034 {
3035 	if (alloc_size >= 4096)
3036 		*p_adj_grp_size = 4096;
3037 	else if (alloc_size >= 2048)
3038 		*p_adj_grp_size = 2048;
3039 	else if (alloc_size >= 1024)
3040 		*p_adj_grp_size = 1024;
3041 	else if (alloc_size >= 512)
3042 		*p_adj_grp_size = 512;
3043 }
3044 
3045 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3046 				     u16 *p_adj_grp_size)
3047 {
3048 	unsigned int alloc_size;
3049 	int err;
3050 
3051 	/* Round up the requested group size to the next size supported
3052 	 * by the device and make sure the request can be satisfied.
3053 	 */
3054 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3055 	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
3056 					     &alloc_size);
3057 	if (err)
3058 		return err;
3059 	/* It is possible the allocation results in more allocated
3060 	 * entries than requested. Try to use as many of them as
3061 	 * possible.
3062 	 */
3063 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3064 
3065 	return 0;
3066 }
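
/* Worked example (editorial): a requested group size of 70 is first
 * rounded up to 512, the next size the device supports. If the KVDL
 * allocator reports that such an allocation would in fact span, say,
 * 1024 entries, the size is rounded again to 1024 so that none of the
 * allocated entries go to waste.
 */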
3067 
3068 static void
3069 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3070 {
3071 	int i, g = 0, sum_norm_weight = 0;
3072 	struct mlxsw_sp_nexthop *nh;
3073 
3074 	for (i = 0; i < nh_grp->count; i++) {
3075 		nh = &nh_grp->nexthops[i];
3076 
3077 		if (!nh->should_offload)
3078 			continue;
3079 		if (g > 0)
3080 			g = gcd(nh->nh_weight, g);
3081 		else
3082 			g = nh->nh_weight;
3083 	}
3084 
3085 	for (i = 0; i < nh_grp->count; i++) {
3086 		nh = &nh_grp->nexthops[i];
3087 
3088 		if (!nh->should_offload)
3089 			continue;
3090 		nh->norm_nh_weight = nh->nh_weight / g;
3091 		sum_norm_weight += nh->norm_nh_weight;
3092 	}
3093 
3094 	nh_grp->sum_norm_weight = sum_norm_weight;
3095 }
3096 
3097 static void
3098 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3099 {
3100 	int total = nh_grp->sum_norm_weight;
3101 	u16 ecmp_size = nh_grp->ecmp_size;
3102 	int i, weight = 0, lower_bound = 0;
3103 
3104 	for (i = 0; i < nh_grp->count; i++) {
3105 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3106 		int upper_bound;
3107 
3108 		if (!nh->should_offload)
3109 			continue;
3110 		weight += nh->norm_nh_weight;
3111 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3112 		nh->num_adj_entries = upper_bound - lower_bound;
3113 		lower_bound = upper_bound;
3114 	}
3115 }
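
/* Worked example (editorial): nexthops with weights 2, 4 and 6
 * normalize with gcd 2 to 1, 2 and 3, giving sum_norm_weight 6. With
 * ecmp_size 6 the rebalance above assigns exactly 1, 2 and 3 adjacency
 * entries; with ecmp_size 512 the running upper bounds are
 * DIV_ROUND_CLOSEST(512 * {1, 3, 6}, 6) = 85, 256 and 512, so the
 * nexthops receive 85, 171 and 256 entries respectively.
 */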
3116 
3117 static void
3118 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3119 			       struct mlxsw_sp_nexthop_group *nh_grp)
3120 {
3121 	u16 ecmp_size, old_ecmp_size;
3122 	struct mlxsw_sp_nexthop *nh;
3123 	bool offload_change = false;
3124 	u32 adj_index;
3125 	bool old_adj_index_valid;
3126 	u32 old_adj_index;
3127 	int i;
3128 	int err;
3129 
3130 	if (!nh_grp->gateway) {
3131 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3132 		return;
3133 	}
3134 
3135 	for (i = 0; i < nh_grp->count; i++) {
3136 		nh = &nh_grp->nexthops[i];
3137 
3138 		if (nh->should_offload != nh->offloaded) {
3139 			offload_change = true;
3140 			if (nh->should_offload)
3141 				nh->update = 1;
3142 		}
3143 	}
3144 	if (!offload_change) {
3145 		/* Nothing was added or removed, so no need to reallocate. Just
3146 		 * update the MAC addresses on the existing adjacency indexes.
3147 		 */
3148 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3149 		if (err) {
3150 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3151 			goto set_trap;
3152 		}
3153 		return;
3154 	}
3155 	mlxsw_sp_nexthop_group_normalize(nh_grp);
3156 	if (!nh_grp->sum_norm_weight)
3157 		/* No neigh of this group is connected, so just set the
3158 		 * trap and let everything flow through the kernel.
3159 		 */
3160 		goto set_trap;
3161 
3162 	ecmp_size = nh_grp->sum_norm_weight;
3163 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3164 	if (err)
3165 		/* No valid allocation size available. */
3166 		goto set_trap;
3167 
3168 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
3169 	if (err) {
3170 		/* We ran out of KVD linear space, just set the
3171 		 * trap and let everything flow through the kernel.
3172 		 */
3173 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3174 		goto set_trap;
3175 	}
3176 	old_adj_index_valid = nh_grp->adj_index_valid;
3177 	old_adj_index = nh_grp->adj_index;
3178 	old_ecmp_size = nh_grp->ecmp_size;
3179 	nh_grp->adj_index_valid = 1;
3180 	nh_grp->adj_index = adj_index;
3181 	nh_grp->ecmp_size = ecmp_size;
3182 	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3183 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3184 	if (err) {
3185 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3186 		goto set_trap;
3187 	}
3188 
3189 	if (!old_adj_index_valid) {
3190 		/* The trap was set for fib entries, so we have to call
3191 		 * fib entry update to unset it and use the adjacency index.
3192 		 */
3193 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3194 		if (err) {
3195 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3196 			goto set_trap;
3197 		}
3198 		return;
3199 	}
3200 
3201 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3202 					     old_adj_index, old_ecmp_size);
3203 	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
3204 	if (err) {
3205 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3206 		goto set_trap;
3207 	}
3208 
3209 	/* Offload state within the group changed, so update the flags. */
3210 	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3211 
3212 	return;
3213 
3214 set_trap:
3215 	old_adj_index_valid = nh_grp->adj_index_valid;
3216 	nh_grp->adj_index_valid = 0;
3217 	for (i = 0; i < nh_grp->count; i++) {
3218 		nh = &nh_grp->nexthops[i];
3219 		nh->offloaded = 0;
3220 	}
3221 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3222 	if (err)
3223 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3224 	if (old_adj_index_valid)
3225 		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
3226 }
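
/* Note (editorial): on any failure above the group falls back to the
 * set_trap path, which invalidates the adjacency index, clears the
 * offloaded bits and rewrites the FIB entries so that matching packets
 * trap to the kernel instead of being forwarded by the ASIC.
 */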
3227 
3228 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3229 					    bool removing)
3230 {
3231 	if (!removing)
3232 		nh->should_offload = 1;
3233 	else
3234 		nh->should_offload = 0;
3235 	nh->update = 1;
3236 }
3237 
3238 static void
3239 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3240 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3241 			      bool removing)
3242 {
3243 	struct mlxsw_sp_nexthop *nh;
3244 
3245 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3246 			    neigh_list_node) {
3247 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3248 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3249 	}
3250 }
3251 
3252 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3253 				      struct mlxsw_sp_rif *rif)
3254 {
3255 	if (nh->rif)
3256 		return;
3257 
3258 	nh->rif = rif;
3259 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3260 }
3261 
3262 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3263 {
3264 	if (!nh->rif)
3265 		return;
3266 
3267 	list_del(&nh->rif_list_node);
3268 	nh->rif = NULL;
3269 }
3270 
3271 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3272 				       struct mlxsw_sp_nexthop *nh)
3273 {
3274 	struct mlxsw_sp_neigh_entry *neigh_entry;
3275 	struct neighbour *n;
3276 	u8 nud_state, dead;
3277 	int err;
3278 
3279 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3280 		return 0;
3281 
3282 	/* Take a reference on the neighbour here, ensuring that it is
3283 	 * not destroyed before the nexthop entry is finished with it.
3284 	 * The reference is taken either in neigh_lookup() or
3285 	 * in neigh_create() in case n is not found.
3286 	 */
3287 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3288 	if (!n) {
3289 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3290 				 nh->rif->dev);
3291 		if (IS_ERR(n))
3292 			return PTR_ERR(n);
3293 		neigh_event_send(n, NULL);
3294 	}
3295 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3296 	if (!neigh_entry) {
3297 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3298 		if (IS_ERR(neigh_entry)) {
3299 			err = -EINVAL;
3300 			goto err_neigh_entry_create;
3301 		}
3302 	}
3303 
3304 	/* If this is the first nexthop connected to that neigh, add it
3305 	 * to nexthop_neighs_list.
3306 	 */
3307 	if (list_empty(&neigh_entry->nexthop_list))
3308 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3309 			      &mlxsw_sp->router->nexthop_neighs_list);
3310 
3311 	nh->neigh_entry = neigh_entry;
3312 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3313 	read_lock_bh(&n->lock);
3314 	nud_state = n->nud_state;
3315 	dead = n->dead;
3316 	read_unlock_bh(&n->lock);
3317 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3318 
3319 	return 0;
3320 
3321 err_neigh_entry_create:
3322 	neigh_release(n);
3323 	return err;
3324 }
3325 
3326 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3327 					struct mlxsw_sp_nexthop *nh)
3328 {
3329 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3330 	struct neighbour *n;
3331 
3332 	if (!neigh_entry)
3333 		return;
3334 	n = neigh_entry->key.n;
3335 
3336 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3337 	list_del(&nh->neigh_list_node);
3338 	nh->neigh_entry = NULL;
3339 
3340 	/* If this is the last nexthop connected to that neigh, remove
3341 	 * it from nexthop_neighs_list.
3342 	 */
3343 	if (list_empty(&neigh_entry->nexthop_list))
3344 		list_del(&neigh_entry->nexthop_neighs_list_node);
3345 
3346 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3347 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3348 
3349 	neigh_release(n);
3350 }
3351 
3352 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3353 {
3354 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3355 
3356 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3357 }
3358 
3359 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3360 				       struct mlxsw_sp_nexthop *nh,
3361 				       struct mlxsw_sp_ipip_entry *ipip_entry)
3362 {
3363 	bool removing;
3364 
3365 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3366 		return;
3367 
3368 	nh->ipip_entry = ipip_entry;
3369 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3370 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3371 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3372 }
3373 
3374 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3375 				       struct mlxsw_sp_nexthop *nh)
3376 {
3377 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3378 
3379 	if (!ipip_entry)
3380 		return;
3381 
3382 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3383 	nh->ipip_entry = NULL;
3384 }
3385 
3386 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3387 					const struct fib_nh *fib_nh,
3388 					enum mlxsw_sp_ipip_type *p_ipipt)
3389 {
3390 	struct net_device *dev = fib_nh->nh_dev;
3391 
3392 	return dev &&
3393 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3394 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3395 }
3396 
3397 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3398 				       struct mlxsw_sp_nexthop *nh)
3399 {
3400 	switch (nh->type) {
3401 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3402 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3403 		mlxsw_sp_nexthop_rif_fini(nh);
3404 		break;
3405 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3406 		mlxsw_sp_nexthop_rif_fini(nh);
3407 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3408 		break;
3409 	}
3410 }
3411 
3412 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3413 				       struct mlxsw_sp_nexthop *nh,
3414 				       struct fib_nh *fib_nh)
3415 {
3416 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3417 	struct net_device *dev = fib_nh->nh_dev;
3418 	struct mlxsw_sp_ipip_entry *ipip_entry;
3419 	struct mlxsw_sp_rif *rif;
3420 	int err;
3421 
3422 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3423 	if (ipip_entry) {
3424 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3425 		if (ipip_ops->can_offload(mlxsw_sp, dev,
3426 					  MLXSW_SP_L3_PROTO_IPV4)) {
3427 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3428 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3429 			return 0;
3430 		}
3431 	}
3432 
3433 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3434 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3435 	if (!rif)
3436 		return 0;
3437 
3438 	mlxsw_sp_nexthop_rif_init(nh, rif);
3439 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3440 	if (err)
3441 		goto err_neigh_init;
3442 
3443 	return 0;
3444 
3445 err_neigh_init:
3446 	mlxsw_sp_nexthop_rif_fini(nh);
3447 	return err;
3448 }
3449 
3450 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3451 					struct mlxsw_sp_nexthop *nh)
3452 {
3453 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3454 }
3455 
3456 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3457 				  struct mlxsw_sp_nexthop_group *nh_grp,
3458 				  struct mlxsw_sp_nexthop *nh,
3459 				  struct fib_nh *fib_nh)
3460 {
3461 	struct net_device *dev = fib_nh->nh_dev;
3462 	struct in_device *in_dev;
3463 	int err;
3464 
3465 	nh->nh_grp = nh_grp;
3466 	nh->key.fib_nh = fib_nh;
3467 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3468 	nh->nh_weight = fib_nh->nh_weight;
3469 #else
3470 	nh->nh_weight = 1;
3471 #endif
3472 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3473 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3474 	if (err)
3475 		return err;
3476 
3477 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3478 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3479 
3480 	if (!dev)
3481 		return 0;
3482 
3483 	in_dev = __in_dev_get_rtnl(dev);
3484 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3485 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3486 		return 0;
3487 
3488 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3489 	if (err)
3490 		goto err_nexthop_neigh_init;
3491 
3492 	return 0;
3493 
3494 err_nexthop_neigh_init:
3495 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3496 	return err;
3497 }
3498 
3499 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3500 				   struct mlxsw_sp_nexthop *nh)
3501 {
3502 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3503 	list_del(&nh->router_list_node);
3504 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3505 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3506 }
3507 
3508 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3509 				    unsigned long event, struct fib_nh *fib_nh)
3510 {
3511 	struct mlxsw_sp_nexthop_key key;
3512 	struct mlxsw_sp_nexthop *nh;
3513 
3514 	if (mlxsw_sp->router->aborted)
3515 		return;
3516 
3517 	key.fib_nh = fib_nh;
3518 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3519 	if (WARN_ON_ONCE(!nh))
3520 		return;
3521 
3522 	switch (event) {
3523 	case FIB_EVENT_NH_ADD:
3524 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3525 		break;
3526 	case FIB_EVENT_NH_DEL:
3527 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3528 		break;
3529 	}
3530 
3531 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3532 }
3533 
3534 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3535 					struct mlxsw_sp_rif *rif)
3536 {
3537 	struct mlxsw_sp_nexthop *nh;
3538 	bool removing;
3539 
3540 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3541 		switch (nh->type) {
3542 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3543 			removing = false;
3544 			break;
3545 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3546 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3547 			break;
3548 		default:
3549 			WARN_ON(1);
3550 			continue;
3551 		}
3552 
3553 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3554 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3555 	}
3556 }
3557 
3558 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3559 					 struct mlxsw_sp_rif *old_rif,
3560 					 struct mlxsw_sp_rif *new_rif)
3561 {
3562 	struct mlxsw_sp_nexthop *nh;
3563 
3564 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3565 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3566 		nh->rif = new_rif;
3567 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3568 }
3569 
3570 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3571 					   struct mlxsw_sp_rif *rif)
3572 {
3573 	struct mlxsw_sp_nexthop *nh, *tmp;
3574 
3575 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3576 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3577 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3578 	}
3579 }
3580 
3581 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3582 				   const struct fib_info *fi)
3583 {
3584 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3585 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3586 }
3587 
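/* Create a nexthop group mirroring the kernel's fib_info. The nexthops
 * are stored in an array at the end of the group, one per kernel fib_nh,
 * and a reference on the fib_info is held for the group's lifetime.
 */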
3588 static struct mlxsw_sp_nexthop_group *
3589 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3590 {
3591 	struct mlxsw_sp_nexthop_group *nh_grp;
3592 	struct mlxsw_sp_nexthop *nh;
3593 	struct fib_nh *fib_nh;
3594 	size_t alloc_size;
3595 	int i;
3596 	int err;
3597 
3598 	alloc_size = sizeof(*nh_grp) +
3599 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3600 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3601 	if (!nh_grp)
3602 		return ERR_PTR(-ENOMEM);
3603 	nh_grp->priv = fi;
3604 	INIT_LIST_HEAD(&nh_grp->fib_list);
3605 	nh_grp->neigh_tbl = &arp_tbl;
3606 
3607 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3608 	nh_grp->count = fi->fib_nhs;
3609 	fib_info_hold(fi);
3610 	for (i = 0; i < nh_grp->count; i++) {
3611 		nh = &nh_grp->nexthops[i];
3612 		fib_nh = &fi->fib_nh[i];
3613 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3614 		if (err)
3615 			goto err_nexthop4_init;
3616 	}
3617 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3618 	if (err)
3619 		goto err_nexthop_group_insert;
3620 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3621 	return nh_grp;
3622 
3623 err_nexthop_group_insert:
3624 err_nexthop4_init:
3625 	for (i--; i >= 0; i--) {
3626 		nh = &nh_grp->nexthops[i];
3627 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3628 	}
3629 	fib_info_put(fi);
3630 	kfree(nh_grp);
3631 	return ERR_PTR(err);
3632 }
3633 
3634 static void
3635 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3636 				struct mlxsw_sp_nexthop_group *nh_grp)
3637 {
3638 	struct mlxsw_sp_nexthop *nh;
3639 	int i;
3640 
3641 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3642 	for (i = 0; i < nh_grp->count; i++) {
3643 		nh = &nh_grp->nexthops[i];
3644 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3645 	}
3646 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3647 	WARN_ON_ONCE(nh_grp->adj_index_valid);
3648 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3649 	kfree(nh_grp);
3650 }
3651 
3652 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3653 				       struct mlxsw_sp_fib_entry *fib_entry,
3654 				       struct fib_info *fi)
3655 {
3656 	struct mlxsw_sp_nexthop_group *nh_grp;
3657 
3658 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3659 	if (!nh_grp) {
3660 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3661 		if (IS_ERR(nh_grp))
3662 			return PTR_ERR(nh_grp);
3663 	}
3664 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3665 	fib_entry->nh_group = nh_grp;
3666 	return 0;
3667 }
3668 
3669 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3670 					struct mlxsw_sp_fib_entry *fib_entry)
3671 {
3672 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3673 
3674 	list_del(&fib_entry->nexthop_group_node);
3675 	if (!list_empty(&nh_grp->fib_list))
3676 		return;
3677 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3678 }
3679 
3680 static bool
3681 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3682 {
3683 	struct mlxsw_sp_fib4_entry *fib4_entry;
3684 
3685 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3686 				  common);
3687 	return !fib4_entry->tos;
3688 }
3689 
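/* An entry is considered offloadable only when the required state is in
 * place: a valid adjacency index for remote (gateway) entries and an
 * associated RIF for local entries. IPv4 entries with a non-zero TOS are
 * never offloaded.
 */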
3690 static bool
3691 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3692 {
3693 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3694 
3695 	switch (fib_entry->fib_node->fib->proto) {
3696 	case MLXSW_SP_L3_PROTO_IPV4:
3697 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3698 			return false;
3699 		break;
3700 	case MLXSW_SP_L3_PROTO_IPV6:
3701 		break;
3702 	}
3703 
3704 	switch (fib_entry->type) {
3705 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3706 		return !!nh_group->adj_index_valid;
3707 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3708 		return !!nh_group->nh_rif;
3709 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3710 		return true;
3711 	default:
3712 		return false;
3713 	}
3714 }
3715 
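/* Find the nexthop in the group corresponding to the given IPv6 route by
 * matching on the egress device and the gateway address.
 */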
3716 static struct mlxsw_sp_nexthop *
3717 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3718 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3719 {
3720 	int i;
3721 
3722 	for (i = 0; i < nh_grp->count; i++) {
3723 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3724 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3725 
3726 		if (nh->rif && nh->rif->dev == rt->dst.dev &&
3727 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3728 				    &rt->rt6i_gateway))
3729 			return nh;
3731 	}
3732 
3733 	return NULL;
3734 }
3735 
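/* Reflect the entry's offload state back to the kernel by setting or
 * clearing RTNH_F_OFFLOAD on the route's nexthops. Local and decap
 * entries are always reflected via the group's first nexthop.
 */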
3736 static void
3737 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3738 {
3739 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3740 	int i;
3741 
3742 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3743 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3744 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3745 		return;
3746 	}
3747 
3748 	for (i = 0; i < nh_grp->count; i++) {
3749 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3750 
3751 		if (nh->offloaded)
3752 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3753 		else
3754 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3755 	}
3756 }
3757 
3758 static void
3759 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3760 {
3761 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3762 	int i;
3763 
3764 	for (i = 0; i < nh_grp->count; i++) {
3765 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3766 
3767 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3768 	}
3769 }
3770 
3771 static void
3772 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3773 {
3774 	struct mlxsw_sp_fib6_entry *fib6_entry;
3775 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3776 
3777 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3778 				  common);
3779 
3780 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3781 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3782 				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3783 		return;
3784 	}
3785 
3786 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3787 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3788 		struct mlxsw_sp_nexthop *nh;
3789 
3790 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3791 		if (nh && nh->offloaded)
3792 			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3793 		else
3794 			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3795 	}
3796 }
3797 
3798 static void
3799 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3800 {
3801 	struct mlxsw_sp_fib6_entry *fib6_entry;
3802 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3803 
3804 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3805 				  common);
3806 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3807 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3808 
3809 		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3810 	}
3811 }
3812 
3813 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3814 {
3815 	switch (fib_entry->fib_node->fib->proto) {
3816 	case MLXSW_SP_L3_PROTO_IPV4:
3817 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
3818 		break;
3819 	case MLXSW_SP_L3_PROTO_IPV6:
3820 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
3821 		break;
3822 	}
3823 }
3824 
3825 static void
3826 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3827 {
3828 	switch (fib_entry->fib_node->fib->proto) {
3829 	case MLXSW_SP_L3_PROTO_IPV4:
3830 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3831 		break;
3832 	case MLXSW_SP_L3_PROTO_IPV6:
3833 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3834 		break;
3835 	}
3836 }
3837 
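/* Synchronize the kernel's view of the offload state after a hardware
 * update: a delete clears the offload indication, while a successful
 * write sets or clears it depending on whether the entry could actually
 * be offloaded.
 */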
3838 static void
3839 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3840 				   enum mlxsw_reg_ralue_op op, int err)
3841 {
3842 	switch (op) {
3843 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3844 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3845 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3846 		if (err)
3847 			return;
3848 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3849 			mlxsw_sp_fib_entry_offload_set(fib_entry);
3850 		else
3851 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
3852 		return;
3853 	default:
3854 		return;
3855 	}
3856 }
3857 
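/* Pack the part of the RALUE register common to all operations:
 * protocol, operation, virtual router and the route's prefix. The
 * action-specific part is packed by the caller.
 */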
3858 static void
3859 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3860 			      const struct mlxsw_sp_fib_entry *fib_entry,
3861 			      enum mlxsw_reg_ralue_op op)
3862 {
3863 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3864 	enum mlxsw_reg_ralxx_protocol proto;
3865 	u32 *p_dip;
3866 
3867 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3868 
3869 	switch (fib->proto) {
3870 	case MLXSW_SP_L3_PROTO_IPV4:
3871 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
3872 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3873 				      fib_entry->fib_node->key.prefix_len,
3874 				      *p_dip);
3875 		break;
3876 	case MLXSW_SP_L3_PROTO_IPV6:
3877 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3878 				      fib_entry->fib_node->key.prefix_len,
3879 				      fib_entry->fib_node->key.addr);
3880 		break;
3881 	}
3882 }
3883 
3884 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
3885 					struct mlxsw_sp_fib_entry *fib_entry,
3886 					enum mlxsw_reg_ralue_op op)
3887 {
3888 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3889 	enum mlxsw_reg_ralue_trap_action trap_action;
3890 	u16 trap_id = 0;
3891 	u32 adjacency_index = 0;
3892 	u16 ecmp_size = 0;
3893 
	/* In case the nexthop group adjacency index is valid, use it
	 * with the provided ECMP size. Otherwise, set up a trap and pass
	 * traffic to the kernel.
	 */
3898 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3899 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3900 		adjacency_index = fib_entry->nh_group->adj_index;
3901 		ecmp_size = fib_entry->nh_group->ecmp_size;
3902 	} else {
3903 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3904 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3905 	}
3906 
3907 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3908 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
3909 					adjacency_index, ecmp_size);
3910 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3911 }
3912 
3913 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
3914 				       struct mlxsw_sp_fib_entry *fib_entry,
3915 				       enum mlxsw_reg_ralue_op op)
3916 {
3917 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
3918 	enum mlxsw_reg_ralue_trap_action trap_action;
3919 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3920 	u16 trap_id = 0;
3921 	u16 rif_index = 0;
3922 
3923 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3924 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3925 		rif_index = rif->rif_index;
3926 	} else {
3927 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3928 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3929 	}
3930 
3931 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3932 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
3933 				       rif_index);
3934 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3935 }
3936 
3937 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
3938 				      struct mlxsw_sp_fib_entry *fib_entry,
3939 				      enum mlxsw_reg_ralue_op op)
3940 {
3941 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3942 
3943 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3944 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
3945 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3946 }
3947 
3948 static int
3949 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
3950 				 struct mlxsw_sp_fib_entry *fib_entry,
3951 				 enum mlxsw_reg_ralue_op op)
3952 {
3953 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
3954 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3955 
3956 	if (WARN_ON(!ipip_entry))
3957 		return -EINVAL;
3958 
3959 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3960 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
3961 				      fib_entry->decap.tunnel_index);
3962 }
3963 
3964 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3965 				   struct mlxsw_sp_fib_entry *fib_entry,
3966 				   enum mlxsw_reg_ralue_op op)
3967 {
3968 	switch (fib_entry->type) {
3969 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3970 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
3971 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3972 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
3973 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
3974 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
3975 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3976 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
3977 							fib_entry, op);
3978 	}
3979 	return -EINVAL;
3980 }
3981 
3982 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3983 				 struct mlxsw_sp_fib_entry *fib_entry,
3984 				 enum mlxsw_reg_ralue_op op)
3985 {
3986 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
3987 
3988 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
3989 
3990 	return err;
3991 }
3992 
3993 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
3994 				     struct mlxsw_sp_fib_entry *fib_entry)
3995 {
3996 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
3997 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
3998 }
3999 
4000 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4001 				  struct mlxsw_sp_fib_entry *fib_entry)
4002 {
4003 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4004 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4005 }
4006 
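/* Derive the device action from the kernel route type: local and
 * broadcast routes trap packets to the CPU (or decapsulate them when
 * they terminate an IP-in-IP tunnel), blackhole-like routes reuse the
 * lower-priority local action, and unicast routes are forwarded either
 * via an adjacency (gateway routes) or directly through a RIF.
 */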
4007 static int
4008 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4009 			     const struct fib_entry_notifier_info *fen_info,
4010 			     struct mlxsw_sp_fib_entry *fib_entry)
4011 {
4012 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4013 	struct net_device *dev = fen_info->fi->fib_dev;
4014 	struct mlxsw_sp_ipip_entry *ipip_entry;
4015 	struct fib_info *fi = fen_info->fi;
4016 
4017 	switch (fen_info->type) {
4018 	case RTN_LOCAL:
4019 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4020 						 MLXSW_SP_L3_PROTO_IPV4, dip);
4021 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4022 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4023 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4024 							     fib_entry,
4025 							     ipip_entry);
4026 		}
4027 		/* fall through */
4028 	case RTN_BROADCAST:
4029 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4030 		return 0;
4031 	case RTN_UNREACHABLE: /* fall through */
4032 	case RTN_BLACKHOLE: /* fall through */
4033 	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * they can be trapped with a lower priority than packets
		 * directed at the host, so use action type local instead
		 * of trap.
		 */
4038 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4039 		return 0;
4040 	case RTN_UNICAST:
4041 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4042 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4043 		else
4044 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4045 		return 0;
4046 	default:
4047 		return -EINVAL;
4048 	}
4049 }
4050 
4051 static struct mlxsw_sp_fib4_entry *
4052 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4053 			   struct mlxsw_sp_fib_node *fib_node,
4054 			   const struct fib_entry_notifier_info *fen_info)
4055 {
4056 	struct mlxsw_sp_fib4_entry *fib4_entry;
4057 	struct mlxsw_sp_fib_entry *fib_entry;
4058 	int err;
4059 
4060 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4061 	if (!fib4_entry)
4062 		return ERR_PTR(-ENOMEM);
4063 	fib_entry = &fib4_entry->common;
4064 
4065 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4066 	if (err)
4067 		goto err_fib4_entry_type_set;
4068 
4069 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4070 	if (err)
4071 		goto err_nexthop4_group_get;
4072 
4073 	fib4_entry->prio = fen_info->fi->fib_priority;
4074 	fib4_entry->tb_id = fen_info->tb_id;
4075 	fib4_entry->type = fen_info->type;
4076 	fib4_entry->tos = fen_info->tos;
4077 
4078 	fib_entry->fib_node = fib_node;
4079 
4080 	return fib4_entry;
4081 
4082 err_nexthop4_group_get:
4083 err_fib4_entry_type_set:
4084 	kfree(fib4_entry);
4085 	return ERR_PTR(err);
4086 }
4087 
4088 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4089 					struct mlxsw_sp_fib4_entry *fib4_entry)
4090 {
4091 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4092 	kfree(fib4_entry);
4093 }
4094 
4095 static struct mlxsw_sp_fib4_entry *
4096 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4097 			   const struct fib_entry_notifier_info *fen_info)
4098 {
4099 	struct mlxsw_sp_fib4_entry *fib4_entry;
4100 	struct mlxsw_sp_fib_node *fib_node;
4101 	struct mlxsw_sp_fib *fib;
4102 	struct mlxsw_sp_vr *vr;
4103 
4104 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4105 	if (!vr)
4106 		return NULL;
4107 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4108 
4109 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4110 					    sizeof(fen_info->dst),
4111 					    fen_info->dst_len);
4112 	if (!fib_node)
4113 		return NULL;
4114 
4115 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4116 		if (fib4_entry->tb_id == fen_info->tb_id &&
4117 		    fib4_entry->tos == fen_info->tos &&
4118 		    fib4_entry->type == fen_info->type &&
4119 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4120 		    fen_info->fi) {
4121 			return fib4_entry;
4122 		}
4123 	}
4124 
4125 	return NULL;
4126 }
4127 
4128 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4129 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4130 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4131 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4132 	.automatic_shrinking = true,
4133 };
4134 
4135 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4136 				    struct mlxsw_sp_fib_node *fib_node)
4137 {
4138 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4139 				      mlxsw_sp_fib_ht_params);
4140 }
4141 
4142 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4143 				     struct mlxsw_sp_fib_node *fib_node)
4144 {
4145 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4146 			       mlxsw_sp_fib_ht_params);
4147 }
4148 
4149 static struct mlxsw_sp_fib_node *
4150 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4151 			 size_t addr_len, unsigned char prefix_len)
4152 {
4153 	struct mlxsw_sp_fib_key key;
4154 
4155 	memset(&key, 0, sizeof(key));
4156 	memcpy(key.addr, addr, addr_len);
4157 	key.prefix_len = prefix_len;
4158 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4159 }
4160 
4161 static struct mlxsw_sp_fib_node *
4162 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4163 			 size_t addr_len, unsigned char prefix_len)
4164 {
4165 	struct mlxsw_sp_fib_node *fib_node;
4166 
4167 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4168 	if (!fib_node)
4169 		return NULL;
4170 
4171 	INIT_LIST_HEAD(&fib_node->entry_list);
4172 	list_add(&fib_node->list, &fib->node_list);
4173 	memcpy(fib_node->key.addr, addr, addr_len);
4174 	fib_node->key.prefix_len = prefix_len;
4175 
4176 	return fib_node;
4177 }
4178 
4179 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4180 {
4181 	list_del(&fib_node->list);
4182 	WARN_ON(!list_empty(&fib_node->entry_list));
4183 	kfree(fib_node);
4184 }
4185 
4186 static bool
4187 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4188 				 const struct mlxsw_sp_fib_entry *fib_entry)
4189 {
4190 	return list_first_entry(&fib_node->entry_list,
4191 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4192 }
4193 
4194 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4195 				      struct mlxsw_sp_fib *fib,
4196 				      struct mlxsw_sp_fib_node *fib_node)
4197 {
4198 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
4199 	struct mlxsw_sp_lpm_tree *lpm_tree;
4200 	int err;
4201 
	/* Since the tree is shared between all virtual routers, we must
	 * make sure it contains all the required prefix lengths. This
	 * can be computed either by adding the new prefix length to the
	 * existing prefix usage of a bound tree, or by aggregating the
	 * prefix lengths across all virtual routers and adding the new
	 * one as well.
	 */
4209 	if (fib->lpm_tree)
4210 		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
4211 					  &fib->lpm_tree->prefix_usage);
4212 	else
4213 		mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
4214 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4215 
4216 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4217 					 fib->proto);
4218 	if (IS_ERR(lpm_tree))
4219 		return PTR_ERR(lpm_tree);
4220 
	if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id) {
		/* The requested tree is already bound to the FIB, which
		 * holds its own reference, so release the one taken by
		 * the lookup above.
		 */
		mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
		return 0;
	}

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		return err;

	return 0;
4229 }
4230 
4231 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4232 					 struct mlxsw_sp_fib *fib)
4233 {
4234 	if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
4235 		return;
4236 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
4237 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
4238 	fib->lpm_tree = NULL;
4239 }
4240 
4241 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
4242 {
4243 	unsigned char prefix_len = fib_node->key.prefix_len;
4244 	struct mlxsw_sp_fib *fib = fib_node->fib;
4245 
4246 	if (fib->prefix_ref_count[prefix_len]++ == 0)
4247 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
4248 }
4249 
4250 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
4251 {
4252 	unsigned char prefix_len = fib_node->key.prefix_len;
4253 	struct mlxsw_sp_fib *fib = fib_node->fib;
4254 
4255 	if (--fib->prefix_ref_count[prefix_len] == 0)
4256 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
4257 }
4258 
4259 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4260 				  struct mlxsw_sp_fib_node *fib_node,
4261 				  struct mlxsw_sp_fib *fib)
4262 {
4263 	int err;
4264 
4265 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4266 	if (err)
4267 		return err;
4268 	fib_node->fib = fib;
4269 
4270 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
4271 	if (err)
4272 		goto err_fib_lpm_tree_link;
4273 
4274 	mlxsw_sp_fib_node_prefix_inc(fib_node);
4275 
4276 	return 0;
4277 
4278 err_fib_lpm_tree_link:
4279 	fib_node->fib = NULL;
4280 	mlxsw_sp_fib_node_remove(fib, fib_node);
4281 	return err;
4282 }
4283 
4284 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4285 				   struct mlxsw_sp_fib_node *fib_node)
4286 {
4287 	struct mlxsw_sp_fib *fib = fib_node->fib;
4288 
4289 	mlxsw_sp_fib_node_prefix_dec(fib_node);
4290 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
4291 	fib_node->fib = NULL;
4292 	mlxsw_sp_fib_node_remove(fib, fib_node);
4293 }
4294 
4295 static struct mlxsw_sp_fib_node *
4296 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4297 		      size_t addr_len, unsigned char prefix_len,
4298 		      enum mlxsw_sp_l3proto proto)
4299 {
4300 	struct mlxsw_sp_fib_node *fib_node;
4301 	struct mlxsw_sp_fib *fib;
4302 	struct mlxsw_sp_vr *vr;
4303 	int err;
4304 
4305 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4306 	if (IS_ERR(vr))
4307 		return ERR_CAST(vr);
4308 	fib = mlxsw_sp_vr_fib(vr, proto);
4309 
4310 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4311 	if (fib_node)
4312 		return fib_node;
4313 
4314 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4315 	if (!fib_node) {
4316 		err = -ENOMEM;
4317 		goto err_fib_node_create;
4318 	}
4319 
4320 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4321 	if (err)
4322 		goto err_fib_node_init;
4323 
4324 	return fib_node;
4325 
4326 err_fib_node_init:
4327 	mlxsw_sp_fib_node_destroy(fib_node);
4328 err_fib_node_create:
4329 	mlxsw_sp_vr_put(vr);
4330 	return ERR_PTR(err);
4331 }
4332 
4333 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4334 				  struct mlxsw_sp_fib_node *fib_node)
4335 {
4336 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4337 
4338 	if (!list_empty(&fib_node->entry_list))
4339 		return;
4340 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4341 	mlxsw_sp_fib_node_destroy(fib_node);
4342 	mlxsw_sp_vr_put(vr);
4343 }
4344 
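/* Entries in a node's list are sorted by descending table ID, descending
 * TOS and ascending priority. Find the first existing entry before which
 * the new entry should be inserted, if any.
 */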
4345 static struct mlxsw_sp_fib4_entry *
4346 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4347 			      const struct mlxsw_sp_fib4_entry *new4_entry)
4348 {
4349 	struct mlxsw_sp_fib4_entry *fib4_entry;
4350 
4351 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4352 		if (fib4_entry->tb_id > new4_entry->tb_id)
4353 			continue;
4354 		if (fib4_entry->tb_id != new4_entry->tb_id)
4355 			break;
4356 		if (fib4_entry->tos > new4_entry->tos)
4357 			continue;
4358 		if (fib4_entry->prio >= new4_entry->prio ||
4359 		    fib4_entry->tos < new4_entry->tos)
4360 			return fib4_entry;
4361 	}
4362 
4363 	return NULL;
4364 }
4365 
4366 static int
4367 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4368 			       struct mlxsw_sp_fib4_entry *new4_entry)
4369 {
4370 	struct mlxsw_sp_fib_node *fib_node;
4371 
4372 	if (WARN_ON(!fib4_entry))
4373 		return -EINVAL;
4374 
4375 	fib_node = fib4_entry->common.fib_node;
4376 	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4377 				 common.list) {
4378 		if (fib4_entry->tb_id != new4_entry->tb_id ||
4379 		    fib4_entry->tos != new4_entry->tos ||
4380 		    fib4_entry->prio != new4_entry->prio)
4381 			break;
4382 	}
4383 
4384 	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4385 	return 0;
4386 }
4387 
4388 static int
4389 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4390 			       bool replace, bool append)
4391 {
4392 	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4393 	struct mlxsw_sp_fib4_entry *fib4_entry;
4394 
4395 	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4396 
4397 	if (append)
4398 		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4399 	if (replace && WARN_ON(!fib4_entry))
4400 		return -EINVAL;
4401 
	/* Insert the new entry before the replaced one, so that we can
	 * later remove the latter.
	 */
4405 	if (fib4_entry) {
4406 		list_add_tail(&new4_entry->common.list,
4407 			      &fib4_entry->common.list);
4408 	} else {
4409 		struct mlxsw_sp_fib4_entry *last;
4410 
4411 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4412 			if (new4_entry->tb_id > last->tb_id)
4413 				break;
4414 			fib4_entry = last;
4415 		}
4416 
4417 		if (fib4_entry)
4418 			list_add(&new4_entry->common.list,
4419 				 &fib4_entry->common.list);
4420 		else
4421 			list_add(&new4_entry->common.list,
4422 				 &fib_node->entry_list);
4423 	}
4424 
4425 	return 0;
4426 }
4427 
4428 static void
4429 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4430 {
4431 	list_del(&fib4_entry->common.list);
4432 }
4433 
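/* Only the first (best) entry in a node's list is programmed to the
 * device. When a new first entry is added over an existing one, the old
 * entry's offload indication is cleared before the overwrite.
 */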
4434 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4435 				       struct mlxsw_sp_fib_entry *fib_entry)
4436 {
4437 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4438 
4439 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4440 		return 0;
4441 
4442 	/* To prevent packet loss, overwrite the previously offloaded
4443 	 * entry.
4444 	 */
4445 	if (!list_is_singular(&fib_node->entry_list)) {
4446 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4447 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4448 
4449 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4450 	}
4451 
4452 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4453 }
4454 
4455 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4456 					struct mlxsw_sp_fib_entry *fib_entry)
4457 {
4458 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4459 
4460 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4461 		return;
4462 
4463 	/* Promote the next entry by overwriting the deleted entry */
4464 	if (!list_is_singular(&fib_node->entry_list)) {
4465 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4466 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4467 
4468 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4469 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4470 		return;
4471 	}
4472 
4473 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4474 }
4475 
4476 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4477 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4478 					 bool replace, bool append)
4479 {
4480 	int err;
4481 
4482 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4483 	if (err)
4484 		return err;
4485 
4486 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4487 	if (err)
4488 		goto err_fib_node_entry_add;
4489 
4490 	return 0;
4491 
4492 err_fib_node_entry_add:
4493 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4494 	return err;
4495 }
4496 
4497 static void
4498 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4499 				struct mlxsw_sp_fib4_entry *fib4_entry)
4500 {
4501 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4502 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4503 
4504 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4505 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4506 }
4507 
4508 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4509 					struct mlxsw_sp_fib4_entry *fib4_entry,
4510 					bool replace)
4511 {
4512 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4513 	struct mlxsw_sp_fib4_entry *replaced;
4514 
4515 	if (!replace)
4516 		return;
4517 
	/* We inserted the new entry before the replaced one */
4519 	replaced = list_next_entry(fib4_entry, common.list);
4520 
4521 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4522 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4523 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4524 }
4525 
4526 static int
4527 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4528 			 const struct fib_entry_notifier_info *fen_info,
4529 			 bool replace, bool append)
4530 {
4531 	struct mlxsw_sp_fib4_entry *fib4_entry;
4532 	struct mlxsw_sp_fib_node *fib_node;
4533 	int err;
4534 
4535 	if (mlxsw_sp->router->aborted)
4536 		return 0;
4537 
4538 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4539 					 &fen_info->dst, sizeof(fen_info->dst),
4540 					 fen_info->dst_len,
4541 					 MLXSW_SP_L3_PROTO_IPV4);
4542 	if (IS_ERR(fib_node)) {
4543 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4544 		return PTR_ERR(fib_node);
4545 	}
4546 
4547 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4548 	if (IS_ERR(fib4_entry)) {
4549 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4550 		err = PTR_ERR(fib4_entry);
4551 		goto err_fib4_entry_create;
4552 	}
4553 
4554 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4555 					    append);
4556 	if (err) {
4557 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4558 		goto err_fib4_node_entry_link;
4559 	}
4560 
4561 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4562 
4563 	return 0;
4564 
4565 err_fib4_node_entry_link:
4566 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4567 err_fib4_entry_create:
4568 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4569 	return err;
4570 }
4571 
4572 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4573 				     struct fib_entry_notifier_info *fen_info)
4574 {
4575 	struct mlxsw_sp_fib4_entry *fib4_entry;
4576 	struct mlxsw_sp_fib_node *fib_node;
4577 
4578 	if (mlxsw_sp->router->aborted)
4579 		return;
4580 
4581 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4582 	if (WARN_ON(!fib4_entry))
4583 		return;
4584 	fib_node = fib4_entry->common.fib_node;
4585 
4586 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4587 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4588 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4589 }
4590 
4591 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4592 {
	/* Packets with a link-local destination IP arriving at the router
	 * are trapped to the CPU, so there is no need to program specific
	 * routes for them.
	 */
4597 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
4598 		return true;
4599 
4600 	/* Multicast routes aren't supported, so ignore them. Neighbour
4601 	 * Discovery packets are specifically trapped.
4602 	 */
4603 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
4604 		return true;
4605 
4606 	/* Cloned routes are irrelevant in the forwarding path. */
4607 	if (rt->rt6i_flags & RTF_CACHE)
4608 		return true;
4609 
4610 	return false;
4611 }
4612 
4613 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
4614 {
4615 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4616 
4617 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4618 	if (!mlxsw_sp_rt6)
4619 		return ERR_PTR(-ENOMEM);
4620 
	/* In case of route replacement, the replaced route is deleted
	 * without notification. Take a reference to prevent accessing
	 * freed memory.
	 */
4625 	mlxsw_sp_rt6->rt = rt;
4626 	rt6_hold(rt);
4627 
4628 	return mlxsw_sp_rt6;
4629 }
4630 
4631 #if IS_ENABLED(CONFIG_IPV6)
4632 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4633 {
4634 	rt6_release(rt);
4635 }
4636 #else
4637 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4638 {
4639 }
4640 #endif
4641 
4642 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4643 {
4644 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4645 	kfree(mlxsw_sp_rt6);
4646 }
4647 
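/* Only gateway routes that were not created by addrconf can be members
 * of a multipath entry.
 */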
4648 static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
4649 {
4650 	/* RTF_CACHE routes are ignored */
4651 	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4652 }
4653 
4654 static struct rt6_info *
4655 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4656 {
4657 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4658 				list)->rt;
4659 }
4660 
4661 static struct mlxsw_sp_fib6_entry *
4662 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4663 				 const struct rt6_info *nrt, bool replace)
4664 {
4665 	struct mlxsw_sp_fib6_entry *fib6_entry;
4666 
4667 	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4668 		return NULL;
4669 
4670 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4671 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4672 
4673 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4674 		 * virtual router.
4675 		 */
4676 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
4677 			continue;
4678 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
4679 			break;
4680 		if (rt->rt6i_metric < nrt->rt6i_metric)
4681 			continue;
4682 		if (rt->rt6i_metric == nrt->rt6i_metric &&
4683 		    mlxsw_sp_fib6_rt_can_mp(rt))
4684 			return fib6_entry;
4685 		if (rt->rt6i_metric > nrt->rt6i_metric)
4686 			break;
4687 	}
4688 
4689 	return NULL;
4690 }
4691 
4692 static struct mlxsw_sp_rt6 *
4693 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4694 			    const struct rt6_info *rt)
4695 {
4696 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4697 
4698 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4699 		if (mlxsw_sp_rt6->rt == rt)
4700 			return mlxsw_sp_rt6;
4701 	}
4702 
4703 	return NULL;
4704 }
4705 
4706 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4707 					const struct rt6_info *rt,
4708 					enum mlxsw_sp_ipip_type *ret)
4709 {
4710 	return rt->dst.dev &&
4711 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
4712 }
4713 
4714 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4715 				       struct mlxsw_sp_nexthop_group *nh_grp,
4716 				       struct mlxsw_sp_nexthop *nh,
4717 				       const struct rt6_info *rt)
4718 {
4719 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4720 	struct mlxsw_sp_ipip_entry *ipip_entry;
4721 	struct net_device *dev = rt->dst.dev;
4722 	struct mlxsw_sp_rif *rif;
4723 	int err;
4724 
4725 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4726 	if (ipip_entry) {
4727 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4728 		if (ipip_ops->can_offload(mlxsw_sp, dev,
4729 					  MLXSW_SP_L3_PROTO_IPV6)) {
4730 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4731 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4732 			return 0;
4733 		}
4734 	}
4735 
4736 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4737 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4738 	if (!rif)
4739 		return 0;
4740 	mlxsw_sp_nexthop_rif_init(nh, rif);
4741 
4742 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4743 	if (err)
4744 		goto err_nexthop_neigh_init;
4745 
4746 	return 0;
4747 
4748 err_nexthop_neigh_init:
4749 	mlxsw_sp_nexthop_rif_fini(nh);
4750 	return err;
4751 }
4752 
4753 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
4754 					struct mlxsw_sp_nexthop *nh)
4755 {
4756 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4757 }
4758 
4759 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4760 				  struct mlxsw_sp_nexthop_group *nh_grp,
4761 				  struct mlxsw_sp_nexthop *nh,
4762 				  const struct rt6_info *rt)
4763 {
4764 	struct net_device *dev = rt->dst.dev;
4765 
4766 	nh->nh_grp = nh_grp;
4767 	nh->nh_weight = 1;
4768 	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
4769 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4770 
4771 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4772 
4773 	if (!dev)
4774 		return 0;
4775 	nh->ifindex = dev->ifindex;
4776 
4777 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4778 }
4779 
4780 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4781 				   struct mlxsw_sp_nexthop *nh)
4782 {
4783 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4784 	list_del(&nh->router_list_node);
4785 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4786 }
4787 
4788 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4789 				    const struct rt6_info *rt)
4790 {
4791 	return rt->rt6i_flags & RTF_GATEWAY ||
4792 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4793 }
4794 
4795 static struct mlxsw_sp_nexthop_group *
4796 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4797 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4798 {
4799 	struct mlxsw_sp_nexthop_group *nh_grp;
4800 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4801 	struct mlxsw_sp_nexthop *nh;
4802 	size_t alloc_size;
4803 	int i = 0;
4804 	int err;
4805 
4806 	alloc_size = sizeof(*nh_grp) +
4807 		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4808 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4809 	if (!nh_grp)
4810 		return ERR_PTR(-ENOMEM);
4811 	INIT_LIST_HEAD(&nh_grp->fib_list);
4812 #if IS_ENABLED(CONFIG_IPV6)
4813 	nh_grp->neigh_tbl = &nd_tbl;
4814 #endif
4815 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4816 					struct mlxsw_sp_rt6, list);
4817 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4818 	nh_grp->count = fib6_entry->nrt6;
4819 	for (i = 0; i < nh_grp->count; i++) {
4820 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
4821 
4822 		nh = &nh_grp->nexthops[i];
4823 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4824 		if (err)
4825 			goto err_nexthop6_init;
4826 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4827 	}
4828 
4829 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4830 	if (err)
4831 		goto err_nexthop_group_insert;
4832 
4833 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4834 	return nh_grp;
4835 
4836 err_nexthop_group_insert:
4837 err_nexthop6_init:
4838 	for (i--; i >= 0; i--) {
4839 		nh = &nh_grp->nexthops[i];
4840 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4841 	}
4842 	kfree(nh_grp);
4843 	return ERR_PTR(err);
4844 }
4845 
4846 static void
4847 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4848 				struct mlxsw_sp_nexthop_group *nh_grp)
4849 {
4850 	struct mlxsw_sp_nexthop *nh;
4851 	int i = nh_grp->count;
4852 
4853 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4854 	for (i--; i >= 0; i--) {
4855 		nh = &nh_grp->nexthops[i];
4856 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4857 	}
4858 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4859 	WARN_ON(nh_grp->adj_index_valid);
4860 	kfree(nh_grp);
4861 }
4862 
4863 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4864 				       struct mlxsw_sp_fib6_entry *fib6_entry)
4865 {
4866 	struct mlxsw_sp_nexthop_group *nh_grp;
4867 
4868 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4869 	if (!nh_grp) {
4870 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4871 		if (IS_ERR(nh_grp))
4872 			return PTR_ERR(nh_grp);
4873 	}
4874 
4875 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4876 		      &nh_grp->fib_list);
4877 	fib6_entry->common.nh_group = nh_grp;
4878 
4879 	return 0;
4880 }
4881 
4882 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4883 					struct mlxsw_sp_fib_entry *fib_entry)
4884 {
4885 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4886 
4887 	list_del(&fib_entry->nexthop_group_node);
4888 	if (!list_empty(&nh_grp->fib_list))
4889 		return;
4890 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4891 }
4892 
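/* Since IPv6 routes can be added to and removed from a multipath entry
 * one by one, the entry's nexthop group cannot be patched in place.
 * Instead, get a group matching the updated route list and release the
 * old one.
 */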
4893 static int
4894 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
4895 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4896 {
4897 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
4898 	int err;
4899 
4900 	fib6_entry->common.nh_group = NULL;
4901 	list_del(&fib6_entry->common.nexthop_group_node);
4902 
4903 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
4904 	if (err)
4905 		goto err_nexthop6_group_get;
4906 
	/* If this entry is offloaded, then the adjacency index currently
	 * associated with it in the device's table is that of the old
	 * group. Start using the new one instead.
	 */
4911 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
4912 	if (err)
4913 		goto err_fib_node_entry_add;
4914 
4915 	if (list_empty(&old_nh_grp->fib_list))
4916 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
4917 
4918 	return 0;
4919 
4920 err_fib_node_entry_add:
4921 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
4922 err_nexthop6_group_get:
4923 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4924 		      &old_nh_grp->fib_list);
4925 	fib6_entry->common.nh_group = old_nh_grp;
4926 	return err;
4927 }
4928 
4929 static int
4930 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
4931 				struct mlxsw_sp_fib6_entry *fib6_entry,
4932 				struct rt6_info *rt)
4933 {
4934 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4935 	int err;
4936 
4937 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
4938 	if (IS_ERR(mlxsw_sp_rt6))
4939 		return PTR_ERR(mlxsw_sp_rt6);
4940 
4941 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
4942 	fib6_entry->nrt6++;
4943 
4944 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4945 	if (err)
4946 		goto err_nexthop6_group_update;
4947 
4948 	return 0;
4949 
4950 err_nexthop6_group_update:
4951 	fib6_entry->nrt6--;
4952 	list_del(&mlxsw_sp_rt6->list);
4953 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4954 	return err;
4955 }
4956 
4957 static void
4958 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
4959 				struct mlxsw_sp_fib6_entry *fib6_entry,
4960 				struct rt6_info *rt)
4961 {
4962 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4963 
4964 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
4965 	if (WARN_ON(!mlxsw_sp_rt6))
4966 		return;
4967 
4968 	fib6_entry->nrt6--;
4969 	list_del(&mlxsw_sp_rt6->list);
4970 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4971 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4972 }
4973 
4974 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4975 					 struct mlxsw_sp_fib_entry *fib_entry,
4976 					 const struct rt6_info *rt)
4977 {
4978 	/* Packets hitting RTF_REJECT routes need to be discarded by the
4979 	 * stack. We can rely on their destination device not having a
4980 	 * RIF (it's the loopback device) and can thus use action type
4981 	 * local, which will cause them to be trapped with a lower
4982 	 * priority than packets that need to be locally received.
4983 	 */
4984 	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
4985 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4986 	else if (rt->rt6i_flags & RTF_REJECT)
4987 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4988 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
4989 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4990 	else
4991 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4992 }
4993 
4994 static void
4995 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
4996 {
4997 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
4998 
4999 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5000 				 list) {
5001 		fib6_entry->nrt6--;
5002 		list_del(&mlxsw_sp_rt6->list);
5003 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5004 	}
5005 }
5006 
5007 static struct mlxsw_sp_fib6_entry *
5008 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5009 			   struct mlxsw_sp_fib_node *fib_node,
5010 			   struct rt6_info *rt)
5011 {
5012 	struct mlxsw_sp_fib6_entry *fib6_entry;
5013 	struct mlxsw_sp_fib_entry *fib_entry;
5014 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5015 	int err;
5016 
5017 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5018 	if (!fib6_entry)
5019 		return ERR_PTR(-ENOMEM);
5020 	fib_entry = &fib6_entry->common;
5021 
5022 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5023 	if (IS_ERR(mlxsw_sp_rt6)) {
5024 		err = PTR_ERR(mlxsw_sp_rt6);
5025 		goto err_rt6_create;
5026 	}
5027 
5028 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5029 
5030 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5031 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5032 	fib6_entry->nrt6 = 1;
5033 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5034 	if (err)
5035 		goto err_nexthop6_group_get;
5036 
5037 	fib_entry->fib_node = fib_node;
5038 
5039 	return fib6_entry;
5040 
5041 err_nexthop6_group_get:
5042 	list_del(&mlxsw_sp_rt6->list);
5043 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5044 err_rt6_create:
5045 	kfree(fib6_entry);
5046 	return ERR_PTR(err);
5047 }
5048 
5049 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5050 					struct mlxsw_sp_fib6_entry *fib6_entry)
5051 {
5052 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5053 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5054 	WARN_ON(fib6_entry->nrt6);
5055 	kfree(fib6_entry);
5056 }
5057 
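/* Find the entry before which the new route should be inserted, keeping
 * the list sorted by descending table ID and ascending metric. In case
 * of replace, prefer an entry whose multipath capability matches the new
 * route's, falling back to one with the same metric that does not.
 */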
5058 static struct mlxsw_sp_fib6_entry *
5059 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5060 			      const struct rt6_info *nrt, bool replace)
5061 {
5062 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5063 
5064 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5065 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5066 
5067 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
5068 			continue;
5069 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
5070 			break;
5071 		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
5072 			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5073 			    mlxsw_sp_fib6_rt_can_mp(nrt))
5074 				return fib6_entry;
5075 			if (mlxsw_sp_fib6_rt_can_mp(nrt))
5076 				fallback = fallback ?: fib6_entry;
5077 		}
5078 		if (rt->rt6i_metric > nrt->rt6i_metric)
5079 			return fallback ?: fib6_entry;
5080 	}
5081 
5082 	return fallback;
5083 }
5084 
5085 static int
5086 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5087 			       bool replace)
5088 {
5089 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5090 	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5091 	struct mlxsw_sp_fib6_entry *fib6_entry;
5092 
5093 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5094 
5095 	if (replace && WARN_ON(!fib6_entry))
5096 		return -EINVAL;
5097 
5098 	if (fib6_entry) {
5099 		list_add_tail(&new6_entry->common.list,
5100 			      &fib6_entry->common.list);
5101 	} else {
5102 		struct mlxsw_sp_fib6_entry *last;
5103 
5104 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
5105 			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5106 
5107 			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
5108 				break;
5109 			fib6_entry = last;
5110 		}
5111 
5112 		if (fib6_entry)
5113 			list_add(&new6_entry->common.list,
5114 				 &fib6_entry->common.list);
5115 		else
5116 			list_add(&new6_entry->common.list,
5117 				 &fib_node->entry_list);
5118 	}
5119 
5120 	return 0;
5121 }
5122 
5123 static void
5124 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5125 {
5126 	list_del(&fib6_entry->common.list);
5127 }
5128 
5129 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5130 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5131 					 bool replace)
5132 {
5133 	int err;
5134 
5135 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5136 	if (err)
5137 		return err;
5138 
5139 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5140 	if (err)
5141 		goto err_fib_node_entry_add;
5142 
5143 	return 0;
5144 
5145 err_fib_node_entry_add:
5146 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5147 	return err;
5148 }
5149 
5150 static void
5151 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5152 				struct mlxsw_sp_fib6_entry *fib6_entry)
5153 {
5154 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5155 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5156 }
5157 
5158 static struct mlxsw_sp_fib6_entry *
5159 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5160 			   const struct rt6_info *rt)
5161 {
5162 	struct mlxsw_sp_fib6_entry *fib6_entry;
5163 	struct mlxsw_sp_fib_node *fib_node;
5164 	struct mlxsw_sp_fib *fib;
5165 	struct mlxsw_sp_vr *vr;
5166 
5167 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
5168 	if (!vr)
5169 		return NULL;
5170 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5171 
5172 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
5173 					    sizeof(rt->rt6i_dst.addr),
5174 					    rt->rt6i_dst.plen);
5175 	if (!fib_node)
5176 		return NULL;
5177 
5178 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5179 		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5180 
5181 		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
5182 		    rt->rt6i_metric == iter_rt->rt6i_metric &&
5183 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5184 			return fib6_entry;
5185 	}
5186 
5187 	return NULL;
5188 }
5189 
5190 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5191 					struct mlxsw_sp_fib6_entry *fib6_entry,
5192 					bool replace)
5193 {
5194 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5195 	struct mlxsw_sp_fib6_entry *replaced;
5196 
5197 	if (!replace)
5198 		return;
5199 
5200 	replaced = list_next_entry(fib6_entry, common.list);
5201 
5202 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5203 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5204 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5205 }
5206 
5207 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5208 				    struct rt6_info *rt, bool replace)
5209 {
5210 	struct mlxsw_sp_fib6_entry *fib6_entry;
5211 	struct mlxsw_sp_fib_node *fib_node;
5212 	int err;
5213 
5214 	if (mlxsw_sp->router->aborted)
5215 		return 0;
5216 
5217 	if (rt->rt6i_src.plen)
5218 		return -EINVAL;
5219 
5220 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5221 		return 0;
5222 
5223 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
5224 					 &rt->rt6i_dst.addr,
5225 					 sizeof(rt->rt6i_dst.addr),
5226 					 rt->rt6i_dst.plen,
5227 					 MLXSW_SP_L3_PROTO_IPV6);
5228 	if (IS_ERR(fib_node))
5229 		return PTR_ERR(fib_node);
5230 
	/* Before creating a new entry, try to append the route to an
	 * existing multipath entry.
	 */
5234 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5235 	if (fib6_entry) {
5236 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5237 		if (err)
5238 			goto err_fib6_entry_nexthop_add;
5239 		return 0;
5240 	}
5241 
5242 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5243 	if (IS_ERR(fib6_entry)) {
5244 		err = PTR_ERR(fib6_entry);
5245 		goto err_fib6_entry_create;
5246 	}
5247 
5248 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5249 	if (err)
5250 		goto err_fib6_node_entry_link;
5251 
5252 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5253 
5254 	return 0;
5255 
5256 err_fib6_node_entry_link:
5257 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5258 err_fib6_entry_create:
5259 err_fib6_entry_nexthop_add:
5260 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5261 	return err;
5262 }
5263 
5264 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5265 				     struct rt6_info *rt)
5266 {
5267 	struct mlxsw_sp_fib6_entry *fib6_entry;
5268 	struct mlxsw_sp_fib_node *fib_node;
5269 
5270 	if (mlxsw_sp->router->aborted)
5271 		return;
5272 
5273 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5274 		return;
5275 
5276 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5277 	if (WARN_ON(!fib6_entry))
5278 		return;
5279 
	/* If the route is part of a multipath entry, but not the last one
	 * removed, then only reduce its nexthop group.
	 */
5283 	if (!list_is_singular(&fib6_entry->rt6_list)) {
5284 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5285 		return;
5286 	}
5287 
5288 	fib_node = fib6_entry->common.fib_node;
5289 
5290 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5291 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5292 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5293 }
5294 
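/* In the abort state, routing is handed over to the kernel: a minimal
 * LPM tree is bound to all virtual routers, and a default route trapping
 * all packets to the CPU is installed in each of them.
 */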
5295 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5296 					    enum mlxsw_reg_ralxx_protocol proto,
5297 					    u8 tree_id)
5298 {
5299 	char ralta_pl[MLXSW_REG_RALTA_LEN];
5300 	char ralst_pl[MLXSW_REG_RALST_LEN];
5301 	int i, err;
5302 
5303 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5304 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5305 	if (err)
5306 		return err;
5307 
5308 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5309 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5310 	if (err)
5311 		return err;
5312 
5313 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5314 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5315 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5316 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5317 
5318 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5319 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5320 				      raltb_pl);
5321 		if (err)
5322 			return err;
5323 
5324 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5325 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5326 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5327 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5328 				      ralue_pl);
5329 		if (err)
5330 			return err;
5331 	}
5332 
5333 	return 0;
5334 }
5335 
5336 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5337 				     struct mfc_entry_notifier_info *men_info,
5338 				     bool replace)
5339 {
5340 	struct mlxsw_sp_vr *vr;
5341 
5342 	if (mlxsw_sp->router->aborted)
5343 		return 0;
5344 
5345 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5346 	if (IS_ERR(vr))
5347 		return PTR_ERR(vr);
5348 
5349 	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
5350 }
5351 
5352 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5353 				      struct mfc_entry_notifier_info *men_info)
5354 {
5355 	struct mlxsw_sp_vr *vr;
5356 
5357 	if (mlxsw_sp->router->aborted)
5358 		return;
5359 
5360 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5361 	if (WARN_ON(!vr))
5362 		return;
5363 
5364 	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
5365 	mlxsw_sp_vr_put(vr);
5366 }
5367 
5368 static int
5369 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5370 			      struct vif_entry_notifier_info *ven_info)
5371 {
5372 	struct mlxsw_sp_rif *rif;
5373 	struct mlxsw_sp_vr *vr;
5374 
5375 	if (mlxsw_sp->router->aborted)
5376 		return 0;
5377 
5378 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5379 	if (IS_ERR(vr))
5380 		return PTR_ERR(vr);
5381 
5382 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5383 	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
5384 				   ven_info->vif_index,
5385 				   ven_info->vif_flags, rif);
5386 }
5387 
5388 static void
5389 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5390 			      struct vif_entry_notifier_info *ven_info)
5391 {
5392 	struct mlxsw_sp_vr *vr;
5393 
5394 	if (mlxsw_sp->router->aborted)
5395 		return;
5396 
5397 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5398 	if (WARN_ON(!vr))
5399 		return;
5400 
5401 	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
5402 	mlxsw_sp_vr_put(vr);
5403 }
5404 
5405 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5406 {
5407 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5408 	int err;
5409 
5410 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5411 					       MLXSW_SP_LPM_TREE_MIN);
5412 	if (err)
5413 		return err;
5414 
5415 	/* The multicast router code does not need an abort trap, as by
5416 	 * default packets that do not match any route are trapped to the CPU.
5417 	 */
5418 
5419 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5420 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5421 						MLXSW_SP_LPM_TREE_MIN + 1);
5422 }
5423 
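/* Destroying an entry below may drop the fib_node's last reference and
 * free the node together with its entry list head. The do_break test
 * therefore checks -- before the entry is destroyed -- whether the
 * iterator has already wrapped around to the list head, i.e. whether
 * this entry is the last one. A minimal sketch of the idiom (names
 * shortened for illustration):
 *
 *	list_for_each_entry_safe(e, tmp, &node->list, list) {
 *		bool last = &tmp->list == &node->list;
 *
 *		destroy(e);
 *		if (last)
 *			break;
 *	}
 *
 * where destroy() may free 'node', after which 'tmp' would point into
 * freed memory.
 */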
5424 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5425 				     struct mlxsw_sp_fib_node *fib_node)
5426 {
5427 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5428 
5429 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5430 				 common.list) {
5431 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5432 
5433 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5434 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5435 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5436 		/* Break when the entry list is empty and the node was freed.
5437 		 * Otherwise, we would access freed memory in the next
5438 		 * iteration.
5439 		 */
5440 		if (do_break)
5441 			break;
5442 	}
5443 }
5444 
5445 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5446 				     struct mlxsw_sp_fib_node *fib_node)
5447 {
5448 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5449 
5450 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5451 				 common.list) {
5452 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5453 
5454 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5455 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5456 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5457 		if (do_break)
5458 			break;
5459 	}
5460 }
5461 
5462 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5463 				    struct mlxsw_sp_fib_node *fib_node)
5464 {
5465 	switch (fib_node->fib->proto) {
5466 	case MLXSW_SP_L3_PROTO_IPV4:
5467 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5468 		break;
5469 	case MLXSW_SP_L3_PROTO_IPV6:
5470 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5471 		break;
5472 	}
5473 }
5474 
5475 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5476 				  struct mlxsw_sp_vr *vr,
5477 				  enum mlxsw_sp_l3proto proto)
5478 {
5479 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5480 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5481 
5482 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5483 		bool do_break = &tmp->list == &fib->node_list;
5484 
5485 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5486 		if (do_break)
5487 			break;
5488 	}
5489 }
5490 
5491 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5492 {
5493 	int i;
5494 
5495 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5496 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5497 
5498 		if (!mlxsw_sp_vr_is_used(vr))
5499 			continue;
5500 
5501 		mlxsw_sp_mr_table_flush(vr->mr4_table);
5502 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5503 
5504 		/* If the virtual router was only used for IPv4, then it is
5505 		 * no longer in use.
5506 		 */
5507 		if (!mlxsw_sp_vr_is_used(vr))
5508 			continue;
5509 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5510 	}
5511 }
5512 
5513 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5514 {
5515 	int err;
5516 
5517 	if (mlxsw_sp->router->aborted)
5518 		return;
5519 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5520 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5521 	mlxsw_sp->router->aborted = true;
5522 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5523 	if (err)
5524 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5525 }
5526 
5527 struct mlxsw_sp_fib_event_work {
5528 	struct work_struct work;
5529 	union {
5530 		struct fib6_entry_notifier_info fen6_info;
5531 		struct fib_entry_notifier_info fen_info;
5532 		struct fib_rule_notifier_info fr_info;
5533 		struct fib_nh_notifier_info fnh_info;
5534 		struct mfc_entry_notifier_info men_info;
5535 		struct vif_entry_notifier_info ven_info;
5536 	};
5537 	struct mlxsw_sp *mlxsw_sp;
5538 	unsigned long event;
5539 };
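
/* FIB notifications arrive in atomic context (the notifier is called
 * under RCU), so processing is deferred to process context: the
 * notifier copies the notification info into this work item and takes
 * a reference on any object the work will dereference; the work
 * handlers below then run under RTNL and drop those references once
 * done. In outline:
 *
 *	notifier (atomic):		work handler (process context):
 *		copy info, hold refs		rtnl_lock()
 *		INIT_WORK() + schedule		handle event, put refs
 *						rtnl_unlock(), kfree(work)
 */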
5540 
5541 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5542 {
5543 	struct mlxsw_sp_fib_event_work *fib_work =
5544 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5545 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5546 	bool replace, append;
5547 	int err;
5548 
5549 	/* Protect internal structures from changes */
5550 	rtnl_lock();
5551 	switch (fib_work->event) {
5552 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5553 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5554 	case FIB_EVENT_ENTRY_ADD:
5555 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5556 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5557 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5558 					       replace, append);
5559 		if (err)
5560 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5561 		fib_info_put(fib_work->fen_info.fi);
5562 		break;
5563 	case FIB_EVENT_ENTRY_DEL:
5564 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5565 		fib_info_put(fib_work->fen_info.fi);
5566 		break;
5567 	case FIB_EVENT_RULE_ADD:
5568 		/* If we got here, a rule was added that we do not support,
5569 		 * so just abort FIB offload.
5570 		 */
5571 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5572 		break;
5573 	case FIB_EVENT_NH_ADD: /* fall through */
5574 	case FIB_EVENT_NH_DEL:
5575 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5576 					fib_work->fnh_info.fib_nh);
5577 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5578 		break;
5579 	}
5580 	rtnl_unlock();
5581 	kfree(fib_work);
5582 }
5583 
5584 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5585 {
5586 	struct mlxsw_sp_fib_event_work *fib_work =
5587 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5588 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5589 	bool replace;
5590 	int err;
5591 
5592 	rtnl_lock();
5593 	switch (fib_work->event) {
5594 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5595 	case FIB_EVENT_ENTRY_ADD:
5596 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5597 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5598 					       fib_work->fen6_info.rt, replace);
5599 		if (err)
5600 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5601 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5602 		break;
5603 	case FIB_EVENT_ENTRY_DEL:
5604 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5605 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5606 		break;
5607 	case FIB_EVENT_RULE_ADD:
5608 		/* If we got here, a rule was added that we do not support,
5609 		 * so just abort FIB offload.
5610 		 */
5611 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5612 		break;
5613 	}
5614 	rtnl_unlock();
5615 	kfree(fib_work);
5616 }
5617 
5618 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5619 {
5620 	struct mlxsw_sp_fib_event_work *fib_work =
5621 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5622 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5623 	bool replace;
5624 	int err;
5625 
5626 	rtnl_lock();
5627 	switch (fib_work->event) {
5628 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5629 	case FIB_EVENT_ENTRY_ADD:
5630 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5631 
5632 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5633 						replace);
5634 		if (err)
5635 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5636 		ipmr_cache_put(fib_work->men_info.mfc);
5637 		break;
5638 	case FIB_EVENT_ENTRY_DEL:
5639 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5640 		ipmr_cache_put(fib_work->men_info.mfc);
5641 		break;
5642 	case FIB_EVENT_VIF_ADD:
5643 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5644 						    &fib_work->ven_info);
5645 		if (err)
5646 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5647 		dev_put(fib_work->ven_info.dev);
5648 		break;
5649 	case FIB_EVENT_VIF_DEL:
5650 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5651 					      &fib_work->ven_info);
5652 		dev_put(fib_work->ven_info.dev);
5653 		break;
5654 	case FIB_EVENT_RULE_ADD:
5655 		/* If we got here, a rule was added that we do not support,
5656 		 * so just abort FIB offload.
5657 		 */
5658 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5659 		break;
5660 	}
5661 	rtnl_unlock();
5662 	kfree(fib_work);
5663 }
5664 
5665 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5666 				       struct fib_notifier_info *info)
5667 {
5668 	struct fib_entry_notifier_info *fen_info;
5669 	struct fib_nh_notifier_info *fnh_info;
5670 
5671 	switch (fib_work->event) {
5672 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5673 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5674 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5675 	case FIB_EVENT_ENTRY_DEL:
5676 		fen_info = container_of(info, struct fib_entry_notifier_info,
5677 					info);
5678 		fib_work->fen_info = *fen_info;
5679 		/* Take a reference on the fib_info to prevent it from being
5680 		 * freed while the work is queued; release it afterwards.
5681 		 */
5682 		fib_info_hold(fib_work->fen_info.fi);
5683 		break;
5684 	case FIB_EVENT_NH_ADD: /* fall through */
5685 	case FIB_EVENT_NH_DEL:
5686 		fnh_info = container_of(info, struct fib_nh_notifier_info,
5687 					info);
5688 		fib_work->fnh_info = *fnh_info;
5689 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5690 		break;
5691 	}
5692 }
5693 
5694 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5695 				       struct fib_notifier_info *info)
5696 {
5697 	struct fib6_entry_notifier_info *fen6_info;
5698 
5699 	switch (fib_work->event) {
5700 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5701 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5702 	case FIB_EVENT_ENTRY_DEL:
5703 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
5704 					 info);
5705 		fib_work->fen6_info = *fen6_info;
5706 		rt6_hold(fib_work->fen6_info.rt);
5707 		break;
5708 	}
5709 }
5710 
5711 static void
5712 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5713 			    struct fib_notifier_info *info)
5714 {
5715 	switch (fib_work->event) {
5716 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5717 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5718 	case FIB_EVENT_ENTRY_DEL:
5719 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5720 		ipmr_cache_hold(fib_work->men_info.mfc);
5721 		break;
5722 	case FIB_EVENT_VIF_ADD: /* fall through */
5723 	case FIB_EVENT_VIF_DEL:
5724 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5725 		dev_hold(fib_work->ven_info.dev);
5726 		break;
5727 	}
5728 }
5729 
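/* Only default FIB rules and l3mdev (VRF) rules can be reflected in
 * the device; any other rule would make the hardware lookup diverge
 * from the kernel's, so offload is aborted instead. For example, a
 * policy rule such as (illustrative iproute2 invocation):
 *
 *	# ip rule add from 192.0.2.0/24 table 10
 *
 * fails the checks below and triggers mlxsw_sp_router_fib_abort() via
 * the FIB_EVENT_RULE_ADD path of the work handlers above.
 */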
5730 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5731 					  struct fib_notifier_info *info,
5732 					  struct mlxsw_sp *mlxsw_sp)
5733 {
5734 	struct netlink_ext_ack *extack = info->extack;
5735 	struct fib_rule_notifier_info *fr_info;
5736 	struct fib_rule *rule;
5737 	int err = 0;
5738 
5739 	/* Nothing to do at the moment. */
5740 	if (event == FIB_EVENT_RULE_DEL)
5741 		return 0;
5742 
5743 	if (mlxsw_sp->router->aborted)
5744 		return 0;
5745 
5746 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
5747 	rule = fr_info->rule;
5748 
5749 	switch (info->family) {
5750 	case AF_INET:
5751 		if (!fib4_rule_default(rule) && !rule->l3mdev)
5752 			err = -EOPNOTSUPP;
5753 		break;
5754 	case AF_INET6:
5755 		if (!fib6_rule_default(rule) && !rule->l3mdev)
5756 			err = -EOPNOTSUPP;
5757 		break;
5758 	case RTNL_FAMILY_IPMR:
5759 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
5760 			err = -EOPNOTSUPP;
5761 		break;
5762 	}
5763 
5764 	if (err < 0)
5765 		NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
5766 
5767 	return err;
5768 }
5769 
5770 /* Called with rcu_read_lock() */
5771 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5772 				     unsigned long event, void *ptr)
5773 {
5774 	struct mlxsw_sp_fib_event_work *fib_work;
5775 	struct fib_notifier_info *info = ptr;
5776 	struct mlxsw_sp_router *router;
5777 	int err;
5778 
5779 	if (!net_eq(info->net, &init_net) ||
5780 	    (info->family != AF_INET && info->family != AF_INET6 &&
5781 	     info->family != RTNL_FAMILY_IPMR))
5782 		return NOTIFY_DONE;
5783 
5784 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5785 
5786 	switch (event) {
5787 	case FIB_EVENT_RULE_ADD: /* fall through */
5788 	case FIB_EVENT_RULE_DEL:
5789 		err = mlxsw_sp_router_fib_rule_event(event, info,
5790 						     router->mlxsw_sp);
5791 		if (!err)
5792 			return NOTIFY_DONE;
5793 	}
5794 
5795 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5796 	if (WARN_ON(!fib_work))
5797 		return NOTIFY_BAD;
5798 
5799 	fib_work->mlxsw_sp = router->mlxsw_sp;
5800 	fib_work->event = event;
5801 
5802 	switch (info->family) {
5803 	case AF_INET:
5804 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5805 		mlxsw_sp_router_fib4_event(fib_work, info);
5806 		break;
5807 	case AF_INET6:
5808 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5809 		mlxsw_sp_router_fib6_event(fib_work, info);
5810 		break;
5811 	case RTNL_FAMILY_IPMR:
5812 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5813 		mlxsw_sp_router_fibmr_event(fib_work, info);
5814 		break;
5815 	}
5816 
5817 	mlxsw_core_schedule_work(&fib_work->work);
5818 
5819 	return NOTIFY_DONE;
5820 }
5821 
5822 static struct mlxsw_sp_rif *
5823 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5824 			 const struct net_device *dev)
5825 {
5826 	int i;
5827 
5828 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5829 		if (mlxsw_sp->router->rifs[i] &&
5830 		    mlxsw_sp->router->rifs[i]->dev == dev)
5831 			return mlxsw_sp->router->rifs[i];
5832 
5833 	return NULL;
5834 }
5835 
5836 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5837 {
5838 	char ritr_pl[MLXSW_REG_RITR_LEN];
5839 	int err;
5840 
5841 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5842 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5843 	if (WARN_ON_ONCE(err))
5844 		return err;
5845 
5846 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
5847 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5848 }
5849 
5850 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
5851 					  struct mlxsw_sp_rif *rif)
5852 {
5853 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
5854 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
5855 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
5856 }
5857 
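/* Decide whether an inet{,6}addr event should change the routing
 * configuration of a netdev:
 *
 *	NETDEV_UP:   act only if the netdev has no RIF yet (one will
 *		     be created).
 *	NETDEV_DOWN: act only if a RIF exists, the last IPv4 / IPv6
 *		     address is gone and the netdev is not a VRF slave
 *		     (the RIF will be destroyed). VRF enslavement is
 *		     handled via the CHANGEUPPER path instead.
 */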
5858 static bool
5859 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
5860 			   unsigned long event)
5861 {
5862 	struct inet6_dev *inet6_dev;
5863 	bool addr_list_empty = true;
5864 	struct in_device *idev;
5865 
5866 	switch (event) {
5867 	case NETDEV_UP:
5868 		return rif == NULL;
5869 	case NETDEV_DOWN:
5870 		idev = __in_dev_get_rtnl(dev);
5871 		if (idev && idev->ifa_list)
5872 			addr_list_empty = false;
5873 
5874 		inet6_dev = __in6_dev_get(dev);
5875 		if (addr_list_empty && inet6_dev &&
5876 		    !list_empty(&inet6_dev->addr_list))
5877 			addr_list_empty = false;
5878 
5879 		if (rif && addr_list_empty &&
5880 		    !netif_is_l3_slave(rif->dev))
5881 			return true;
5882 		/* It is possible we already removed the RIF ourselves
5883 		 * if it was assigned to a netdev that is now a bridge
5884 		 * or LAG slave.
5885 		 */
5886 		return false;
5887 	}
5888 
5889 	return false;
5890 }
5891 
5892 static enum mlxsw_sp_rif_type
5893 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
5894 		      const struct net_device *dev)
5895 {
5896 	enum mlxsw_sp_fid_type type;
5897 
5898 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
5899 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
5900 
5901 	/* Otherwise, the RIF type is derived from that of the underlying FID. */
5902 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
5903 		type = MLXSW_SP_FID_TYPE_8021Q;
5904 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
5905 		type = MLXSW_SP_FID_TYPE_8021Q;
5906 	else if (netif_is_bridge_master(dev))
5907 		type = MLXSW_SP_FID_TYPE_8021D;
5908 	else
5909 		type = MLXSW_SP_FID_TYPE_RFID;
5910 
5911 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
5912 }
5913 
5914 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5915 {
5916 	int i;
5917 
5918 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5919 		if (!mlxsw_sp->router->rifs[i]) {
5920 			*p_rif_index = i;
5921 			return 0;
5922 		}
5923 	}
5924 
5925 	return -ENOBUFS;
5926 }
5927 
5928 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5929 					       u16 vr_id,
5930 					       struct net_device *l3_dev)
5931 {
5932 	struct mlxsw_sp_rif *rif;
5933 
5934 	rif = kzalloc(rif_size, GFP_KERNEL);
5935 	if (!rif)
5936 		return NULL;
5937 
5938 	INIT_LIST_HEAD(&rif->nexthop_list);
5939 	INIT_LIST_HEAD(&rif->neigh_list);
5940 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
5941 	rif->mtu = l3_dev->mtu;
5942 	rif->vr_id = vr_id;
5943 	rif->dev = l3_dev;
5944 	rif->rif_index = rif_index;
5945 
5946 	return rif;
5947 }
5948 
5949 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
5950 					   u16 rif_index)
5951 {
5952 	return mlxsw_sp->router->rifs[rif_index];
5953 }
5954 
5955 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
5956 {
5957 	return rif->rif_index;
5958 }
5959 
5960 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5961 {
5962 	return lb_rif->common.rif_index;
5963 }
5964 
5965 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5966 {
5967 	return lb_rif->ul_vr_id;
5968 }
5969 
5970 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
5971 {
5972 	return rif->dev->ifindex;
5973 }
5974 
5975 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
5976 {
5977 	return rif->dev;
5978 }
5979 
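/* RIF creation follows a fixed sequence -- resolve the ops from the
 * netdev type, bind a virtual router, allocate a RIF index and the RIF
 * itself, optionally attach a FID, then program the hardware -- and
 * the error path unwinds the same steps in reverse. Condensed sketch
 * (error handling elided, for illustration only):
 *
 *	ops = mlxsw_sp->router->rif_ops_arr[type];
 *	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
 *	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, dev);
 *	rif->fid = ops->fid_get ? ops->fid_get(rif) : NULL;
 *	if (ops->setup)
 *		ops->setup(rif, params);
 *	ops->configure(rif);
 */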
5980 static struct mlxsw_sp_rif *
5981 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
5982 		    const struct mlxsw_sp_rif_params *params,
5983 		    struct netlink_ext_ack *extack)
5984 {
5985 	u32 tb_id = l3mdev_fib_table(params->dev);
5986 	const struct mlxsw_sp_rif_ops *ops;
5987 	struct mlxsw_sp_fid *fid = NULL;
5988 	enum mlxsw_sp_rif_type type;
5989 	struct mlxsw_sp_rif *rif;
5990 	struct mlxsw_sp_vr *vr;
5991 	u16 rif_index;
5992 	int err;
5993 
5994 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
5995 	ops = mlxsw_sp->router->rif_ops_arr[type];
5996 
5997 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
5998 	if (IS_ERR(vr))
5999 		return ERR_CAST(vr);
6000 	vr->rif_count++;
6001 
6002 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6003 	if (err) {
6004 		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
6005 		goto err_rif_index_alloc;
6006 	}
6007 
6008 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6009 	if (!rif) {
6010 		err = -ENOMEM;
6011 		goto err_rif_alloc;
6012 	}
6013 	rif->mlxsw_sp = mlxsw_sp;
6014 	rif->ops = ops;
6015 
6016 	if (ops->fid_get) {
6017 		fid = ops->fid_get(rif);
6018 		if (IS_ERR(fid)) {
6019 			err = PTR_ERR(fid);
6020 			goto err_fid_get;
6021 		}
6022 		rif->fid = fid;
6023 	}
6024 
6025 	if (ops->setup)
6026 		ops->setup(rif, params);
6027 
6028 	err = ops->configure(rif);
6029 	if (err)
6030 		goto err_configure;
6031 
6032 	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
6033 	if (err)
6034 		goto err_mr_rif_add;
6035 
6036 	mlxsw_sp_rif_counters_alloc(rif);
6037 	mlxsw_sp->router->rifs[rif_index] = rif;
6038 
6039 	return rif;
6040 
6041 err_mr_rif_add:
6042 	ops->deconfigure(rif);
6043 err_configure:
6044 	if (fid)
6045 		mlxsw_sp_fid_put(fid);
6046 err_fid_get:
6047 	kfree(rif);
6048 err_rif_alloc:
6049 err_rif_index_alloc:
6050 	vr->rif_count--;
6051 	mlxsw_sp_vr_put(vr);
6052 	return ERR_PTR(err);
6053 }
6054 
6055 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6056 {
6057 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6058 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6059 	struct mlxsw_sp_fid *fid = rif->fid;
6060 	struct mlxsw_sp_vr *vr;
6061 
6062 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6063 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6064 
6065 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6066 	mlxsw_sp_rif_counters_free(rif);
6067 	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
6068 	ops->deconfigure(rif);
6069 	if (fid)
6070 		/* Loopback RIFs are not associated with a FID. */
6071 		mlxsw_sp_fid_put(fid);
6072 	kfree(rif);
6073 	vr->rif_count--;
6074 	mlxsw_sp_vr_put(vr);
6075 }
6076 
6077 static void
6078 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6079 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6080 {
6081 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6082 
6083 	params->vid = mlxsw_sp_port_vlan->vid;
6084 	params->lag = mlxsw_sp_port->lagged;
6085 	if (params->lag)
6086 		params->lag_id = mlxsw_sp_port->lag_id;
6087 	else
6088 		params->system_port = mlxsw_sp_port->local_port;
6089 }
6090 
6091 static int
6092 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6093 			       struct net_device *l3_dev,
6094 			       struct netlink_ext_ack *extack)
6095 {
6096 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6097 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6098 	u16 vid = mlxsw_sp_port_vlan->vid;
6099 	struct mlxsw_sp_rif *rif;
6100 	struct mlxsw_sp_fid *fid;
6101 	int err;
6102 
6103 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6104 	if (!rif) {
6105 		struct mlxsw_sp_rif_params params = {
6106 			.dev = l3_dev,
6107 		};
6108 
6109 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6110 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6111 		if (IS_ERR(rif))
6112 			return PTR_ERR(rif);
6113 	}
6114 
6115 	/* The FID was already created; just take another reference. */
6116 	fid = rif->ops->fid_get(rif);
6117 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6118 	if (err)
6119 		goto err_fid_port_vid_map;
6120 
6121 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6122 	if (err)
6123 		goto err_port_vid_learning_set;
6124 
6125 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6126 					BR_STATE_FORWARDING);
6127 	if (err)
6128 		goto err_port_vid_stp_set;
6129 
6130 	mlxsw_sp_port_vlan->fid = fid;
6131 
6132 	return 0;
6133 
6134 err_port_vid_stp_set:
6135 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6136 err_port_vid_learning_set:
6137 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6138 err_fid_port_vid_map:
6139 	mlxsw_sp_fid_put(fid);
6140 	return err;
6141 }
6142 
6143 void
6144 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6145 {
6146 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6147 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6148 	u16 vid = mlxsw_sp_port_vlan->vid;
6149 
6150 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6151 		return;
6152 
6153 	mlxsw_sp_port_vlan->fid = NULL;
6154 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6155 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6156 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6157 	/* If the router port holds the last reference on the rFID, then
6158 	 * the associated Sub-port RIF will be destroyed.
6159 	 */
6160 	mlxsw_sp_fid_put(fid);
6161 }
6162 
6163 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6164 					     struct net_device *port_dev,
6165 					     unsigned long event, u16 vid,
6166 					     struct netlink_ext_ack *extack)
6167 {
6168 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6169 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6170 
6171 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6172 	if (WARN_ON(!mlxsw_sp_port_vlan))
6173 		return -EINVAL;
6174 
6175 	switch (event) {
6176 	case NETDEV_UP:
6177 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6178 						      l3_dev, extack);
6179 	case NETDEV_DOWN:
6180 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6181 		break;
6182 	}
6183 
6184 	return 0;
6185 }
6186 
6187 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6188 					unsigned long event,
6189 					struct netlink_ext_ack *extack)
6190 {
6191 	if (netif_is_bridge_port(port_dev) ||
6192 	    netif_is_lag_port(port_dev) ||
6193 	    netif_is_ovs_port(port_dev))
6194 		return 0;
6195 
6196 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6197 						 extack);
6198 }
6199 
6200 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6201 					 struct net_device *lag_dev,
6202 					 unsigned long event, u16 vid,
6203 					 struct netlink_ext_ack *extack)
6204 {
6205 	struct net_device *port_dev;
6206 	struct list_head *iter;
6207 	int err;
6208 
6209 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6210 		if (mlxsw_sp_port_dev_check(port_dev)) {
6211 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6212 								port_dev,
6213 								event, vid,
6214 								extack);
6215 			if (err)
6216 				return err;
6217 		}
6218 	}
6219 
6220 	return 0;
6221 }
6222 
6223 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6224 				       unsigned long event,
6225 				       struct netlink_ext_ack *extack)
6226 {
6227 	if (netif_is_bridge_port(lag_dev))
6228 		return 0;
6229 
6230 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
6231 					     extack);
6232 }
6233 
6234 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6235 					  unsigned long event,
6236 					  struct netlink_ext_ack *extack)
6237 {
6238 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6239 	struct mlxsw_sp_rif_params params = {
6240 		.dev = l3_dev,
6241 	};
6242 	struct mlxsw_sp_rif *rif;
6243 
6244 	switch (event) {
6245 	case NETDEV_UP:
6246 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6247 		if (IS_ERR(rif))
6248 			return PTR_ERR(rif);
6249 		break;
6250 	case NETDEV_DOWN:
6251 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6252 		mlxsw_sp_rif_destroy(rif);
6253 		break;
6254 	}
6255 
6256 	return 0;
6257 }
6258 
6259 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6260 					unsigned long event,
6261 					struct netlink_ext_ack *extack)
6262 {
6263 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6264 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6265 
6266 	if (netif_is_bridge_port(vlan_dev))
6267 		return 0;
6268 
6269 	if (mlxsw_sp_port_dev_check(real_dev))
6270 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6271 							 event, vid, extack);
6272 	else if (netif_is_lag_master(real_dev))
6273 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6274 						     vid, extack);
6275 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6276 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6277 
6278 	return 0;
6279 }
6280 
6281 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
6282 				     unsigned long event,
6283 				     struct netlink_ext_ack *extack)
6284 {
6285 	if (mlxsw_sp_port_dev_check(dev))
6286 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6287 	else if (netif_is_lag_master(dev))
6288 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6289 	else if (netif_is_bridge_master(dev))
6290 		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
6291 	else if (is_vlan_dev(dev))
6292 		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
6293 	else
6294 		return 0;
6295 }
6296 
6297 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6298 			    unsigned long event, void *ptr)
6299 {
6300 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6301 	struct net_device *dev = ifa->ifa_dev->dev;
6302 	struct mlxsw_sp *mlxsw_sp;
6303 	struct mlxsw_sp_rif *rif;
6304 	int err = 0;
6305 
6306 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6307 	if (event == NETDEV_UP)
6308 		goto out;
6309 
6310 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6311 	if (!mlxsw_sp)
6312 		goto out;
6313 
6314 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6315 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6316 		goto out;
6317 
6318 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6319 out:
6320 	return notifier_from_errno(err);
6321 }
6322 
6323 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6324 				  unsigned long event, void *ptr)
6325 {
6326 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6327 	struct net_device *dev = ivi->ivi_dev->dev;
6328 	struct mlxsw_sp *mlxsw_sp;
6329 	struct mlxsw_sp_rif *rif;
6330 	int err = 0;
6331 
6332 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6333 	if (!mlxsw_sp)
6334 		goto out;
6335 
6336 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6337 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6338 		goto out;
6339 
6340 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6341 out:
6342 	return notifier_from_errno(err);
6343 }
6344 
6345 struct mlxsw_sp_inet6addr_event_work {
6346 	struct work_struct work;
6347 	struct net_device *dev;
6348 	unsigned long event;
6349 };
6350 
6351 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6352 {
6353 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6354 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6355 	struct net_device *dev = inet6addr_work->dev;
6356 	unsigned long event = inet6addr_work->event;
6357 	struct mlxsw_sp *mlxsw_sp;
6358 	struct mlxsw_sp_rif *rif;
6359 
6360 	rtnl_lock();
6361 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6362 	if (!mlxsw_sp)
6363 		goto out;
6364 
6365 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6366 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6367 		goto out;
6368 
6369 	__mlxsw_sp_inetaddr_event(dev, event, NULL);
6370 out:
6371 	rtnl_unlock();
6372 	dev_put(dev);
6373 	kfree(inet6addr_work);
6374 }
6375 
6376 /* Called with rcu_read_lock() */
6377 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6378 			     unsigned long event, void *ptr)
6379 {
6380 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6381 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6382 	struct net_device *dev = if6->idev->dev;
6383 
6384 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6385 	if (event == NETDEV_UP)
6386 		return NOTIFY_DONE;
6387 
6388 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6389 		return NOTIFY_DONE;
6390 
6391 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6392 	if (!inet6addr_work)
6393 		return NOTIFY_BAD;
6394 
6395 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6396 	inet6addr_work->dev = dev;
6397 	inet6addr_work->event = event;
6398 	dev_hold(dev);
6399 	mlxsw_core_schedule_work(&inet6addr_work->work);
6400 
6401 	return NOTIFY_DONE;
6402 }
6403 
6404 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6405 				   unsigned long event, void *ptr)
6406 {
6407 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6408 	struct net_device *dev = i6vi->i6vi_dev->dev;
6409 	struct mlxsw_sp *mlxsw_sp;
6410 	struct mlxsw_sp_rif *rif;
6411 	int err = 0;
6412 
6413 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6414 	if (!mlxsw_sp)
6415 		goto out;
6416 
6417 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6418 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6419 		goto out;
6420 
6421 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6422 out:
6423 	return notifier_from_errno(err);
6424 }
6425 
6426 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6427 			     const char *mac, int mtu)
6428 {
6429 	char ritr_pl[MLXSW_REG_RITR_LEN];
6430 	int err;
6431 
6432 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6433 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6434 	if (err)
6435 		return err;
6436 
6437 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6438 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6439 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6440 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6441 }
6442 
6443 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
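/* Handle a MAC address or MTU change on a netdev that is backed by a
 * RIF: remove the FDB entry for the old address, edit the RIF in place
 * via RITR, install an FDB entry for the new address, and roll back in
 * reverse order on failure. A typical trigger (illustrative command;
 * 'swp1' is a hypothetical port name):
 *
 *	# ip link set dev swp1 mtu 9000
 */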
6444 {
6445 	struct mlxsw_sp *mlxsw_sp;
6446 	struct mlxsw_sp_rif *rif;
6447 	u16 fid_index;
6448 	int err;
6449 
6450 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6451 	if (!mlxsw_sp)
6452 		return 0;
6453 
6454 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6455 	if (!rif)
6456 		return 0;
6457 	fid_index = mlxsw_sp_fid_index(rif->fid);
6458 
6459 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6460 	if (err)
6461 		return err;
6462 
6463 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6464 				dev->mtu);
6465 	if (err)
6466 		goto err_rif_edit;
6467 
6468 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6469 	if (err)
6470 		goto err_rif_fdb_op;
6471 
6472 	if (rif->mtu != dev->mtu) {
6473 		struct mlxsw_sp_vr *vr;
6474 
6475 		/* The RIF is relevant only to its mr_table instance, since,
6476 		 * unlike in unicast routing, in multicast routing a RIF
6477 		 * cannot be shared between several multicast routing tables.
6478 		 */
6479 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
6480 		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
6481 	}
6482 
6483 	ether_addr_copy(rif->addr, dev->dev_addr);
6484 	rif->mtu = dev->mtu;
6485 
6486 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6487 
6488 	return 0;
6489 
6490 err_rif_fdb_op:
6491 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6492 err_rif_edit:
6493 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6494 	return err;
6495 }
6496 
6497 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6498 				  struct net_device *l3_dev,
6499 				  struct netlink_ext_ack *extack)
6500 {
6501 	struct mlxsw_sp_rif *rif;
6502 
6503 	/* If the netdev is already associated with a RIF, then we need to
6504 	 * destroy it and create a new one with the new virtual router ID.
6505 	 */
6506 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6507 	if (rif)
6508 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6509 
6510 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6511 }
6512 
6513 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6514 				    struct net_device *l3_dev)
6515 {
6516 	struct mlxsw_sp_rif *rif;
6517 
6518 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6519 	if (!rif)
6520 		return;
6521 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6522 }
6523 
6524 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6525 				 struct netdev_notifier_changeupper_info *info)
6526 {
6527 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6528 	int err = 0;
6529 
6530 	if (!mlxsw_sp)
6531 		return 0;
6532 
6533 	switch (event) {
6534 	case NETDEV_PRECHANGEUPPER:
6535 		return 0;
6536 	case NETDEV_CHANGEUPPER:
6537 		if (info->linking) {
6538 			struct netlink_ext_ack *extack;
6539 
6540 			extack = netdev_notifier_info_to_extack(&info->info);
6541 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6542 		} else {
6543 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6544 		}
6545 		break;
6546 	}
6547 
6548 	return err;
6549 }
6550 
6551 static struct mlxsw_sp_rif_subport *
6552 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6553 {
6554 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6555 }
6556 
6557 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6558 				       const struct mlxsw_sp_rif_params *params)
6559 {
6560 	struct mlxsw_sp_rif_subport *rif_subport;
6561 
6562 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6563 	rif_subport->vid = params->vid;
6564 	rif_subport->lag = params->lag;
6565 	if (params->lag)
6566 		rif_subport->lag_id = params->lag_id;
6567 	else
6568 		rif_subport->system_port = params->system_port;
6569 }
6570 
6571 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6572 {
6573 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6574 	struct mlxsw_sp_rif_subport *rif_subport;
6575 	char ritr_pl[MLXSW_REG_RITR_LEN];
6576 
6577 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6578 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6579 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
6580 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6581 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6582 				  rif_subport->lag ? rif_subport->lag_id :
6583 						     rif_subport->system_port,
6584 				  rif_subport->vid);
6585 
6586 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6587 }
6588 
6589 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6590 {
6591 	int err;
6592 
6593 	err = mlxsw_sp_rif_subport_op(rif, true);
6594 	if (err)
6595 		return err;
6596 
6597 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6598 				  mlxsw_sp_fid_index(rif->fid), true);
6599 	if (err)
6600 		goto err_rif_fdb_op;
6601 
6602 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6603 	return 0;
6604 
6605 err_rif_fdb_op:
6606 	mlxsw_sp_rif_subport_op(rif, false);
6607 	return err;
6608 }
6609 
6610 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6611 {
6612 	struct mlxsw_sp_fid *fid = rif->fid;
6613 
6614 	mlxsw_sp_fid_rif_set(fid, NULL);
6615 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6616 			    mlxsw_sp_fid_index(fid), false);
6617 	mlxsw_sp_rif_subport_op(rif, false);
6618 }
6619 
6620 static struct mlxsw_sp_fid *
6621 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
6622 {
6623 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6624 }
6625 
6626 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
6627 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
6628 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
6629 	.setup			= mlxsw_sp_rif_subport_setup,
6630 	.configure		= mlxsw_sp_rif_subport_configure,
6631 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
6632 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
6633 };
6634 
6635 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6636 				    enum mlxsw_reg_ritr_if_type type,
6637 				    u16 vid_fid, bool enable)
6638 {
6639 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6640 	char ritr_pl[MLXSW_REG_RITR_LEN];
6641 
6642 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6643 			    rif->dev->mtu);
6644 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6645 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6646 
6647 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6648 }
6649 
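/* The "router port" is a virtual port one past the last front-panel
 * port. Adding it to a FID's MC/BC flood tables -- as the configure
 * callbacks below do, e.g.:
 *
 *	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
 *			       mlxsw_sp_router_port(mlxsw_sp), true);
 *
 * -- makes traffic flooded in the FID reach the router, which is how a
 * VLAN or FID RIF sees, e.g., ARP broadcasts.
 */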
6650 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6651 {
6652 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6653 }
6654 
6655 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6656 {
6657 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6658 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6659 	int err;
6660 
6661 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6662 	if (err)
6663 		return err;
6664 
6665 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6666 				     mlxsw_sp_router_port(mlxsw_sp), true);
6667 	if (err)
6668 		goto err_fid_mc_flood_set;
6669 
6670 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6671 				     mlxsw_sp_router_port(mlxsw_sp), true);
6672 	if (err)
6673 		goto err_fid_bc_flood_set;
6674 
6675 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6676 				  mlxsw_sp_fid_index(rif->fid), true);
6677 	if (err)
6678 		goto err_rif_fdb_op;
6679 
6680 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6681 	return 0;
6682 
6683 err_rif_fdb_op:
6684 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6685 			       mlxsw_sp_router_port(mlxsw_sp), false);
6686 err_fid_bc_flood_set:
6687 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6688 			       mlxsw_sp_router_port(mlxsw_sp), false);
6689 err_fid_mc_flood_set:
6690 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6691 	return err;
6692 }
6693 
6694 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
6695 {
6696 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6697 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6698 	struct mlxsw_sp_fid *fid = rif->fid;
6699 
6700 	mlxsw_sp_fid_rif_set(fid, NULL);
6701 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6702 			    mlxsw_sp_fid_index(fid), false);
6703 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6704 			       mlxsw_sp_router_port(mlxsw_sp), false);
6705 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6706 			       mlxsw_sp_router_port(mlxsw_sp), false);
6707 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6708 }
6709 
6710 static struct mlxsw_sp_fid *
6711 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6712 {
6713 	u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6714 
6715 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
6716 }
6717 
6718 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
6719 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
6720 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6721 	.configure		= mlxsw_sp_rif_vlan_configure,
6722 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
6723 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
6724 };
6725 
6726 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
6727 {
6728 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6729 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6730 	int err;
6731 
6732 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
6733 				       true);
6734 	if (err)
6735 		return err;
6736 
6737 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6738 				     mlxsw_sp_router_port(mlxsw_sp), true);
6739 	if (err)
6740 		goto err_fid_mc_flood_set;
6741 
6742 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6743 				     mlxsw_sp_router_port(mlxsw_sp), true);
6744 	if (err)
6745 		goto err_fid_bc_flood_set;
6746 
6747 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6748 				  mlxsw_sp_fid_index(rif->fid), true);
6749 	if (err)
6750 		goto err_rif_fdb_op;
6751 
6752 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6753 	return 0;
6754 
6755 err_rif_fdb_op:
6756 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6757 			       mlxsw_sp_router_port(mlxsw_sp), false);
6758 err_fid_bc_flood_set:
6759 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6760 			       mlxsw_sp_router_port(mlxsw_sp), false);
6761 err_fid_mc_flood_set:
6762 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6763 	return err;
6764 }
6765 
6766 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
6767 {
6768 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6769 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6770 	struct mlxsw_sp_fid *fid = rif->fid;
6771 
6772 	mlxsw_sp_fid_rif_set(fid, NULL);
6773 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6774 			    mlxsw_sp_fid_index(fid), false);
6775 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6776 			       mlxsw_sp_router_port(mlxsw_sp), false);
6777 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6778 			       mlxsw_sp_router_port(mlxsw_sp), false);
6779 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6780 }
6781 
6782 static struct mlxsw_sp_fid *
6783 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
6784 {
6785 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
6786 }
6787 
6788 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
6789 	.type			= MLXSW_SP_RIF_TYPE_FID,
6790 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6791 	.configure		= mlxsw_sp_rif_fid_configure,
6792 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
6793 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
6794 };
6795 
6796 static struct mlxsw_sp_rif_ipip_lb *
6797 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
6798 {
6799 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
6800 }
6801 
6802 static void
6803 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6804 			   const struct mlxsw_sp_rif_params *params)
6805 {
6806 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6807 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
6808 
6809 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6810 				 common);
6811 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6812 	rif_lb->lb_config = params_lb->lb_config;
6813 }
6814 
6815 static int
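/* A loopback RIF represents the underlay side of an IP-in-IP tunnel:
 * RITR is packed as a loopback interface that decapsulates matching
 * packets into the underlay virtual router (ul_vr). Only IPv4
 * underlays are supported here; an IPv6 underlay is rejected with
 * -EAFNOSUPPORT.
 */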
6816 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
6817 			struct mlxsw_sp_vr *ul_vr, bool enable)
6818 {
6819 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
6820 	struct mlxsw_sp_rif *rif = &lb_rif->common;
6821 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6822 	char ritr_pl[MLXSW_REG_RITR_LEN];
6823 	u32 saddr4;
6824 
6825 	switch (lb_cf.ul_protocol) {
6826 	case MLXSW_SP_L3_PROTO_IPV4:
6827 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
6828 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
6829 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
6830 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
6831 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
6832 			    ul_vr->id, saddr4, lb_cf.okey);
6833 		break;
6834 
6835 	case MLXSW_SP_L3_PROTO_IPV6:
6836 		return -EAFNOSUPPORT;
6837 	}
6838 
6839 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6840 }
6841 
6842 static int
6843 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
6844 {
6845 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6846 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
6847 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6848 	struct mlxsw_sp_vr *ul_vr;
6849 	int err;
6850 
6851 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
6852 	if (IS_ERR(ul_vr))
6853 		return PTR_ERR(ul_vr);
6854 
6855 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
6856 	if (err)
6857 		goto err_loopback_op;
6858 
6859 	lb_rif->ul_vr_id = ul_vr->id;
6860 	++ul_vr->rif_count;
6861 	return 0;
6862 
6863 err_loopback_op:
6864 	mlxsw_sp_vr_put(ul_vr);
6865 	return err;
6866 }
6867 
6868 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
6869 {
6870 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6871 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6872 	struct mlxsw_sp_vr *ul_vr;
6873 
6874 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
6875 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
6876 
6877 	--ul_vr->rif_count;
6878 	mlxsw_sp_vr_put(ul_vr);
6879 }
6880 
6881 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
6882 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
6883 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
6884 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
6885 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
6886 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
6887 };
6888 
6889 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
6890 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
6891 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
6892 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
6893 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
6894 };
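
/* The ops are dispatched by RIF type, as resolved from the netdev in
 * mlxsw_sp_dev_rif_type():
 *
 *	port / LAG / VLAN upper of those  -> SUBPORT (rFID)
 *	VLAN-aware bridge (or VLAN upper) -> VLAN    (802.1Q FID)
 *	VLAN-unaware bridge               -> FID     (802.1D FID)
 *	IP-in-IP tunnel                   -> IPIP_LB (loopback)
 */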
6895 
6896 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6897 {
6898 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6899 
6900 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
6901 					 sizeof(struct mlxsw_sp_rif *),
6902 					 GFP_KERNEL);
6903 	if (!mlxsw_sp->router->rifs)
6904 		return -ENOMEM;
6905 
6906 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
6907 
6908 	return 0;
6909 }
6910 
6911 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6912 {
6913 	int i;
6914 
6915 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6916 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6917 
6918 	kfree(mlxsw_sp->router->rifs);
6919 }
6920 
6921 static int
6922 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
6923 {
6924 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
6925 
6926 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
6927 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
6928 }
6929 
6930 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
6931 {
6932 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
6933 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
6934 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
6935 }
6936 
6937 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
6938 {
6939 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
6940 }
6941 
6942 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
6943 {
6944 	struct mlxsw_sp_router *router;
6945 
6946 	/* Flush pending FIB notifications and then flush the device's
6947 	 * table before requesting another dump. The FIB notification
6948 	 * block is unregistered, so no need to take RTNL.
6949 	 */
6950 	mlxsw_core_flush_owq();
6951 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6952 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
6953 }
6954 
6955 #ifdef CONFIG_IP_ROUTE_MULTIPATH
6956 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
6957 {
6958 	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
6959 }
6960 
6961 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
6962 {
6963 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
6964 }
6965 
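/* Whether L4 fields participate in the IPv4 multipath hash mirrors the
 * net.ipv4.fib_multipath_hash_policy sysctl read below: 0 selects
 * L3-only hashing, 1 also hashes the protocol and TCP/UDP ports, e.g.
 * (illustrative invocation):
 *
 *	# sysctl net.ipv4.fib_multipath_hash_policy=1
 *
 * Note that the policy is only sampled here, at router init time.
 */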
6966 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
6967 {
6968 	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
6969 
6970 	mlxsw_sp_mp_hash_header_set(recr2_pl,
6971 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
6972 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
6973 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
6974 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
6975 	if (only_l3)
6976 		return;
6977 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
6978 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
6979 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
6980 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
6981 }
6982 
6983 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
6984 {
6985 	mlxsw_sp_mp_hash_header_set(recr2_pl,
6986 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
6987 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
6988 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
6989 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
6990 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
6991 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
6992 }
6993 
6994 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
6995 {
6996 	char recr2_pl[MLXSW_REG_RECR2_LEN];
6997 	u32 seed;
6998 
6999 	get_random_bytes(&seed, sizeof(seed));
7000 	mlxsw_reg_recr2_pack(recr2_pl, seed);
7001 	mlxsw_sp_mp4_hash_init(recr2_pl);
7002 	mlxsw_sp_mp6_hash_init(recr2_pl);
7003 
7004 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7005 }
7006 #else
7007 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7008 {
7009 	return 0;
7010 }
7011 #endif
7012 
7013 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7014 {
7015 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7016 	u64 max_rifs;
7017 	int err;
7018 
7019 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7020 		return -EIO;
7021 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7022 
7023 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7024 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7025 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7026 	if (err)
7027 		return err;
7028 	return 0;
7029 }
7030 
7031 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7032 {
7033 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7034 
7035 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7036 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7037 }
7038 
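/* Bring the router up in dependency order -- core router registers,
 * RIFs, tunnels, nexthop hashtables, LPM trees, multicast routing,
 * virtual routers and neighbour handling -- and only then register the
 * notifiers that feed events into those structures. The error labels
 * unwind the same steps in reverse.
 */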
7039 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7040 {
7041 	struct mlxsw_sp_router *router;
7042 	int err;
7043 
7044 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7045 	if (!router)
7046 		return -ENOMEM;
7047 	mlxsw_sp->router = router;
7048 	router->mlxsw_sp = mlxsw_sp;
7049 
7050 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7051 	err = __mlxsw_sp_router_init(mlxsw_sp);
7052 	if (err)
7053 		goto err_router_init;
7054 
7055 	err = mlxsw_sp_rifs_init(mlxsw_sp);
7056 	if (err)
7057 		goto err_rifs_init;
7058 
7059 	err = mlxsw_sp_ipips_init(mlxsw_sp);
7060 	if (err)
7061 		goto err_ipips_init;
7062 
7063 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7064 			      &mlxsw_sp_nexthop_ht_params);
7065 	if (err)
7066 		goto err_nexthop_ht_init;
7067 
7068 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7069 			      &mlxsw_sp_nexthop_group_ht_params);
7070 	if (err)
7071 		goto err_nexthop_group_ht_init;
7072 
7073 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7074 	err = mlxsw_sp_lpm_init(mlxsw_sp);
7075 	if (err)
7076 		goto err_lpm_init;
7077 
7078 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7079 	if (err)
7080 		goto err_mr_init;
7081 
7082 	err = mlxsw_sp_vrs_init(mlxsw_sp);
7083 	if (err)
7084 		goto err_vrs_init;
7085 
7086 	err = mlxsw_sp_neigh_init(mlxsw_sp);
7087 	if (err)
7088 		goto err_neigh_init;
7089 
7090 	mlxsw_sp->router->netevent_nb.notifier_call =
7091 		mlxsw_sp_router_netevent_event;
7092 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7093 	if (err)
7094 		goto err_register_netevent_notifier;
7095 
7096 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7097 	if (err)
7098 		goto err_mp_hash_init;
7099 
7100 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7101 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7102 				    mlxsw_sp_router_fib_dump_flush);
7103 	if (err)
7104 		goto err_register_fib_notifier;
7105 
7106 	return 0;
7107 
7108 err_register_fib_notifier:
7109 err_mp_hash_init:
7110 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7111 err_register_netevent_notifier:
7112 	mlxsw_sp_neigh_fini(mlxsw_sp);
7113 err_neigh_init:
7114 	mlxsw_sp_vrs_fini(mlxsw_sp);
7115 err_vrs_init:
7116 	mlxsw_sp_mr_fini(mlxsw_sp);
7117 err_mr_init:
7118 	mlxsw_sp_lpm_fini(mlxsw_sp);
7119 err_lpm_init:
7120 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7121 err_nexthop_group_ht_init:
7122 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7123 err_nexthop_ht_init:
7124 	mlxsw_sp_ipips_fini(mlxsw_sp);
7125 err_ipips_init:
7126 	mlxsw_sp_rifs_fini(mlxsw_sp);
7127 err_rifs_init:
7128 	__mlxsw_sp_router_fini(mlxsw_sp);
7129 err_router_init:
7130 	kfree(mlxsw_sp->router);
7131 	return err;
7132 }
7133 
7134 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7135 {
7136 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7137 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7138 	mlxsw_sp_neigh_fini(mlxsw_sp);
7139 	mlxsw_sp_vrs_fini(mlxsw_sp);
7140 	mlxsw_sp_mr_fini(mlxsw_sp);
7141 	mlxsw_sp_lpm_fini(mlxsw_sp);
7142 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7143 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7144 	mlxsw_sp_ipips_fini(mlxsw_sp);
7145 	mlxsw_sp_rifs_fini(mlxsw_sp);
7146 	__mlxsw_sp_router_fini(mlxsw_sp);
7147 	kfree(mlxsw_sp->router);
7148 }
7149