xref: /linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c (revision d96fc832bcb6269d96e33d506f33033d7ed08598)
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the names of the copyright holders nor the names of its
18  *    contributors may be used to endorse or promote products derived from
19  *    this software without specific prior written permission.
20  *
21  * Alternatively, this software may be distributed under the terms of the
22  * GNU General Public License ("GPL") version 2 as published by the Free
23  * Software Foundation.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <linux/gcd.h>
50 #include <linux/random.h>
51 #include <net/netevent.h>
52 #include <net/neighbour.h>
53 #include <net/arp.h>
54 #include <net/ip_fib.h>
55 #include <net/ip6_fib.h>
56 #include <net/fib_rules.h>
57 #include <net/ip_tunnels.h>
58 #include <net/l3mdev.h>
59 #include <net/addrconf.h>
60 #include <net/ndisc.h>
61 #include <net/ipv6.h>
62 #include <net/fib_notifier.h>
63 
64 #include "spectrum.h"
65 #include "core.h"
66 #include "reg.h"
67 #include "spectrum_cnt.h"
68 #include "spectrum_dpipe.h"
69 #include "spectrum_ipip.h"
70 #include "spectrum_mr.h"
71 #include "spectrum_mr_tcam.h"
72 #include "spectrum_router.h"
73 
74 struct mlxsw_sp_fib;
75 struct mlxsw_sp_vr;
76 struct mlxsw_sp_lpm_tree;
77 struct mlxsw_sp_rif_ops;
78 
/* Top-level router state; one instance per mlxsw_sp device. */
struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;	/* Indexed by RIF index. */
	struct mlxsw_sp_vr *vrs;	/* Virtual router array (see mlxsw_sp_vrs_init()). */
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		/* One tree for each protocol: IPv4 and IPv6 */
		struct mlxsw_sp_lpm_tree *proto_trees[2];
		struct mlxsw_sp_lpm_tree *trees;	/* All usable LPM trees (tree 0 is reserved). */
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;	/* NOTE(review): presumably set when FIB offload is aborted — confirm in flush path. */
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;	/* Indexed by enum mlxsw_sp_ipip_type. */
};
107 
/* Router interface (RIF): the router-facing representation of a netdev. */
struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;	/* Virtual router this RIF belongs to. */
	const struct mlxsw_sp_rif_ops *ops;	/* Type-specific operations. */
	struct mlxsw_sp *mlxsw_sp;

	/* Optional hardware flow counters; each *_valid flag says whether
	 * the matching index is currently bound (see
	 * mlxsw_sp_rif_counter_alloc() / mlxsw_sp_rif_counter_free()).
	 */
	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};
125 
/* Parameters for RIF creation. The union identifies the underlying
 * port: @lag selects whether @lag_id or @system_port is meaningful.
 */
struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

/* Sub-port RIF: type-specific state mirroring the creation parameters. */
struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

/* Loopback RIF backing an IP-in-IP tunnel overlay device. */
struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

/* Per-RIF-type operations. */
struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;	/* Size of the type-specific RIF structure. */

	/* NOTE(review): setup() appears to copy creation parameters into the
	 * type-specific struct; confirm at call sites whether it may be NULL.
	 */
	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};
167 
168 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
169 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
170 				  struct mlxsw_sp_lpm_tree *lpm_tree);
171 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
172 				     const struct mlxsw_sp_fib *fib,
173 				     u8 tree_id);
174 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
175 				       const struct mlxsw_sp_fib *fib);
176 
177 static unsigned int *
178 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
179 			   enum mlxsw_sp_rif_counter_dir dir)
180 {
181 	switch (dir) {
182 	case MLXSW_SP_RIF_COUNTER_EGRESS:
183 		return &rif->counter_egress;
184 	case MLXSW_SP_RIF_COUNTER_INGRESS:
185 		return &rif->counter_ingress;
186 	}
187 	return NULL;
188 }
189 
190 static bool
191 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
192 			       enum mlxsw_sp_rif_counter_dir dir)
193 {
194 	switch (dir) {
195 	case MLXSW_SP_RIF_COUNTER_EGRESS:
196 		return rif->counter_egress_valid;
197 	case MLXSW_SP_RIF_COUNTER_INGRESS:
198 		return rif->counter_ingress_valid;
199 	}
200 	return false;
201 }
202 
203 static void
204 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
205 			       enum mlxsw_sp_rif_counter_dir dir,
206 			       bool valid)
207 {
208 	switch (dir) {
209 	case MLXSW_SP_RIF_COUNTER_EGRESS:
210 		rif->counter_egress_valid = valid;
211 		break;
212 	case MLXSW_SP_RIF_COUNTER_INGRESS:
213 		rif->counter_ingress_valid = valid;
214 		break;
215 	}
216 }
217 
218 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
219 				     unsigned int counter_index, bool enable,
220 				     enum mlxsw_sp_rif_counter_dir dir)
221 {
222 	char ritr_pl[MLXSW_REG_RITR_LEN];
223 	bool is_egress = false;
224 	int err;
225 
226 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
227 		is_egress = true;
228 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
229 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
230 	if (err)
231 		return err;
232 
233 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
234 				    is_egress);
235 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
236 }
237 
238 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
239 				   struct mlxsw_sp_rif *rif,
240 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
241 {
242 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
243 	unsigned int *p_counter_index;
244 	bool valid;
245 	int err;
246 
247 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
248 	if (!valid)
249 		return -EINVAL;
250 
251 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
252 	if (!p_counter_index)
253 		return -EINVAL;
254 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
255 			     MLXSW_REG_RICNT_OPCODE_NOP);
256 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
257 	if (err)
258 		return err;
259 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
260 	return 0;
261 }
262 
263 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
264 				      unsigned int counter_index)
265 {
266 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
267 
268 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
269 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
270 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
271 }
272 
/* Bind a flow counter to @rif in direction @dir.
 *
 * Allocates an index from the RIF counter sub-pool, clears the hardware
 * counter and enables it on the RIF; only then is the direction marked
 * valid. On any failure the allocated index is returned to the pool.
 */
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	/* Start from a clean slate before enabling the counter. */
	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}
305 
/* Unbind and release the flow counter of @rif in direction @dir, if one
 * is currently valid. The return value of the hardware unbind is
 * deliberately ignored: the counter is being torn down regardless.
 */
void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	/* A valid direction always has a counter slot, hence the WARN. */
	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}
324 
325 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
326 {
327 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
328 	struct devlink *devlink;
329 
330 	devlink = priv_to_devlink(mlxsw_sp->core);
331 	if (!devlink_dpipe_table_counter_enabled(devlink,
332 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
333 		return;
334 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
335 }
336 
337 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
338 {
339 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
340 
341 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
342 }
343 
344 static struct mlxsw_sp_rif *
345 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
346 			 const struct net_device *dev);
347 
/* One bit per possible prefix length, 0..128 inclusive (sized for IPv6). */
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

/* Set of prefix lengths in use by a FIB / LPM tree. */
struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

/* Iterate over every prefix length marked as used, in ascending order. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
356 
357 static bool
358 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
359 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
360 {
361 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
362 }
363 
364 static void
365 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
366 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
367 {
368 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
369 }
370 
/* Mark @prefix_len as used in the bitmap. */
static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}
377 
/* Mark @prefix_len as no longer used in the bitmap. */
static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
384 
/* Lookup key of a FIB node: address plus prefix length. The buffer is
 * sized for IPv6; presumably IPv4 addresses occupy the leading bytes —
 * confirm against the insertion code.
 */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,	/* Forwarded via nexthop group. */
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
};
403 
struct mlxsw_sp_nexthop_group;

/* A node in a FIB table: one prefix, hashed by key, carrying a list of
 * entries.
 */
struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;		/* Membership in fib->node_list. */
	struct rhash_head ht_node;	/* Membership in fib->ht. */
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

/* Decapsulation state for an IPIP_DECAP entry. */
struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;		/* Membership in node's entry_list. */
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

/* IPv4 route. NOTE(review): tb_id/prio/tos/type presumably mirror the
 * kernel's IPv4 route notification fields — confirm at the fill site.
 */
struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

/* IPv6 route: one entry aggregating several struct rt6_info. */
struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;	/* Number of elements on rt6_list. */
};

/* List link wrapping a single kernel rt6_info. */
struct mlxsw_sp_rt6 {
	struct list_head list;
	struct rt6_info *rt;
};

/* Driver-side state of a hardware LPM tree. */
struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;	/* Tree is free when this is zero. */
	enum mlxsw_sp_l3proto proto;
	/* Per-prefix-length usage counters; copied wholesale when a tree
	 * replaces the per-protocol default (mlxsw_sp_vrs_lpm_tree_replace()).
	 */
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

/* One routing table (per protocol) within a virtual router. */
struct mlxsw_sp_fib {
	struct rhashtable ht;		/* fib_node lookup by key. */
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};

/* Virtual router: hardware analogue of a kernel FIB table. A slot is
 * free when all three table pointers are NULL (mlxsw_sp_vr_is_used()).
 */
struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr4_table;	/* IPv4 multicast routes. */
};
471 
472 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
473 
/* Create a FIB table for @proto within virtual router @vr.
 *
 * The table starts out bound to the per-protocol default LPM tree, on
 * which a reference is taken. Returns ERR_PTR() on failure, with all
 * partially acquired resources released.
 */
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	/* Hold the tree before binding so an unwound bind can safely put. */
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}
505 
/* Destroy a FIB table: unbind the VR from its LPM tree, drop the tree
 * reference and free the table. All FIB nodes must already be gone
 * (WARNs otherwise).
 */
static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
515 
516 static struct mlxsw_sp_lpm_tree *
517 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
518 {
519 	static struct mlxsw_sp_lpm_tree *lpm_tree;
520 	int i;
521 
522 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
523 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
524 		if (lpm_tree->ref_count == 0)
525 			return lpm_tree;
526 	}
527 	return NULL;
528 }
529 
530 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
531 				   struct mlxsw_sp_lpm_tree *lpm_tree)
532 {
533 	char ralta_pl[MLXSW_REG_RALTA_LEN];
534 
535 	mlxsw_reg_ralta_pack(ralta_pl, true,
536 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
537 			     lpm_tree->id);
538 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
539 }
540 
541 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
542 				   struct mlxsw_sp_lpm_tree *lpm_tree)
543 {
544 	char ralta_pl[MLXSW_REG_RALTA_LEN];
545 
546 	mlxsw_reg_ralta_pack(ralta_pl, false,
547 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
548 			     lpm_tree->id);
549 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
550 }
551 
/* Program the tree's bin structure (RALST) from @prefix_usage.
 *
 * The root bin is the longest used prefix; every other used prefix is
 * then chained as the left child of the next shorter used prefix.
 * Prefix 0 is never packed as a bin of its own — presumably it is
 * covered by the root programming; confirm against the RALST spec.
 */
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	/* The iteration is ascending, so root_bin ends up as the last
	 * (longest) used prefix length.
	 */
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
575 
/* Reserve an unused tree slot, allocate the tree in hardware and
 * program its bin structure from @prefix_usage. The returned tree holds
 * a single reference owned by the caller.
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	/* Proto must be set before the RALTA pack reads it. */
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	/* Fresh tree: no routes reference any prefix length yet. */
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}
607 
/* Release hardware resources of a tree whose last reference was dropped. */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
613 
614 static struct mlxsw_sp_lpm_tree *
615 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
616 		      struct mlxsw_sp_prefix_usage *prefix_usage,
617 		      enum mlxsw_sp_l3proto proto)
618 {
619 	struct mlxsw_sp_lpm_tree *lpm_tree;
620 	int i;
621 
622 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
623 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
624 		if (lpm_tree->ref_count != 0 &&
625 		    lpm_tree->proto == proto &&
626 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
627 					     prefix_usage)) {
628 			mlxsw_sp_lpm_tree_hold(lpm_tree);
629 			return lpm_tree;
630 		}
631 	}
632 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
633 }
634 
/* Take a reference on @lpm_tree. Plain increment — callers presumably
 * rely on external serialization (RTNL); confirm.
 */
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}
639 
640 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
641 				  struct mlxsw_sp_lpm_tree *lpm_tree)
642 {
643 	if (--lpm_tree->ref_count == 0)
644 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
645 }
646 
647 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
648 
/* Allocate the LPM tree array (sized by the MAX_LPM_TREES resource,
 * minus reserved tree 0) and create the two per-protocol default trees
 * to which freshly created FIB tables are bound. The defaults are
 * created with an empty prefix usage.
 */
static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	/* Tree IDs start past the reserved tree 0. */
	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}
697 
698 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
699 {
700 	struct mlxsw_sp_lpm_tree *lpm_tree;
701 
702 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
703 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
704 
705 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
706 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
707 
708 	kfree(mlxsw_sp->router->lpm.trees);
709 }
710 
711 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
712 {
713 	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
714 }
715 
716 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
717 {
718 	struct mlxsw_sp_vr *vr;
719 	int i;
720 
721 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
722 		vr = &mlxsw_sp->router->vrs[i];
723 		if (!mlxsw_sp_vr_is_used(vr))
724 			return vr;
725 	}
726 	return NULL;
727 }
728 
729 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
730 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
731 {
732 	char raltb_pl[MLXSW_REG_RALTB_LEN];
733 
734 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
735 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
736 			     tree_id);
737 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
738 }
739 
740 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
741 				       const struct mlxsw_sp_fib *fib)
742 {
743 	char raltb_pl[MLXSW_REG_RALTB_LEN];
744 
745 	/* Bind to tree 0 which is default */
746 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
747 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
748 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
749 }
750 
751 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
752 {
753 	/* For our purpose, squash main, default and local tables into one */
754 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
755 		tb_id = RT_TABLE_MAIN;
756 	return tb_id;
757 }
758 
759 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
760 					    u32 tb_id)
761 {
762 	struct mlxsw_sp_vr *vr;
763 	int i;
764 
765 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
766 
767 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
768 		vr = &mlxsw_sp->router->vrs[i];
769 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
770 			return vr;
771 	}
772 	return NULL;
773 }
774 
775 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
776 					    enum mlxsw_sp_l3proto proto)
777 {
778 	switch (proto) {
779 	case MLXSW_SP_L3_PROTO_IPV4:
780 		return vr->fib4;
781 	case MLXSW_SP_L3_PROTO_IPV6:
782 		return vr->fib6;
783 	}
784 	return NULL;
785 }
786 
/* Claim a free VR slot for kernel table @tb_id and populate all of its
 * tables (IPv4/IPv6 unicast, IPv4 multicast). The table pointers are
 * only installed on the VR once every table was created, so a partially
 * built VR never reads as "used". Returns ERR_PTR() on failure.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_mr_table *mr4_table;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib4))
		return ERR_CAST(fib4);
	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib6)) {
		err = PTR_ERR(fib6);
		goto err_fib6_create;
	}
	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(mr4_table)) {
		err = PTR_ERR(mr4_table);
		goto err_mr_table_create;
	}
	vr->fib4 = fib4;
	vr->fib6 = fib6;
	vr->mr4_table = mr4_table;
	vr->tb_id = tb_id;
	return vr;

err_mr_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
	return ERR_PTR(err);
}
828 
/* Tear down all tables of @vr in reverse order of creation and NULL the
 * pointers so the slot reads as unused again (mlxsw_sp_vr_is_used()).
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr4_table);
	vr->mr4_table = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}
839 
840 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
841 					   struct netlink_ext_ack *extack)
842 {
843 	struct mlxsw_sp_vr *vr;
844 
845 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
846 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
847 	if (!vr)
848 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
849 	return vr;
850 }
851 
852 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
853 {
854 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
855 	    list_empty(&vr->fib6->node_list) &&
856 	    mlxsw_sp_mr_table_empty(vr->mr4_table))
857 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
858 }
859 
860 static bool
861 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
862 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
863 {
864 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
865 
866 	if (!mlxsw_sp_vr_is_used(vr))
867 		return false;
868 	if (fib->lpm_tree->id == tree_id)
869 		return true;
870 	return false;
871 }
872 
/* Re-bind @fib to @new_tree. A reference on the new tree is taken (and
 * the software pointer switched) before the hardware bind; the old
 * tree's reference is dropped only after a successful bind. On failure
 * the software state is restored to the old tree.
 */
static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}
893 
894 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
895 					 struct mlxsw_sp_fib *fib,
896 					 struct mlxsw_sp_lpm_tree *new_tree)
897 {
898 	enum mlxsw_sp_l3proto proto = fib->proto;
899 	struct mlxsw_sp_lpm_tree *old_tree;
900 	u8 old_id, new_id = new_tree->id;
901 	struct mlxsw_sp_vr *vr;
902 	int i, err;
903 
904 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
905 	old_id = old_tree->id;
906 
907 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
908 		vr = &mlxsw_sp->router->vrs[i];
909 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
910 			continue;
911 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
912 						   mlxsw_sp_vr_fib(vr, proto),
913 						   new_tree);
914 		if (err)
915 			goto err_tree_replace;
916 	}
917 
918 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
919 	       sizeof(new_tree->prefix_ref_count));
920 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
921 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
922 
923 	return 0;
924 
925 err_tree_replace:
926 	for (i--; i >= 0; i--) {
927 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
928 			continue;
929 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
930 					     mlxsw_sp_vr_fib(vr, proto),
931 					     old_tree);
932 	}
933 	return err;
934 }
935 
936 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
937 {
938 	struct mlxsw_sp_vr *vr;
939 	u64 max_vrs;
940 	int i;
941 
942 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
943 		return -EIO;
944 
945 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
946 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
947 					GFP_KERNEL);
948 	if (!mlxsw_sp->router->vrs)
949 		return -ENOMEM;
950 
951 	for (i = 0; i < max_vrs; i++) {
952 		vr = &mlxsw_sp->router->vrs[i];
953 		vr->id = i;
954 	}
955 
956 	return 0;
957 }
958 
959 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
960 
/* Release all virtual routers at router teardown. */
static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}
974 
975 static struct net_device *
976 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
977 {
978 	struct ip_tunnel *tun = netdev_priv(ol_dev);
979 	struct net *net = dev_net(ol_dev);
980 
981 	return __dev_get_by_index(net, tun->parms.link);
982 }
983 
984 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
985 {
986 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
987 
988 	if (d)
989 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
990 	else
991 		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
992 }
993 
994 static struct mlxsw_sp_rif *
995 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
996 		    const struct mlxsw_sp_rif_params *params,
997 		    struct netlink_ext_ack *extack);
998 
/* Create the loopback RIF backing an IP-in-IP overlay device, with the
 * loopback configuration supplied by the tunnel type's ops.
 */
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
				enum mlxsw_sp_ipip_type ipipt,
				struct net_device *ol_dev,
				struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_params_ipip_lb lb_params;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_rif *rif;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
		.common.dev = ol_dev,
		.common.lag = false,
		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
	};

	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
	if (IS_ERR(rif))
		return ERR_CAST(rif);
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}
1021 
/* Allocate an IPIP entry for overlay device @ol_dev, including the loopback
 * RIF that represents the tunnel in the device. Returns an ERR_PTR on
 * failure; the caller owns the entry and frees it via
 * mlxsw_sp_ipip_entry_dealloc().
 */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;

	/* Snapshot the tunnel parameters; only an IPv4 underlay is
	 * implemented at this point.
	 */
	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		break;
	}

	return ipip_entry;

err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}
1061 
/* Counterpart of mlxsw_sp_ipip_entry_alloc(): destroy the loopback RIF,
 * then free the entry itself.
 */
static void
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}
1068 
1069 static bool
1070 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1071 				  const enum mlxsw_sp_l3proto ul_proto,
1072 				  union mlxsw_sp_l3addr saddr,
1073 				  u32 ul_tb_id,
1074 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1075 {
1076 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1077 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1078 	union mlxsw_sp_l3addr tun_saddr;
1079 
1080 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1081 		return false;
1082 
1083 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1084 	return tun_ul_tb_id == ul_tb_id &&
1085 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1086 }
1087 
/* Bind @fib_entry as the decap route of @ipip_entry, allocating the KVD
 * linear entry the device uses for the decap adjacency. Returns 0 or a
 * negative errno; on failure the two objects are left unlinked.
 */
static int
mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_fib_entry *fib_entry,
			      struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tunnel_index;
	int err;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
	if (err)
		return err;

	ipip_entry->decap_fib_entry = fib_entry;
	fib_entry->decap.ipip_entry = ipip_entry;
	fib_entry->decap.tunnel_index = tunnel_index;
	return 0;
}
1105 
/* Undo mlxsw_sp_fib_entry_decap_init(): sever the FIB entry / IPIP entry
 * cross references and release the KVD linear entry.
 */
static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry *fib_entry)
{
	/* Unlink this node from the IPIP entry that it's the decap entry of. */
	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
	fib_entry->decap.ipip_entry = NULL;
	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
}
1114 
1115 static struct mlxsw_sp_fib_node *
1116 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1117 			 size_t addr_len, unsigned char prefix_len);
1118 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1119 				     struct mlxsw_sp_fib_entry *fib_entry);
1120 
/* Stop using the decap route of @ipip_entry for decapsulation: turn it back
 * into a regular trap route and reflect that in the device.
 */
static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
1132 
/* Make @decap_fib_entry the decap route of @ipip_entry and program it into
 * the device. If the device update fails, roll the route back to a trap
 * entry via mlxsw_sp_ipip_entry_demote_decap().
 */
static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
1146 
1147 /* Given an IPIP entry, find the corresponding decap route. */
1148 static struct mlxsw_sp_fib_entry *
1149 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1150 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1151 {
1152 	static struct mlxsw_sp_fib_node *fib_node;
1153 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1154 	struct mlxsw_sp_fib_entry *fib_entry;
1155 	unsigned char saddr_prefix_len;
1156 	union mlxsw_sp_l3addr saddr;
1157 	struct mlxsw_sp_fib *ul_fib;
1158 	struct mlxsw_sp_vr *ul_vr;
1159 	const void *saddrp;
1160 	size_t saddr_len;
1161 	u32 ul_tb_id;
1162 	u32 saddr4;
1163 
1164 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1165 
1166 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1167 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1168 	if (!ul_vr)
1169 		return NULL;
1170 
1171 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1172 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1173 					   ipip_entry->ol_dev);
1174 
1175 	switch (ipip_ops->ul_proto) {
1176 	case MLXSW_SP_L3_PROTO_IPV4:
1177 		saddr4 = be32_to_cpu(saddr.addr4);
1178 		saddrp = &saddr4;
1179 		saddr_len = 4;
1180 		saddr_prefix_len = 32;
1181 		break;
1182 	case MLXSW_SP_L3_PROTO_IPV6:
1183 		WARN_ON(1);
1184 		return NULL;
1185 	}
1186 
1187 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1188 					    saddr_prefix_len);
1189 	if (!fib_node || list_empty(&fib_node->entry_list))
1190 		return NULL;
1191 
1192 	fib_entry = list_first_entry(&fib_node->entry_list,
1193 				     struct mlxsw_sp_fib_entry, list);
1194 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1195 		return NULL;
1196 
1197 	return fib_entry;
1198 }
1199 
1200 static struct mlxsw_sp_ipip_entry *
1201 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1202 			   enum mlxsw_sp_ipip_type ipipt,
1203 			   struct net_device *ol_dev)
1204 {
1205 	struct mlxsw_sp_ipip_entry *ipip_entry;
1206 
1207 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1208 	if (IS_ERR(ipip_entry))
1209 		return ipip_entry;
1210 
1211 	list_add_tail(&ipip_entry->ipip_list_node,
1212 		      &mlxsw_sp->router->ipip_list);
1213 
1214 	return ipip_entry;
1215 }
1216 
/* Counterpart of mlxsw_sp_ipip_entry_create(): unlink the entry from the
 * router's tunnel list and release it.
 */
static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}
1224 
/* Decide whether packets arriving on @ul_dev with destination @ul_dip should
 * be decapsulated by @ipip_entry: the underlay protocol must match, the
 * tunnel's local address must equal @ul_dip in @ul_dev's table, and the
 * tunnel must either be unbound or bound to @ul_dev itself.
 */
static bool
mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	struct net_device *ipip_ul_dev;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
						 ul_tb_id, ipip_entry) &&
	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
}
1244 
1245 /* Given decap parameters, find the corresponding IPIP entry. */
1246 static struct mlxsw_sp_ipip_entry *
1247 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1248 				  const struct net_device *ul_dev,
1249 				  enum mlxsw_sp_l3proto ul_proto,
1250 				  union mlxsw_sp_l3addr ul_dip)
1251 {
1252 	struct mlxsw_sp_ipip_entry *ipip_entry;
1253 
1254 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1255 			    ipip_list_node)
1256 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1257 						      ul_proto, ul_dip,
1258 						      ipip_entry))
1259 			return ipip_entry;
1260 
1261 	return NULL;
1262 }
1263 
1264 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1265 				      const struct net_device *dev,
1266 				      enum mlxsw_sp_ipip_type *p_type)
1267 {
1268 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1269 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1270 	enum mlxsw_sp_ipip_type ipipt;
1271 
1272 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1273 		ipip_ops = router->ipip_ops_arr[ipipt];
1274 		if (dev->type == ipip_ops->dev_type) {
1275 			if (p_type)
1276 				*p_type = ipipt;
1277 			return true;
1278 		}
1279 	}
1280 	return false;
1281 }
1282 
/* True when @dev is a tunnel overlay device of a type the driver knows. */
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}
1288 
1289 static struct mlxsw_sp_ipip_entry *
1290 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1291 				   const struct net_device *ol_dev)
1292 {
1293 	struct mlxsw_sp_ipip_entry *ipip_entry;
1294 
1295 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1296 			    ipip_list_node)
1297 		if (ipip_entry->ol_dev == ol_dev)
1298 			return ipip_entry;
1299 
1300 	return NULL;
1301 }
1302 
/* Find the next IPIP entry whose underlay device is @ul_dev. Pass NULL as
 * @start for the first match, or a previously returned entry to resume the
 * walk after it. Returns NULL when there are no (more) matches.
 */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	/* list_prepare_entry() + list_for_each_entry_continue() makes the
	 * loop start either at the list head (@start == NULL) or right
	 * after @start.
	 */
	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}
1323 
1324 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1325 				const struct net_device *dev)
1326 {
1327 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1328 }
1329 
1330 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1331 						const struct net_device *ol_dev,
1332 						enum mlxsw_sp_ipip_type ipipt)
1333 {
1334 	const struct mlxsw_sp_ipip_ops *ops
1335 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1336 
1337 	/* For deciding whether decap should be offloaded, we don't care about
1338 	 * overlay protocol, so ask whether either one is supported.
1339 	 */
1340 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1341 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1342 }
1343 
/* NETDEV_REGISTER handler for a tunnel overlay device: start offloading the
 * new tunnel, unless its local address collides with a tunnel that is
 * already offloaded in the same underlay table.
 */
static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_l3proto ul_proto;
	enum mlxsw_sp_ipip_type ipipt;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	/* NOTE(review): the return value is ignored, so this relies on the
	 * caller only invoking us for recognized tunnel types (in which case
	 * ipipt is always assigned) — confirm against the event dispatch.
	 */
	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
		/* On a local-address conflict, the existing tunnel is
		 * demoted and the new one is not offloaded either: both
		 * stay in slow path.
		 */
		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
							  saddr, ul_tb_id,
							  NULL)) {
			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
								ol_dev);
			if (IS_ERR(ipip_entry))
				return PTR_ERR(ipip_entry);
		}
	}

	return 0;
}
1370 
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	/* Nothing to tear down unless the tunnel was offloaded. */
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1380 
/* The overlay device went up: if a matching local route exists in the
 * underlay table, promote it to a decap entry.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (decap_fib_entry)
		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
						  decap_fib_entry);
}
1392 
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	/* Only offloaded tunnels react to overlay device up events. */
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1402 
/* The overlay device went down: demote the decap route, if any. */
static void
mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
1410 
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	/* Only offloaded tunnels react to overlay device down events. */
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1420 
1421 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1422 					 struct mlxsw_sp_rif *old_rif,
1423 					 struct mlxsw_sp_rif *new_rif);
/* Replace the loopback RIF of @ipip_entry with a freshly created one. RIFs
 * cannot be edited in place, hence the create / migrate / destroy sequence.
 * When @keep_encap is set, next hops using the old RIF are migrated to the
 * new one before the old RIF is destroyed.
 */
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	/* Destroy the old RIF only after nothing references it anymore. */
	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}
1449 
1450 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1451 					struct mlxsw_sp_rif *rif);
1452 
1453 /**
1454  * Update the offload related to an IPIP entry. This always updates decap, and
1455  * in addition to that it also:
1456  * @recreate_loopback: recreates the associated loopback RIF
1457  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1458  *              relevant when recreate_loopback is true.
1459  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1460  *                   is only relevant when recreate_loopback is false.
1461  */
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry,
					bool recreate_loopback,
					bool keep_encap,
					bool update_nexthops,
					struct netlink_ext_ack *extack)
{
	int err;

	/* RIFs can't be edited, so to update loopback, we need to destroy and
	 * recreate it. That creates a window of opportunity where RALUE and
	 * RATR registers end up referencing a RIF that's already gone. RATRs
	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
	 * of RALUE, demote the decap route back.
	 */
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);

	if (recreate_loopback) {
		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
						       keep_encap, extack);
		if (err)
			return err;
	} else if (update_nexthops) {
		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
					    &ipip_entry->ol_lb->common);
	}

	/* The decap route was demoted above; if the tunnel is up, look it up
	 * again and re-promote it.
	 */
	if (ipip_entry->ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);

	return 0;
}
1495 
/* The overlay device was moved to a different VRF: recreate the loopback RIF
 * in the new underlay table, or demote the tunnel if the move created a
 * local-address conflict with another offloaded tunnel.
 */
static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev,
						struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	if (!ipip_entry)
		return 0;

	/* For flat configuration cases, moving overlay to a different VRF might
	 * cause local address conflict, and the conflicting tunnels need to be
	 * demoted.
	 */
	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
						 saddr, ul_tb_id,
						 ipip_entry)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, false, false, extack);
}
1526 
/* The underlay device was moved to a different VRF: recreate the loopback
 * RIF, migrating encap next hops along (keep_encap).
 */
static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     struct netlink_ext_ack *extack)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}
1536 
/* The underlay device came up: refresh next hops while keeping the current
 * loopback RIF.
 */
static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
1545 
static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
1558 
1559 static int
1560 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1561 					struct net_device *ol_dev,
1562 					struct netlink_ext_ack *extack)
1563 {
1564 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1565 	struct mlxsw_sp_ipip_entry *ipip_entry;
1566 	int err;
1567 
1568 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1569 	if (!ipip_entry)
1570 		/* A change might make a tunnel eligible for offloading, but
1571 		 * that is currently not implemented. What falls to slow path
1572 		 * stays there.
1573 		 */
1574 		return 0;
1575 
1576 	/* A change might make a tunnel not eligible for offloading. */
1577 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1578 						 ipip_entry->ipipt)) {
1579 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1580 		return 0;
1581 	}
1582 
1583 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1584 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1585 	return err;
1586 }
1587 
/* Stop offloading @ipip_entry altogether: demote its decap route (if the
 * tunnel is up) and destroy the entry, returning the tunnel to slow path.
 */
void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct net_device *ol_dev = ipip_entry->ol_dev;

	if (ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1597 
1598 /* The configuration where several tunnels have the same local address in the
1599  * same underlay table needs special treatment in the HW. That is currently not
1600  * implemented in the driver. This function finds and demotes the first tunnel
1601  * with a given source address, except the one passed in in the argument
1602  * `except'.
1603  */
1604 bool
1605 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1606 				     enum mlxsw_sp_l3proto ul_proto,
1607 				     union mlxsw_sp_l3addr saddr,
1608 				     u32 ul_tb_id,
1609 				     const struct mlxsw_sp_ipip_entry *except)
1610 {
1611 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1612 
1613 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1614 				 ipip_list_node) {
1615 		if (ipip_entry != except &&
1616 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1617 						      ul_tb_id, ipip_entry)) {
1618 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1619 			return true;
1620 		}
1621 	}
1622 
1623 	return false;
1624 }
1625 
1626 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1627 						     struct net_device *ul_dev)
1628 {
1629 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1630 
1631 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1632 				 ipip_list_node) {
1633 		struct net_device *ipip_ul_dev =
1634 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1635 
1636 		if (ipip_ul_dev == ul_dev)
1637 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1638 	}
1639 }
1640 
/* Dispatch a netdevice notifier event on a tunnel overlay device to the
 * matching handler. Returns 0 or a negative errno from the handler.
 */
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *ol_dev,
				     unsigned long event,
				     struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_REGISTER:
		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
	case NETDEV_UNREGISTER:
		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_UP:
		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_DOWN:
		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_CHANGEUPPER:
		/* Only a change of L3 master (VRF) is of interest here. */
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
								    ol_dev,
								    extack);
		return 0;
	case NETDEV_CHANGE:
		extack = info->extack;
		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
							       ol_dev, extack);
	}
	return 0;
}
1676 
/* Dispatch a netdevice notifier event on an underlay device to the handler
 * for one particular IPIP entry running over it.
 */
static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_ipip_entry *ipip_entry,
				   struct net_device *ul_dev,
				   unsigned long event,
				   struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		/* Only a change of L3 master (VRF) is of interest here. */
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
								    ipip_entry,
								    ul_dev,
								    extack);
		break;

	case NETDEV_UP:
		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
							   ul_dev);
	case NETDEV_DOWN:
		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
							     ipip_entry,
							     ul_dev);
	}
	return 0;
}
1708 
/* Dispatch a netdevice notifier event on underlay device @ul_dev to every
 * IPIP entry tunneling over it. If any handler fails, all tunnels over
 * @ul_dev are demoted to slow path and the error is returned.
 */
int
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *ul_dev,
				 unsigned long event,
				 struct netdev_notifier_info *info)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	int err;

	/* Walk all entries over @ul_dev, resuming after the previous one. */
	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
								ul_dev,
								ipip_entry))) {
		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
							 ul_dev, event, info);
		if (err) {
			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
								 ul_dev);
			return err;
		}
	}

	return 0;
}
1732 
/* Hashtable key: the kernel neighbour a driver entry shadows. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1736 
/* Driver-side shadow of a kernel neighbour entry. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;	/* member of rif->neigh_list */
	struct rhash_head ht_node;	/* member of router->neigh_ht */
	struct mlxsw_sp_neigh_key key;
	u16 rif;	/* index of the RIF of the neighbour's device */
	bool connected;	/* NOTE(review): presumably mirrors the kernel's
			 * reachable/connected NUD state — confirm at the
			 * update sites.
			 */
	unsigned char ha[ETH_ALEN];	/* cached hardware address */
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index;	/* valid iff counter_valid */
	bool counter_valid;
};
1751 
/* Neighbour hashtable keyed on the kernel neighbour pointer (see
 * struct mlxsw_sp_neigh_key).
 */
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
1757 
1758 struct mlxsw_sp_neigh_entry *
1759 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1760 			struct mlxsw_sp_neigh_entry *neigh_entry)
1761 {
1762 	if (!neigh_entry) {
1763 		if (list_empty(&rif->neigh_list))
1764 			return NULL;
1765 		else
1766 			return list_first_entry(&rif->neigh_list,
1767 						typeof(*neigh_entry),
1768 						rif_list_node);
1769 	}
1770 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1771 		return NULL;
1772 	return list_next_entry(neigh_entry, rif_list_node);
1773 }
1774 
/* Address family (AF_INET / AF_INET6) of the shadowed kernel neighbour. */
int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->key.n->tbl->family;
}
1779 
/* Cached hardware address of the neighbour entry. */
unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->ha;
}
1785 
1786 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1787 {
1788 	struct neighbour *n;
1789 
1790 	n = neigh_entry->key.n;
1791 	return ntohl(*((__be32 *) n->primary_key));
1792 }
1793 
1794 struct in6_addr *
1795 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1796 {
1797 	struct neighbour *n;
1798 
1799 	n = neigh_entry->key.n;
1800 	return (struct in6_addr *) &n->primary_key;
1801 }
1802 
/* Read the flow counter of @neigh_entry into @p_counter. Returns -EINVAL
 * when no counter was allocated for the entry.
 */
int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_neigh_entry *neigh_entry,
			       u64 *p_counter)
{
	if (!neigh_entry->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
					 p_counter, NULL);
}
1813 
1814 static struct mlxsw_sp_neigh_entry *
1815 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1816 			   u16 rif)
1817 {
1818 	struct mlxsw_sp_neigh_entry *neigh_entry;
1819 
1820 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1821 	if (!neigh_entry)
1822 		return NULL;
1823 
1824 	neigh_entry->key.n = n;
1825 	neigh_entry->rif = rif;
1826 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1827 
1828 	return neigh_entry;
1829 }
1830 
/* Release an entry allocated by mlxsw_sp_neigh_entry_alloc(). */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1835 
/* Insert @neigh_entry into the router's neighbour hashtable. */
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}
1844 
/* Remove @neigh_entry from the router's neighbour hashtable. */
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}
1853 
1854 static bool
1855 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1856 				    struct mlxsw_sp_neigh_entry *neigh_entry)
1857 {
1858 	struct devlink *devlink;
1859 	const char *table_name;
1860 
1861 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1862 	case AF_INET:
1863 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1864 		break;
1865 	case AF_INET6:
1866 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1867 		break;
1868 	default:
1869 		WARN_ON(1);
1870 		return false;
1871 	}
1872 
1873 	devlink = priv_to_devlink(mlxsw_sp->core);
1874 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
1875 }
1876 
1877 static void
1878 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1879 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1880 {
1881 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1882 		return;
1883 
1884 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1885 		return;
1886 
1887 	neigh_entry->counter_valid = true;
1888 }
1889 
1890 static void
1891 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1892 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1893 {
1894 	if (!neigh_entry->counter_valid)
1895 		return;
1896 	mlxsw_sp_flow_counter_free(mlxsw_sp,
1897 				   neigh_entry->counter_index);
1898 	neigh_entry->counter_valid = false;
1899 }
1900 
/* Create a driver neighbour entry shadowing kernel neighbour @n: allocate
 * it, hash it, optionally attach a flow counter (best effort) and link it
 * on the RIF of @n's device. Returns an ERR_PTR on failure.
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	/* Without a RIF on the device there is nothing to offload to. */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}
1929 
/* Tear down a neighbour entry in the reverse order of
 * mlxsw_sp_neigh_entry_create().
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
1939 
1940 static struct mlxsw_sp_neigh_entry *
1941 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1942 {
1943 	struct mlxsw_sp_neigh_key key;
1944 
1945 	key.n = n;
1946 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1947 				      &key, mlxsw_sp_neigh_ht_params);
1948 }
1949 
/* Derive the neighbour-activity polling interval from the kernel's
 * DELAY_PROBE_TIME, taking the shorter of the ARP and (when IPv6 is
 * enabled) ND settings, converted to milliseconds.
 */
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
	interval = min_t(unsigned long,
			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}
1964 
/* Process one IPv4 entry of a RAUHTD dump: look up the corresponding kernel
 * neighbour and feed it an event, presumably so that entries the device
 * still sees traffic for are refreshed rather than aged out — confirm
 * against the RAUHTD polling logic.
 */
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);
	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	/* neigh_lookup() took a reference; drop it. */
	neigh_release(n);
}
1992 
#if IS_ENABLED(CONFIG_IPV6)
/* Process one IPv6 activity record: find the matching kernel neighbour
 * on the RIF's netdev and mark it as active.
 */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	struct net_device *dev;
	struct neighbour *n;
	struct in6_addr dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
					 (char *) &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&nd_tbl, &dip, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}
#else
/* Stub for IPv6-less kernels; IPv6 records are silently ignored. */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
}
#endif
2027 
2028 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2029 						   char *rauhtd_pl,
2030 						   int rec_index)
2031 {
2032 	u8 num_entries;
2033 	int i;
2034 
2035 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2036 								rec_index);
2037 	/* Hardware starts counting at 0, so add 1. */
2038 	num_entries++;
2039 
2040 	/* Each record consists of several neighbour entries. */
2041 	for (i = 0; i < num_entries; i++) {
2042 		int ent_index;
2043 
2044 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2045 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2046 						       ent_index);
2047 	}
2048 
2049 }
2050 
/* An IPv6 activity record holds exactly one entry, so the record index
 * doubles as the entry index.
 */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2059 
2060 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2061 					      char *rauhtd_pl, int rec_index)
2062 {
2063 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2064 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2065 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2066 						       rec_index);
2067 		break;
2068 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2069 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2070 						       rec_index);
2071 		break;
2072 	}
2073 }
2074 
2075 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2076 {
2077 	u8 num_rec, last_rec_index, num_entries;
2078 
2079 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2080 	last_rec_index = num_rec - 1;
2081 
2082 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2083 		return false;
2084 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2085 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2086 		return true;
2087 
2088 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2089 								last_rec_index);
2090 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2091 		return true;
2092 	return false;
2093 }
2094 
/* Dump neighbour activity records of the given type from the device and
 * feed each one to the kernel's neighbour state machine. The dump is
 * repeated while the response buffer comes back full, since more records
 * may still be pending. Returns the last register access error, if any.
 */
static int
__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
				       char *rauhtd_pl,
				       enum mlxsw_reg_rauhtd_type type)
{
	int i, num_rec;
	int err;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
	rtnl_unlock();

	return err;
}
2124 
2125 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2126 {
2127 	enum mlxsw_reg_rauhtd_type type;
2128 	char *rauhtd_pl;
2129 	int err;
2130 
2131 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2132 	if (!rauhtd_pl)
2133 		return -ENOMEM;
2134 
2135 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2136 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2137 	if (err)
2138 		goto out;
2139 
2140 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2141 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2142 out:
2143 	kfree(rauhtd_pl);
2144 	return err;
2145 }
2146 
2147 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2148 {
2149 	struct mlxsw_sp_neigh_entry *neigh_entry;
2150 
2151 	/* Take RTNL mutex here to prevent lists from changes */
2152 	rtnl_lock();
2153 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2154 			    nexthop_neighs_list_node)
2155 		/* If this neigh have nexthops, make the kernel think this neigh
2156 		 * is active regardless of the traffic.
2157 		 */
2158 		neigh_event_send(neigh_entry->key.n, NULL);
2159 	rtnl_unlock();
2160 }
2161 
2162 static void
2163 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2164 {
2165 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2166 
2167 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2168 			       msecs_to_jiffies(interval));
2169 }
2170 
2171 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2172 {
2173 	struct mlxsw_sp_router *router;
2174 	int err;
2175 
2176 	router = container_of(work, struct mlxsw_sp_router,
2177 			      neighs_update.dw.work);
2178 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2179 	if (err)
2180 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2181 
2182 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2183 
2184 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2185 }
2186 
/* Periodic work that probes still-unresolved neighbours used as
 * nexthops. Reschedules itself at a fixed interval.
 */
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_router *router;

	router = container_of(work, struct mlxsw_sp_router,
			      nexthop_probe_dw.work);
	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send arp on them. This solves the chicken-egg problem when
	 * the nexthop wouldn't get offloaded until the neighbor is resolved
	 * but it wouldn't get resolved ever in case traffic is flowing in HW
	 * using different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		if (!neigh_entry->connected)
			neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();

	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
2212 
2213 static void
2214 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2215 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2216 			      bool removing);
2217 
2218 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2219 {
2220 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2221 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2222 }
2223 
/* Write (add or delete) an IPv4 neighbour entry to the device's RAUHT
 * table, binding its activity counter when one is allocated.
 */
static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	u32 dip = ntohl(*((__be32 *) n->primary_key));
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2240 
/* Write (add or delete) an IPv6 neighbour entry to the device's RAUHT
 * table, binding its activity counter when one is allocated.
 */
static void
mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	const char *dip = n->primary_key;

	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2257 
2258 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2259 {
2260 	struct neighbour *n = neigh_entry->key.n;
2261 
2262 	/* Packets with a link-local destination address are trapped
2263 	 * after LPM lookup and never reach the neighbour table, so
2264 	 * there is no need to program such neighbours to the device.
2265 	 */
2266 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2267 	    IPV6_ADDR_LINKLOCAL)
2268 		return true;
2269 	return false;
2270 }
2271 
/* Reflect a neighbour's connected state in the device. Removing an
 * entry that was never programmed is a no-op. Link-local IPv6
 * neighbours are never programmed (see mlxsw_sp_neigh_ipv6_ignore()).
 */
static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry,
			    bool adding)
{
	if (!adding && !neigh_entry->connected)
		return;
	neigh_entry->connected = adding;
	if (neigh_entry->key.n->tbl->family == AF_INET) {
		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
			return;
		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	} else {
		WARN_ON_ONCE(1);
	}
}
2292 
/* Attach or detach the activity counter of a neighbour entry and
 * re-write the entry (always as an add) so the counter change takes
 * effect in the device without removing the entry itself.
 */
void
mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry,
				    bool adding)
{
	if (adding)
		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	else
		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
2304 
/* Deferred-work context for netevent notifications that must be handled
 * outside the atomic notifier path.
 */
struct mlxsw_sp_netevent_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n;	/* only set for neighbour update events */
};
2310 
/* Deferred handler for NETEVENT_NEIGH_UPDATE: sync one kernel
 * neighbour's state into the device. Creates the driver entry on
 * demand, updates its MAC and connected state, fixes up dependent
 * nexthops and destroys the entry once it is disconnected and unused.
 * Drops the neighbour reference taken by the notifier.
 */
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
	struct mlxsw_sp_netevent_work *net_work =
		container_of(work, struct mlxsw_sp_netevent_work, work);
	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n = net_work->n;
	unsigned char ha[ETH_ALEN];
	bool entry_connected;
	u8 nud_state, dead;

	/* If these parameters are changed after we release the lock,
	 * then we are guaranteed to receive another event letting us
	 * know about it.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	rtnl_lock();
	entry_connected = nud_state & NUD_VALID && !dead;
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!entry_connected && !neigh_entry)
		goto out;
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry))
			goto out;
	}

	memcpy(neigh_entry->ha, ha, ETH_ALEN);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
	rtnl_unlock();
	neigh_release(n);
	kfree(net_work);
}
2355 
2356 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2357 
2358 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2359 {
2360 	struct mlxsw_sp_netevent_work *net_work =
2361 		container_of(work, struct mlxsw_sp_netevent_work, work);
2362 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2363 
2364 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2365 	kfree(net_work);
2366 }
2367 
/* Atomic netevent notifier. Updates the neighbour activity polling
 * interval on DELAY_PROBE_TIME changes and defers neighbour and
 * multipath hash updates to process context via work items.
 */
static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
					  unsigned long event, void *ptr)
{
	struct mlxsw_sp_netevent_work *net_work;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp_router *router;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct neigh_parms *p;
	struct neighbour *n;
	struct net *net;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || (p->tbl->family != AF_INET &&
				p->tbl->family != AF_INET6))
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router->neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;

		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
			return NOTIFY_DONE;

		/* Only neighbours on devices backed by our ports matter. */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
		if (!net_work) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_BAD;
		}

		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		net_work->n = n;

		/* Take a reference to ensure the neighbour won't be
		 * destructed until we drop the reference in delayed
		 * work.
		 */
		neigh_clone(n);
		mlxsw_core_schedule_work(&net_work->work);
		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_MULTIPATH_HASH_UPDATE:
		net = ptr;

		/* Multipath hash parameters are only tracked for init_net. */
		if (!net_eq(net, &init_net))
			return NOTIFY_DONE;

		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
		if (!net_work)
			return NOTIFY_BAD;

		router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
		INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
		net_work->mlxsw_sp = router->mlxsw_sp;
		mlxsw_core_schedule_work(&net_work->work);
		break;
	}

	return NOTIFY_DONE;
}
2449 
/* Initialize neighbour offload: the neighbour hash table, the activity
 * polling interval and the two periodic works (activity dumping and
 * unresolved nexthop probing), both kicked off immediately.
 */
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the activity_update */
	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
	return 0;
}
2473 
/* Stop the periodic neighbour works and destroy the neighbour hash
 * table. Counterpart of mlxsw_sp_neigh_init().
 */
static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
}
2480 
/* Called when a RIF goes away: unprogram and destroy every neighbour
 * entry that was attached to it (safe iteration, since destroy unlinks
 * the entry from the list).
 */
static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
				 rif_list_node) {
		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
	}
}
2492 
/* Kind of object a nexthop resolves to. */
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,	/* regular Ethernet neighbour */
	MLXSW_SP_NEXTHOP_TYPE_IPIP,	/* IP-in-IP tunnel entry */
};
2497 
/* Hash table key for nexthops: the kernel's fib_nh pointer. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
2501 
/* A single nexthop tracked by the driver, member of a nexthop group. */
struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node;
	struct list_head router_list_node;
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node;
	struct mlxsw_sp_nexthop_key key;
	unsigned char gw_addr[sizeof(struct in6_addr)]; /* gateway address;
							 * sized for IPv6,
							 * also holds IPv4
							 */
	int ifindex;
	int nh_weight;
	int norm_nh_weight;
	int num_adj_entries; /* adjacency entries this nexthop occupies */
	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	enum mlxsw_sp_nexthop_type type;
	union {
		struct mlxsw_sp_neigh_entry *neigh_entry;
		struct mlxsw_sp_ipip_entry *ipip_entry;
	};
	unsigned int counter_index; /* flow counter, valid iff counter_valid */
	bool counter_valid;
};
2534 
/* A group of nexthops sharing a block of adjacency entries. The
 * trailing array holds the nexthops themselves.
 */
struct mlxsw_sp_nexthop_group {
	void *priv; /* fib_info for IPv4 groups */
	struct rhash_head ht_node;
	struct list_head fib_list; /* list of fib entries that use this group */
	struct neigh_table *neigh_tbl; /* determines the address family */
	u8 adj_index_valid:1,
	   gateway:1; /* routes using the group use a gateway */
	u32 adj_index;
	u16 ecmp_size;
	u16 count;
	int sum_norm_weight;
	struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif	nexthops[0].rif
};
2549 
2550 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2551 				    struct mlxsw_sp_nexthop *nh)
2552 {
2553 	struct devlink *devlink;
2554 
2555 	devlink = priv_to_devlink(mlxsw_sp->core);
2556 	if (!devlink_dpipe_table_counter_enabled(devlink,
2557 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2558 		return;
2559 
2560 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2561 		return;
2562 
2563 	nh->counter_valid = true;
2564 }
2565 
2566 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2567 				   struct mlxsw_sp_nexthop *nh)
2568 {
2569 	if (!nh->counter_valid)
2570 		return;
2571 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2572 	nh->counter_valid = false;
2573 }
2574 
2575 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2576 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2577 {
2578 	if (!nh->counter_valid)
2579 		return -EINVAL;
2580 
2581 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2582 					 p_counter, NULL);
2583 }
2584 
2585 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2586 					       struct mlxsw_sp_nexthop *nh)
2587 {
2588 	if (!nh) {
2589 		if (list_empty(&router->nexthop_list))
2590 			return NULL;
2591 		else
2592 			return list_first_entry(&router->nexthop_list,
2593 						typeof(*nh), router_list_node);
2594 	}
2595 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2596 		return NULL;
2597 	return list_next_entry(nh, router_list_node);
2598 }
2599 
/* Report whether the nexthop is currently written to the adjacency table. */
bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
{
	return nh->offloaded;
}
2604 
2605 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2606 {
2607 	if (!nh->offloaded)
2608 		return NULL;
2609 	return nh->neigh_entry->ha;
2610 }
2611 
/* Return the adjacency location of an offloaded nexthop: its group's
 * base adjacency index and ECMP size, plus the nexthop's offset within
 * the group (the sum of adjacency entries of preceding offloaded
 * nexthops). Returns -EINVAL when the nexthop or its group is not in
 * the adjacency table.
 */
int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
			     u32 *p_adj_size, u32 *p_adj_hash_index)
{
	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
	u32 adj_hash_index = 0;
	int i;

	if (!nh->offloaded || !nh_grp->adj_index_valid)
		return -EINVAL;

	*p_adj_index = nh_grp->adj_index;
	*p_adj_size = nh_grp->ecmp_size;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];

		if (nh_iter == nh)
			break;
		if (nh_iter->offloaded)
			adj_hash_index += nh_iter->num_adj_entries;
	}

	*p_adj_hash_index = adj_hash_index;
	return 0;
}
2637 
/* Return the router interface this nexthop egresses through. */
struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
{
	return nh->rif;
}
2642 
2643 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2644 {
2645 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2646 	int i;
2647 
2648 	for (i = 0; i < nh_grp->count; i++) {
2649 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2650 
2651 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2652 			return true;
2653 	}
2654 	return false;
2655 }
2656 
/* IPv4 nexthop groups store the kernel's fib_info in priv. */
static struct fib_info *
mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->priv;
}
2662 
/* Lookup key for the nexthop group hash table; proto selects which
 * union member is valid.
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
	enum mlxsw_sp_l3proto proto;
	union {
		struct fib_info *fi;
		struct mlxsw_sp_fib6_entry *fib6_entry;
	};
};
2670 
2671 static bool
2672 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2673 				    const struct in6_addr *gw, int ifindex,
2674 				    int weight)
2675 {
2676 	int i;
2677 
2678 	for (i = 0; i < nh_grp->count; i++) {
2679 		const struct mlxsw_sp_nexthop *nh;
2680 
2681 		nh = &nh_grp->nexthops[i];
2682 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2683 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2684 			return true;
2685 	}
2686 
2687 	return false;
2688 }
2689 
/* Check that a nexthop group matches an IPv6 FIB entry: same number of
 * nexthops, and every route in the entry has a corresponding nexthop
 * with the same device, weight and gateway.
 */
static bool
mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
			    const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	if (nh_grp->count != fib6_entry->nrt6)
		return false;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct in6_addr *gw;
		int ifindex, weight;

		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
		weight = mlxsw_sp_rt6->rt->rt6i_nh_weight;
		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
							 weight))
			return false;
	}

	return true;
}
2713 
/* rhashtable compare callback for nexthop groups. Returns zero on
 * match, non-zero otherwise. IPv4 groups compare by fib_info identity,
 * IPv6 groups by structural equality with the FIB entry.
 */
static int
mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
	case MLXSW_SP_L3_PROTO_IPV6:
		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
						    cmp_arg->fib6_entry);
	default:
		WARN_ON(1);
		return 1;
	}
}
2731 
/* A group's address family is that of its neighbour table. */
static int
mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->neigh_tbl->family;
}
2737 
/* rhashtable object hash for nexthop groups. IPv4 groups hash the
 * fib_info pointer; IPv6 groups hash the nexthop count XOR-folded with
 * the ifindices, which must match mlxsw_sp_nexthop6_group_hash() over
 * an equivalent FIB entry.
 */
static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group *nh_grp = data;
	const struct mlxsw_sp_nexthop *nh;
	struct fib_info *fi;
	unsigned int val;
	int i;

	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
	case AF_INET:
		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
		return jhash(&fi, sizeof(fi), seed);
	case AF_INET6:
		val = nh_grp->count;
		for (i = 0; i < nh_grp->count; i++) {
			nh = &nh_grp->nexthops[i];
			val ^= nh->ifindex;
		}
		return jhash(&val, sizeof(val), seed);
	default:
		WARN_ON(1);
		return 0;
	}
}
2762 
2763 static u32
2764 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2765 {
2766 	unsigned int val = fib6_entry->nrt6;
2767 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2768 	struct net_device *dev;
2769 
2770 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2771 		dev = mlxsw_sp_rt6->rt->dst.dev;
2772 		val ^= dev->ifindex;
2773 	}
2774 
2775 	return jhash(&val, sizeof(val), seed);
2776 }
2777 
/* rhashtable key hash for nexthop group lookups; must agree with
 * mlxsw_sp_nexthop_group_hash_obj() for matching objects.
 */
static u32
mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
	case MLXSW_SP_L3_PROTO_IPV6:
		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
	default:
		WARN_ON(1);
		return 0;
	}
}
2793 
/* Nexthop groups use custom hash/compare callbacks so an IPv4 fib_info
 * or an IPv6 FIB entry can serve as the lookup key.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.hashfn	     = mlxsw_sp_nexthop_group_hash,
	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
2800 
/* Insert a nexthop group into the hash table. IPv6 groups without a
 * gateway are intentionally not inserted.
 */
static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
	    !nh_grp->gateway)
		return 0;

	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
				      &nh_grp->ht_node,
				      mlxsw_sp_nexthop_group_ht_params);
}
2812 
/* Remove a nexthop group from the hash table; gateway-less IPv6 groups
 * were never inserted, so skip them.
 */
static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
	    !nh_grp->gateway)
		return;

	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
			       &nh_grp->ht_node,
			       mlxsw_sp_nexthop_group_ht_params);
}
2824 
2825 static struct mlxsw_sp_nexthop_group *
2826 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2827 			       struct fib_info *fi)
2828 {
2829 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2830 
2831 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2832 	cmp_arg.fi = fi;
2833 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2834 				      &cmp_arg,
2835 				      mlxsw_sp_nexthop_group_ht_params);
2836 }
2837 
2838 static struct mlxsw_sp_nexthop_group *
2839 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2840 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2841 {
2842 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2843 
2844 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2845 	cmp_arg.fib6_entry = fib6_entry;
2846 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2847 				      &cmp_arg,
2848 				      mlxsw_sp_nexthop_group_ht_params);
2849 }
2850 
/* Individual nexthops are hashed by their kernel fib_nh pointer. */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
2856 
/* Insert a nexthop into the router's nexthop hash table. */
static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
}
2863 
/* Remove a nexthop from the router's nexthop hash table. */
static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop *nh)
{
	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
			       mlxsw_sp_nexthop_ht_params);
}
2870 
/* Find the nexthop tracking the given fib_nh key, if any. */
static struct mlxsw_sp_nexthop *
mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
			struct mlxsw_sp_nexthop_key key)
{
	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
				      mlxsw_sp_nexthop_ht_params);
}
2878 
/* Issue a RALEU write that re-points all routes of one virtual router
 * from an old adjacency block to a new one in a single operation.
 */
static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     const struct mlxsw_sp_fib *fib,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}
2893 
/* After a group's adjacency block moved, update every FIB referenced by
 * the group's entries to point at the new block. Consecutive entries
 * sharing the same FIB are deduplicated via the previous-FIB check;
 * NOTE(review): entries of one FIB appear to be kept adjacent on the
 * list — a repeat update would be redundant but harmless otherwise.
 */
static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib *fib = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (fib == fib_entry->fib_node->fib)
			continue;
		fib = fib_entry->fib_node->fib;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}
2916 
/* Write one adjacency (RATR) entry for an Ethernet nexthop using its
 * neighbour's RIF and MAC, binding the nexthop's flow counter if valid
 * and explicitly unbinding it otherwise.
 */
static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				     struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
			    adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	if (nh->counter_valid)
		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
	else
		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
2934 
2935 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2936 			    struct mlxsw_sp_nexthop *nh)
2937 {
2938 	int i;
2939 
2940 	for (i = 0; i < nh->num_adj_entries; i++) {
2941 		int err;
2942 
2943 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2944 		if (err)
2945 			return err;
2946 	}
2947 
2948 	return 0;
2949 }
2950 
/* Write one adjacency entry for an IP-in-IP nexthop via the tunnel
 * type's ops.
 */
static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
					  u32 adj_index,
					  struct mlxsw_sp_nexthop *nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
}
2960 
2961 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2962 					u32 adj_index,
2963 					struct mlxsw_sp_nexthop *nh)
2964 {
2965 	int i;
2966 
2967 	for (i = 0; i < nh->num_adj_entries; i++) {
2968 		int err;
2969 
2970 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2971 						     nh);
2972 		if (err)
2973 			return err;
2974 	}
2975 
2976 	return 0;
2977 }
2978 
/* Write the group's nexthops into its adjacency block. Nexthops that
 * should not be offloaded are only marked as such and consume no
 * adjacency entries (the continue skips the index advance). The rest
 * are written when flagged for update or when the whole block was
 * reallocated.
 */
static int
mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_nexthop_group *nh_grp,
			      bool reallocate)
{
	u32 adj_index = nh_grp->adj_index; /* base */
	struct mlxsw_sp_nexthop *nh;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload) {
			nh->offloaded = 0;
			continue;
		}

		if (nh->update || reallocate) {
			switch (nh->type) {
			case MLXSW_SP_NEXTHOP_TYPE_ETH:
				err = mlxsw_sp_nexthop_update
					    (mlxsw_sp, adj_index, nh);
				break;
			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
				err = mlxsw_sp_nexthop_ipip_update
					    (mlxsw_sp, adj_index, nh);
				break;
			}
			if (err)
				return err;
			nh->update = 0;
			nh->offloaded = 1;
		}
		adj_index += nh->num_adj_entries;
	}
	return 0;
}
3017 
3018 static bool
3019 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3020 				 const struct mlxsw_sp_fib_entry *fib_entry);
3021 
/* Re-write to the device all FIB entries that use this nexthop group.
 * Only the first entry of each FIB node is actually programmed in the
 * device, so the others are skipped.
 */
static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}
	return 0;
}
3039 
3040 static void
3041 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3042 				   enum mlxsw_reg_ralue_op op, int err);
3043 
/* Refresh the kernel-visible offload indication of all FIB entries
 * using this group after its offload state changed. Calling
 * offload_refresh() with a WRITE op and err == 0 only recomputes the
 * RTNH_F_OFFLOAD flags; nothing is written to the device here.
 */
static void
mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
{
	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
	}
}
3057 
3058 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3059 {
3060 	/* Valid sizes for an adjacency group are:
3061 	 * 1-64, 512, 1024, 2048 and 4096.
3062 	 */
3063 	if (*p_adj_grp_size <= 64)
3064 		return;
3065 	else if (*p_adj_grp_size <= 512)
3066 		*p_adj_grp_size = 512;
3067 	else if (*p_adj_grp_size <= 1024)
3068 		*p_adj_grp_size = 1024;
3069 	else if (*p_adj_grp_size <= 2048)
3070 		*p_adj_grp_size = 2048;
3071 	else
3072 		*p_adj_grp_size = 4096;
3073 }
3074 
3075 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3076 					     unsigned int alloc_size)
3077 {
3078 	if (alloc_size >= 4096)
3079 		*p_adj_grp_size = 4096;
3080 	else if (alloc_size >= 2048)
3081 		*p_adj_grp_size = 2048;
3082 	else if (alloc_size >= 1024)
3083 		*p_adj_grp_size = 1024;
3084 	else if (alloc_size >= 512)
3085 		*p_adj_grp_size = 512;
3086 }
3087 
/* Fix up a requested adjacency group size so it is valid both for the
 * device (round up to a supported size) and for the KVD linear
 * allocator (round down to what an allocation of that size actually
 * yields). Returns a negative errno if no allocation can satisfy the
 * rounded-up request.
 */
static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
				     u16 *p_adj_grp_size)
{
	unsigned int alloc_size;
	int err;

	/* Round up the requested group size to the next size supported
	 * by the device and make sure the request can be satisfied.
	 */
	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
					     &alloc_size);
	if (err)
		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as much of them as
	 * possible.
	 */
	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);

	return 0;
}
3110 
/* Normalize the weights of the group's offloadable nexthops: divide
 * each weight by the GCD of all of them and record the sum of the
 * normalized weights. That sum is the smallest number of adjacency
 * entries that preserves the configured weight ratios. A sum of zero
 * means no nexthop in the group can currently be offloaded.
 */
static void
mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int i, g = 0, sum_norm_weight = 0;
	struct mlxsw_sp_nexthop *nh;

	/* First pass: GCD of the weights of all offloadable nexthops. */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload)
			continue;
		if (g > 0)
			g = gcd(nh->nh_weight, g);
		else
			g = nh->nh_weight;
	}

	/* Second pass: divide by the GCD. If no nexthop is offloadable
	 * this loop does no work, so g == 0 is never used as a divisor.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload)
			continue;
		nh->norm_nh_weight = nh->nh_weight / g;
		sum_norm_weight += nh->norm_nh_weight;
	}

	nh_grp->sum_norm_weight = sum_norm_weight;
}
3139 
/* Distribute the group's ecmp_size adjacency entries among the
 * offloadable nexthops in proportion to their normalized weights.
 * Cumulative rounding (each nexthop gets the difference between the
 * rounded cumulative share and the previous bound) guarantees the
 * per-nexthop entry counts sum to exactly ecmp_size.
 */
static void
mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int total = nh_grp->sum_norm_weight;
	u16 ecmp_size = nh_grp->ecmp_size;
	int i, weight = 0, lower_bound = 0;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
		int upper_bound;

		if (!nh->should_offload)
			continue;
		weight += nh->norm_nh_weight;
		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
		nh->num_adj_entries = upper_bound - lower_bound;
		lower_bound = upper_bound;
	}
}
3159 
/* Bring a nexthop group's device state in sync after a change to one
 * of its nexthops. Depending on what changed this either: rewrites
 * MACs in place (no membership change), or allocates a new adjacency
 * block sized by the normalized weights, programs it, and migrates the
 * FIB entries to it (freeing the old block). On any failure, or when
 * no nexthop is resolvable, the group falls back to trapping traffic
 * to the kernel (set_trap).
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	u16 ecmp_size, old_ecmp_size;
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	bool old_adj_index_valid;
	u32 old_adj_index;
	int i;
	int err;

	/* Gateway-less groups have no adjacency entries; just re-write
	 * their FIB entries.
	 */
	if (!nh_grp->gateway) {
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload != nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	mlxsw_sp_nexthop_group_normalize(nh_grp);
	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected so we just set
		 * the trap and let everthing flow through kernel.
		 */
		goto set_trap;

	ecmp_size = nh_grp->sum_norm_weight;
	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
	if (err)
		/* No valid allocation size available. */
		goto set_trap;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
	if (err) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	/* Remember the old block so FIB entries can be migrated off it
	 * and it can be freed once the new block is programmed.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	mlxsw_sp_nexthop_group_rebalance(nh_grp);
	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}

	/* Offload state within the group changed, so update the flags. */
	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

	return;

set_trap:
	/* Fallback: invalidate the adjacency index, mark all nexthops
	 * as not offloaded and re-write the FIB entries so they trap
	 * to the kernel; free the adjacency block if one was held.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
3270 
3271 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3272 					    bool removing)
3273 {
3274 	if (!removing)
3275 		nh->should_offload = 1;
3276 	else
3277 		nh->should_offload = 0;
3278 	nh->update = 1;
3279 }
3280 
/* A neighbour entry changed state; propagate the change to every
 * nexthop that uses it and refresh each nexthop's group in the device.
 */
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing)
{
	struct mlxsw_sp_nexthop *nh;

	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3294 
/* Bind a nexthop to its egress RIF and track it on the RIF's nexthop
 * list. Idempotent: a nexthop that already has a RIF is left alone.
 */
static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
				      struct mlxsw_sp_rif *rif)
{
	if (nh->rif)
		return;

	nh->rif = rif;
	list_add(&nh->rif_list_node, &rif->nexthop_list);
}
3304 
/* Unbind a nexthop from its egress RIF; counterpart of
 * mlxsw_sp_nexthop_rif_init(). Safe to call when no RIF is bound.
 */
static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
{
	if (!nh->rif)
		return;

	list_del(&nh->rif_list_node);
	nh->rif = NULL;
}
3313 
/* Resolve and bind the neighbour used by an Ethernet nexthop: look up
 * (or create) the kernel neighbour for the gateway address, attach the
 * driver's neigh entry to the nexthop, and derive the nexthop's
 * initial offload state from the neighbour's NUD state. Holds a
 * reference on the neighbour for the lifetime of the binding. No-op
 * for gateway-less groups or an already-bound nexthop.
 */
static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n;
	u8 nud_state, dead;
	int err;

	if (!nh->nh_grp->gateway || nh->neigh_entry)
		return 0;

	/* Take a reference of neigh here ensuring that neigh would
	 * not be destructed before the nexthop entry is finished.
	 * The reference is taken either in neigh_lookup() or
	 * in neigh_create() in case n is not found.
	 */
	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
	if (!n) {
		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
				 nh->rif->dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		/* Kick off resolution of the freshly created neighbour. */
		neigh_event_send(n, NULL);
	}
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry)) {
			/* NOTE(review): PTR_ERR(neigh_entry) would be more
			 * precise than a blanket -EINVAL.
			 */
			err = -EINVAL;
			goto err_neigh_entry_create;
		}
	}

	/* If that is the first nexthop connected to that neigh, add to
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
			      &mlxsw_sp->router->nexthop_neighs_list);

	nh->neigh_entry = neigh_entry;
	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
	/* Sample NUD state and liveness under the neighbour lock. */
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);
	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));

	return 0;

err_neigh_entry_create:
	neigh_release(n);
	return err;
}
3368 
/* Unbind a nexthop from its neighbour: mark it non-offloadable, drop
 * it from the neighbour's nexthop list, destroy the driver neigh entry
 * when it is both unused and disconnected, and release the neighbour
 * reference taken in mlxsw_sp_nexthop_neigh_init().
 */
static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	struct neighbour *n;

	if (!neigh_entry)
		return;
	n = neigh_entry->key.n;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	list_del(&nh->neigh_list_node);
	nh->neigh_entry = NULL;

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_del(&neigh_entry->nexthop_neighs_list_node);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

	neigh_release(n);
}
3394 
3395 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3396 {
3397 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3398 
3399 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3400 }
3401 
/* Bind a nexthop to an IP-in-IP tunnel entry: the tunnel's loopback
 * RIF becomes the nexthop's RIF and the initial offload state follows
 * the underlay device's administrative state. No-op for gateway-less
 * groups or an already-bound nexthop.
 */
static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	bool removing;

	if (!nh->nh_grp->gateway || nh->ipip_entry)
		return;

	nh->ipip_entry = ipip_entry;
	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
	__mlxsw_sp_nexthop_neigh_update(nh, removing);
	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
}
3416 
/* Unbind a nexthop from its IP-in-IP tunnel entry and mark it
 * non-offloadable. The RIF binding is torn down separately by
 * mlxsw_sp_nexthop_rif_fini() (see mlxsw_sp_nexthop_type_fini()).
 */
static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;

	if (!ipip_entry)
		return;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	nh->ipip_entry = NULL;
}
3428 
/* Return true if an IPv4 FIB nexthop egresses through a netdev of an
 * offloadable IP-in-IP tunnel type (unicast routes only). The matched
 * tunnel type is optionally stored via @p_ipipt.
 */
static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
					const struct fib_nh *fib_nh,
					enum mlxsw_sp_ipip_type *p_ipipt)
{
	struct net_device *dev = fib_nh->nh_dev;

	return dev &&
	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
}
3439 
/* Tear down the type-specific part of a nexthop. Note the order
 * differs by type: Ethernet drops the neighbour before the RIF, while
 * IP-in-IP drops the RIF before the tunnel binding.
 */
static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	switch (nh->type) {
	case MLXSW_SP_NEXTHOP_TYPE_ETH:
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		break;
	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
		mlxsw_sp_nexthop_rif_fini(nh);
		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
		break;
	}
}
3454 
/* Initialize the type-specific part of an IPv4 nexthop. If the egress
 * netdev is an offloadable IP-in-IP tunnel, bind to the tunnel;
 * otherwise treat it as an Ethernet nexthop and bind RIF and
 * neighbour. Returning 0 without a RIF is valid - the nexthop simply
 * stays non-offloaded until a RIF appears.
 */
static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct fib_nh *fib_nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct net_device *dev = fib_nh->nh_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV4)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;

	mlxsw_sp_nexthop_rif_init(nh, rif);
	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_neigh_init;

	return 0;

err_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}
3492 
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(); currently a thin
 * wrapper around the protocol-agnostic teardown.
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3498 
/* Initialize one IPv4 nexthop of a group from its kernel fib_nh:
 * record group/key/weight/gateway, insert into the router's nexthop
 * hashtable and list, allocate a flow counter, and - unless the
 * device is absent or the nexthop is administratively linkdown (with
 * ignore_routes_with_linkdown set) - initialize the type-specific
 * state. Counterpart of mlxsw_sp_nexthop4_fini().
 */
static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  struct fib_nh *fib_nh)
{
	struct net_device *dev = fib_nh->nh_dev;
	struct in_device *in_dev;
	int err;

	nh->nh_grp = nh_grp;
	nh->key.fib_nh = fib_nh;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	nh->nh_weight = fib_nh->nh_weight;
#else
	/* Without multipath support every nexthop weighs the same. */
	nh->nh_weight = 1;
#endif
	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
	if (err)
		return err;

	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	if (!dev)
		return 0;

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
		return 0;

	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
	return err;
}
3541 
/* Tear down an IPv4 nexthop in the reverse order of
 * mlxsw_sp_nexthop4_init(): type-specific state, router list, flow
 * counter, hashtable entry.
 */
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
3550 
/* Handle FIB_EVENT_NH_ADD/DEL notifications for a single IPv4 nexthop:
 * (re)initialize or tear down its type-specific state and refresh the
 * owning group in the device. Ignored once router offload is aborted.
 */
static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
				    unsigned long event, struct fib_nh *fib_nh)
{
	struct mlxsw_sp_nexthop_key key;
	struct mlxsw_sp_nexthop *nh;

	if (mlxsw_sp->router->aborted)
		return;

	key.fib_nh = fib_nh;
	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
	/* A notification for a nexthop we never inserted is a bug. */
	if (WARN_ON_ONCE(!nh))
		return;

	switch (event) {
	case FIB_EVENT_NH_ADD:
		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
		break;
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
		break;
	}

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
3576 
/* Recompute the offload state of every nexthop egressing through @rif
 * and refresh their groups. Ethernet nexthops stay offloadable;
 * IP-in-IP nexthops follow the underlay device's administrative state.
 */
static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh;
	bool removing;

	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
		switch (nh->type) {
		case MLXSW_SP_NEXTHOP_TYPE_ETH:
			removing = false;
			break;
		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
			break;
		default:
			WARN_ON(1);
			continue;
		}

		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3600 
/* Move all nexthops from @old_rif to @new_rif (e.g. when a RIF is
 * replaced) and recompute their offload state against the new RIF.
 */
static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif)
{
	struct mlxsw_sp_nexthop *nh;

	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
		nh->rif = new_rif;
	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
}
3612 
/* A RIF is going away: tear down the type-specific state of every
 * nexthop that used it and refresh their groups (which will fall back
 * to trapping). The _safe iterator is required because type_fini()
 * unlinks the nexthop from rif->nexthop_list.
 */
static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh, *tmp;

	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3623 
/* A fib_info requires gateway (adjacency) handling when its first
 * nexthop is reached via a link-scope gateway or egresses through an
 * offloadable IP-in-IP tunnel device.
 */
static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
				   const struct fib_info *fi)
{
	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
}
3630 
/* Create a nexthop group mirroring an IPv4 fib_info: allocate the
 * group with a trailing array of fi->fib_nhs nexthops, initialize each
 * nexthop, insert the group into the router hashtable and program it.
 * Holds a reference on @fi for the group's lifetime. On failure,
 * already-initialized nexthops are unwound in reverse order.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	nh_grp->priv = fi;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->neigh_tbl = &arp_tbl;

	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
	nh_grp->count = fi->fib_nhs;
	fib_info_hold(fi);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop4_init;
	}
	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop4_init:
	/* Unwind only the nexthops that were successfully initialized. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	fib_info_put(fi);
	kfree(nh_grp);
	return ERR_PTR(err);
}
3676 
/* Destroy an IPv4 nexthop group: remove it from the hashtable, tear
 * down all nexthops and refresh once more so the device-side adjacency
 * block is released (hence the WARN if it is still valid), then drop
 * the fib_info reference and free the group.
 */
static void
mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
	kfree(nh_grp);
}
3694 
/* Attach a FIB entry to the nexthop group matching @fi, creating the
 * group on first use. Groups are shared: membership of the group's
 * fib_list acts as the reference count (see the _put() counterpart).
 */
static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}
	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
	fib_entry->nh_group = nh_grp;
	return 0;
}
3711 
/* Detach a FIB entry from its nexthop group and destroy the group when
 * no other entry uses it (an empty fib_list means the last reference
 * is gone).
 */
static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
}
3722 
/* IPv4-specific offload eligibility: only routes with TOS 0 are
 * offloaded.
 */
static bool
mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
				  common);
	return !fib4_entry->tos;
}
3732 
/* Decide whether a FIB entry can be offloaded (rather than trapped):
 * the per-protocol check must pass, and the entry type must have the
 * state it needs - a valid adjacency index for remote routes, a RIF
 * for local routes; decap entries are always offloadable.
 */
static bool
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;

	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
			return false;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		break;
	}

	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return !!nh_group->adj_index_valid;
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return !!nh_group->nh_rif;
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		return true;
	default:
		return false;
	}
}
3758 
3759 static struct mlxsw_sp_nexthop *
3760 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3761 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3762 {
3763 	int i;
3764 
3765 	for (i = 0; i < nh_grp->count; i++) {
3766 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3767 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3768 
3769 		if (nh->rif && nh->rif->dev == rt->dst.dev &&
3770 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3771 				    &rt->rt6i_gateway))
3772 			return nh;
3773 		continue;
3774 	}
3775 
3776 	return NULL;
3777 }
3778 
/* Propagate the offload state of an IPv4 FIB entry to the kernel by
 * setting/clearing RTNH_F_OFFLOAD on its fib_nh(s). Local and decap
 * entries are represented by a single nexthop, so only the first one
 * is flagged; remote entries flag each nexthop per its own state.
 */
static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (nh->offloaded)
			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		else
			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3800 
/* Clear RTNH_F_OFFLOAD from the entry's nexthops - but only when this
 * is the last FIB entry using the group; otherwise another offloaded
 * entry still legitimately holds the flags.
 */
static void
mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	if (!list_is_singular(&nh_grp->fib_list))
		return;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3816 
/* Propagate the offload state of an IPv6 FIB entry to the kernel via
 * rt6i_nh_flags. Local entries flag only the first route; otherwise
 * each rt6 is flagged according to its matching nexthop's state.
 */
static void
mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
		struct mlxsw_sp_nexthop *nh;

		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
		if (nh && nh->offloaded)
			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
		else
			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3843 
/* Clear RTNH_F_OFFLOAD from every IPv6 route backing this FIB entry. */
static void
mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct rt6_info *rt = mlxsw_sp_rt6->rt;

		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3858 
/* Protocol dispatch for setting the kernel offload indication. */
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_offload_set(fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_offload_set(fib_entry);
		break;
	}
}
3870 
/* Protocol dispatch for clearing the kernel offload indication. */
static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
		break;
	}
}
3883 
/* Refresh the kernel offload indication after a RALUE operation:
 * delete always clears the flags; a successful write sets or clears
 * them based on current offloadability; a failed write leaves the
 * previous indication untouched.
 */
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err)
{
	switch (op) {
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
		if (err)
			return;
		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
			mlxsw_sp_fib_entry_offload_set(fib_entry);
		else
			mlxsw_sp_fib_entry_offload_unset(fib_entry);
		return;
	default:
		return;
	}
}
3903 
/* Pack the key part of a RALUE register for this FIB entry: protocol,
 * operation, virtual router, prefix length and destination address
 * (IPv4 as a host-order u32, IPv6 as a byte array).
 */
static void
mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
			      const struct mlxsw_sp_fib_entry *fib_entry,
			      enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
	enum mlxsw_reg_ralxx_protocol proto;
	u32 *p_dip;

	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;

	switch (fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		p_dip = (u32 *) fib_entry->fib_node->key.addr;
		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      *p_dip);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      fib_entry->fib_node->key.addr);
		break;
	}
}
3929 
/* Program a remote (gateway) FIB entry: forward through the group's
 * adjacency block when it is valid, otherwise install a trap so the
 * kernel forwards the traffic.
 */
static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
3958 
/* Program a local (directly connected) FIB entry: forward via the
 * group's RIF when offloadable, otherwise trap to the kernel.
 */
static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u16 trap_id = 0;
	u16 rif_index = 0;

	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif_index = rif->rif_index;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
				       rif_index);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
3982 
/* Program a FIB entry whose traffic is destined to the local router
 * itself (ip2me action - packets are delivered to the CPU).
 */
static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
3993 
/* Program an IP-in-IP decapsulation FIB entry by delegating to the
 * tunnel-type specific fib_entry_op(), passing the pre-allocated
 * decap tunnel index. A missing ipip_entry indicates a driver bug.
 */
static int
mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	if (WARN_ON(!ipip_entry))
		return -EINVAL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
				      fib_entry->decap.tunnel_index);
}
4009 
4010 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4011 				   struct mlxsw_sp_fib_entry *fib_entry,
4012 				   enum mlxsw_reg_ralue_op op)
4013 {
4014 	switch (fib_entry->type) {
4015 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4016 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4017 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4018 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4019 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4020 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4021 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4022 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4023 							fib_entry, op);
4024 	}
4025 	return -EINVAL;
4026 }
4027 
/* Perform a RALUE operation on a FIB entry and then refresh its
 * offload indication towards the kernel FIB, reflecting whether the
 * operation succeeded.
 */
static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);

	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);

	return err;
}
4038 
/* Write (create or overwrite) a FIB entry in the device. */
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}
4045 
/* Delete a FIB entry from the device. */
static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}
4052 
/* Derive the Spectrum FIB entry type from the kernel's IPv4 route
 * type. Local routes that match an IP-in-IP underlay decap address of
 * an UP tunnel become decap entries; other local/broadcast routes are
 * trapped, blackhole-like routes use the low-priority local action, and
 * unicast routes are either remote (gatewayed) or local (directly
 * connected).
 */
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
	struct net_device *dev = fen_info->fi->fib_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct fib_info *fi = fen_info->fi;

	switch (fen_info->type) {
	case RTN_LOCAL:
		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
						 MLXSW_SP_L3_PROTO_IPV4, dip);
		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
							     fib_entry,
							     ipip_entry);
		}
		/* fall through */
	case RTN_BROADCAST:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_UNREACHABLE: /* fall through */
	case RTN_BLACKHOLE: /* fall through */
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	case RTN_UNICAST:
		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	default:
		return -EINVAL;
	}
}
4096 
/* Allocate and initialize an IPv4 FIB entry for the given FIB node:
 * determine its entry type, take a reference on a matching nexthop
 * group and record the route keys used for later lookup. Returns the
 * new entry or an ERR_PTR() on failure.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
	if (!fib4_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib4_entry->common;

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop4_group_get;

	/* Cache the route identification keys for lookup / ordering. */
	fib4_entry->prio = fen_info->fi->fib_priority;
	fib4_entry->tb_id = fen_info->tb_id;
	fib4_entry->type = fen_info->type;
	fib4_entry->tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib4_entry;

err_nexthop4_group_get:
err_fib4_entry_type_set:
	kfree(fib4_entry);
	return ERR_PTR(err);
}
4133 
/* Release the entry's nexthop group reference and free the entry. */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
	kfree(fib4_entry);
}
4140 
/* Look up the driver's IPv4 FIB entry matching a kernel FIB
 * notification: resolve the virtual router and FIB node by table ID
 * and prefix, then scan the node's entry list for an exact match on
 * table, TOS, type and fib_info. Returns NULL when not found.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id == fen_info->tb_id &&
		    fib4_entry->tos == fen_info->tos &&
		    fib4_entry->type == fen_info->type &&
		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
		    fen_info->fi) {
			return fib4_entry;
		}
	}

	return NULL;
}
4173 
/* rhashtable parameters for the per-FIB node table, keyed by the
 * {prefix, prefix length} FIB key.
 */
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};
4180 
/* Insert a FIB node into the FIB's hash table. */
static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}
4187 
/* Remove a FIB node from the FIB's hash table. */
static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}
4194 
/* Look up a FIB node by prefix address and length. The key is fully
 * zeroed before filling so that unused address bytes (and any padding)
 * compare equal under the rhashtable's memcmp-based key comparison.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}
4206 
/* Allocate a FIB node for the given prefix and link it on the FIB's
 * node list. The node is not yet inserted into the hash table nor
 * bound to an LPM tree (see mlxsw_sp_fib_node_init()). Returns NULL
 * on allocation failure.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_node *fib_node;

	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
	if (!fib_node)
		return NULL;

	INIT_LIST_HEAD(&fib_node->entry_list);
	list_add(&fib_node->list, &fib->node_list);
	memcpy(fib_node->key.addr, addr, addr_len);
	fib_node->key.prefix_len = prefix_len;

	return fib_node;
}
4224 
/* Unlink a FIB node from its FIB's node list and free it. The node is
 * expected to hold no entries at this point.
 */
static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	list_del(&fib_node->list);
	WARN_ON(!list_empty(&fib_node->entry_list));
	kfree(fib_node);
}
4231 
/* Only the first entry on a node's list is programmed to the device;
 * test whether the given entry is that one.
 */
static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry)
{
	return list_first_entry(&fib_node->entry_list,
				struct mlxsw_sp_fib_entry, list) == fib_entry;
}
4239 
/* Account the node's prefix length in the LPM tree used for its
 * protocol. If the prefix length is not yet present in the tree, get
 * (possibly create) a tree that also covers it and rebind all virtual
 * routers of this FIB to the new tree before taking the reference.
 */
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		goto out;

	/* Prefix length not yet in the tree; request a tree with the
	 * current usage plus this prefix length.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

out:
	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
	return 0;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
	return err;
}
4271 
/* Drop the node's reference on its prefix length in the LPM tree.
 * When the last user of a prefix length goes away, opportunistically
 * migrate the FIB's virtual routers to a tree without it; on any
 * failure the old tree simply remains in use.
 */
static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	int err;

	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused one. If we fail, continue using the old one.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
				    fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

	return;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
4302 
/* Attach a freshly created FIB node to its FIB: insert it into the
 * hash table and account its prefix length in the LPM tree. Rolls
 * back the insertion on failure.
 */
static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node,
				  struct mlxsw_sp_fib *fib)
{
	int err;

	err = mlxsw_sp_fib_node_insert(fib, fib_node);
	if (err)
		return err;
	fib_node->fib = fib;

	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
	if (err)
		goto err_fib_lpm_tree_link;

	return 0;

err_fib_lpm_tree_link:
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
	return err;
}
4325 
/* Detach a FIB node from its FIB: release its LPM tree prefix
 * reference and remove it from the hash table. Mirrors
 * mlxsw_sp_fib_node_init().
 */
static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib *fib = fib_node->fib;

	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
}
4335 
/* Find or create the FIB node for a prefix in the given table. Takes
 * a reference on the virtual router; an existing node implicitly keeps
 * the reference taken when it was created. Returns the node or an
 * ERR_PTR() on failure.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
		      size_t addr_len, unsigned char prefix_len,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	fib = mlxsw_sp_vr_fib(vr, proto);

	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
	if (err)
		goto err_fib_node_init;

	return fib_node;

err_fib_node_init:
	mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
4373 
/* Release a FIB node obtained via mlxsw_sp_fib_node_get(). The node
 * (and the virtual router reference it holds) is only torn down once
 * no entries remain on it.
 */
static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->fib->vr;

	if (!list_empty(&fib_node->entry_list))
		return;
	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
4385 
/* Find the insertion point for a new IPv4 entry on a node's list,
 * which is kept sorted by descending table ID, then TOS, then
 * priority. Returns the first existing entry the new one should be
 * inserted before, or NULL when it belongs at the tail / the list is
 * empty.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id > new4_entry->tb_id)
			continue;
		if (fib4_entry->tb_id != new4_entry->tb_id)
			break;
		if (fib4_entry->tos > new4_entry->tos)
			continue;
		if (fib4_entry->prio >= new4_entry->prio ||
		    fib4_entry->tos < new4_entry->tos)
			return fib4_entry;
	}

	return NULL;
}
4406 
/* Append a sibling route: starting from the entry found by
 * mlxsw_sp_fib4_node_entry_find(), skip past all entries with the same
 * {table, TOS, priority} and insert the new entry after the last of
 * them. A NULL starting entry means there is nothing to append to.
 */
static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
			       struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	if (WARN_ON(!fib4_entry))
		return -EINVAL;

	fib_node = fib4_entry->common.fib_node;
	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
				 common.list) {
		if (fib4_entry->tb_id != new4_entry->tb_id ||
		    fib4_entry->tos != new4_entry->tos ||
		    fib4_entry->prio != new4_entry->prio)
			break;
	}

	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
	return 0;
}
4428 
/* Insert an IPv4 entry into its node's sorted list, honouring the
 * append/replace semantics of the originating FIB notification. For a
 * plain insert with no in-order position found, fall back to placing
 * the entry by table ID alone.
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
	if (replace && WARN_ON(!fib4_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib4_entry) {
		list_add_tail(&new4_entry->common.list,
			      &fib4_entry->common.list);
	} else {
		struct mlxsw_sp_fib4_entry *last;

		/* No in-order position; insert after the last entry
		 * whose table ID is not smaller than the new one's.
		 */
		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			if (new4_entry->tb_id > last->tb_id)
				break;
			fib4_entry = last;
		}

		if (fib4_entry)
			list_add(&new4_entry->common.list,
				 &fib4_entry->common.list);
		else
			list_add(&new4_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
4468 
/* Unlink an IPv4 entry from its node's entry list. */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
{
	list_del(&fib4_entry->common.list);
}
4474 
/* Program a newly linked entry to the device, but only if it became
 * the first (i.e. the active) entry on its node. When it displaced a
 * previously active entry, that entry's offload indication is cleared
 * as if it had been deleted.
 */
static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
4495 
/* Remove an entry from the device, but only if it was the active
 * (first) entry on its node. If a successor exists it is written over
 * the deleted entry so that the prefix stays programmed throughout;
 * otherwise the device entry is deleted outright.
 */
static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}
4516 
/* Link an IPv4 entry into its node's list and, if it became active,
 * program it to the device. Unlinks the entry again if the device
 * write fails.
 */
static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib4_entry *fib4_entry,
					 bool replace, bool append)
{
	int err;

	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
	if (err)
		return err;

	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_fib4_node_list_remove(fib4_entry);
	return err;
}
4537 
/* Undo mlxsw_sp_fib4_node_entry_link(): remove the entry from the
 * device (promoting a successor if any) and from the node's list, and
 * release decap resources for tunnel termination entries.
 */
static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
	mlxsw_sp_fib4_node_list_remove(fib4_entry);

	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
}
4548 
/* Complete a route replace: after the new entry was inserted directly
 * before the one it replaces, unlink and destroy the replaced entry
 * and drop its node reference. No-op unless this was a replace.
 */
static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *replaced;

	if (!replace)
		return;

	/* We inserted the new entry before replaced one */
	replaced = list_next_entry(fib4_entry, common.list);

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
4566 
/* Handle an IPv4 route add/replace/append notification: get (or
 * create) the FIB node for the prefix, create the driver entry, link
 * it into the node and program the device, then dispose of a replaced
 * entry if applicable. Silently ignored once FIB offload has been
 * aborted.
 */
static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info,
			 bool replace, bool append)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
					 &fen_info->dst, sizeof(fen_info->dst),
					 fen_info->dst_len,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib4_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib4_entry);
		goto err_fib4_entry_create;
	}

	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
					    append);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
4612 
/* Handle an IPv4 route delete notification: look up the corresponding
 * driver entry, unlink it from the device and its node, destroy it and
 * drop the node reference. Ignored once FIB offload has been aborted.
 */
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib4_entry))
		return;
	fib_node = fib4_entry->common.fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
4631 
4632 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4633 {
4634 	/* Packets with link-local destination IP arriving to the router
4635 	 * are trapped to the CPU, so no need to program specific routes
4636 	 * for them.
4637 	 */
4638 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
4639 		return true;
4640 
4641 	/* Multicast routes aren't supported, so ignore them. Neighbour
4642 	 * Discovery packets are specifically trapped.
4643 	 */
4644 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
4645 		return true;
4646 
4647 	/* Cloned routes are irrelevant in the forwarding path. */
4648 	if (rt->rt6i_flags & RTF_CACHE)
4649 		return true;
4650 
4651 	return false;
4652 }
4653 
/* Wrap a kernel IPv6 route in a driver rt6 container, taking a
 * reference on the route. Returns the container or an ERR_PTR() on
 * allocation failure.
 */
static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
	if (!mlxsw_sp_rt6)
		return ERR_PTR(-ENOMEM);

	/* In case of route replace, replaced route is deleted with
	 * no notification. Take reference to prevent accessing freed
	 * memory.
	 */
	mlxsw_sp_rt6->rt = rt;
	rt6_hold(rt);

	return mlxsw_sp_rt6;
}
4671 
/* Drop the reference taken by mlxsw_sp_rt6_create(). A stub is used
 * when IPv6 is compiled out, since rt6_release() is unavailable then.
 */
#if IS_ENABLED(CONFIG_IPV6)
static void mlxsw_sp_rt6_release(struct rt6_info *rt)
{
	rt6_release(rt);
}
#else
static void mlxsw_sp_rt6_release(struct rt6_info *rt)
{
}
#endif
4682 
/* Release the wrapped route's reference and free the container. */
static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
	kfree(mlxsw_sp_rt6);
}
4688 
/* A route can be part of a multipath entry only if it is a gateway
 * route that was not created by address auto-configuration.
 */
static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
{
	/* RTF_CACHE routes are ignored */
	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
}
4694 
/* Return the first kernel route backing an IPv6 FIB entry. */
static struct rt6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				list)->rt;
}
4701 
/* Find an existing IPv6 entry the new route should join as an
 * additional nexthop: same table, same metric, and both sides
 * multipath-capable. Returns NULL when the route must get an entry of
 * its own (including the replace case).
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
				 const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;

	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
		 * virtual router.
		 */
		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		if (rt->rt6i_metric < nrt->rt6i_metric)
			continue;
		if (rt->rt6i_metric == nrt->rt6i_metric &&
		    mlxsw_sp_fib6_rt_can_mp(rt))
			return fib6_entry;
		if (rt->rt6i_metric > nrt->rt6i_metric)
			break;
	}

	return NULL;
}
4732 
/* Find the container wrapping a specific kernel route within an IPv6
 * FIB entry, or NULL if the route is not part of the entry.
 */
static struct mlxsw_sp_rt6 *
mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
			    const struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		if (mlxsw_sp_rt6->rt == rt)
			return mlxsw_sp_rt6;
	}

	return NULL;
}
4746 
/* Test whether the route's output device is an offloadable IP-in-IP
 * tunnel; on success the tunnel type is stored via 'ret' (which may be
 * NULL when only the boolean answer is needed).
 */
static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
					const struct rt6_info *rt,
					enum mlxsw_sp_ipip_type *ret)
{
	return rt->dst.dev &&
	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
}
4754 
/* Initialize the type-specific part of an IPv6 nexthop: bind it to an
 * IP-in-IP tunnel entry when the egress device is an offloadable
 * tunnel, otherwise treat it as an Ethernet nexthop bound to a RIF
 * with neighbour resolution. A missing RIF is not an error — the
 * nexthop simply stays unresolved.
 */
static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop_group *nh_grp,
				       struct mlxsw_sp_nexthop *nh,
				       const struct rt6_info *rt)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct net_device *dev = rt->dst.dev;
	struct mlxsw_sp_rif *rif;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV6)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	mlxsw_sp_nexthop_rif_init(nh, rif);

	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}
4793 
/* Tear down the type-specific part of an IPv6 nexthop; the common
 * helper handles both the Ethernet and IP-in-IP cases.
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
4799 
/* Initialize one nexthop of an IPv6 nexthop group from a kernel
 * route: record weight and gateway, allocate an optional flow counter
 * and register the nexthop on the router's global list. A route with
 * no output device leaves the nexthop without type-specific state.
 */
static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  const struct rt6_info *rt)
{
	struct net_device *dev = rt->dst.dev;

	nh->nh_grp = nh_grp;
	nh->nh_weight = rt->rt6i_nh_weight;
	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);

	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	if (!dev)
		return 0;
	nh->ifindex = dev->ifindex;

	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
}
4820 
/* Undo mlxsw_sp_nexthop6_init(): release type-specific state, remove
 * the nexthop from the router's list and free its counter.
 */
static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}
4828 
/* A route counts as gatewayed if it carries RTF_GATEWAY or egresses
 * through an offloadable IP-in-IP tunnel device.
 */
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
				    const struct rt6_info *rt)
{
	return rt->rt6i_flags & RTF_GATEWAY ||
	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}
4835 
/* Create a nexthop group covering all routes of an IPv6 FIB entry:
 * allocate the group with a trailing nexthop array sized by the
 * entry's route count, initialize each nexthop from the corresponding
 * route, insert the group into the router's group table and program it
 * to the device. Returns the group or an ERR_PTR() on failure.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct mlxsw_sp_nexthop *nh;
	size_t alloc_size;
	int i = 0;
	int err;

	/* Group struct is followed by a flexible nexthop array. */
	alloc_size = sizeof(*nh_grp) +
		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
#if IS_ENABLED(CONFIG_IPV6)
	nh_grp->neigh_tbl = &nd_tbl;
#endif
	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
					struct mlxsw_sp_rt6, list);
	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
	nh_grp->count = fib6_entry->nrt6;
	for (i = 0; i < nh_grp->count; i++) {
		struct rt6_info *rt = mlxsw_sp_rt6->rt;

		nh = &nh_grp->nexthops[i];
		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
		if (err)
			goto err_nexthop6_init;
		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
	}

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop6_init:
	/* Unwind only the nexthops that were initialized. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}
4886 
/* Destroy an IPv6 nexthop group: remove it from the router's group
 * table, tear down each nexthop, let the device drop its adjacency
 * entries and free the group.
 */
static void
mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i = nh_grp->count;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON(nh_grp->adj_index_valid);
	kfree(nh_grp);
}
4903 
/* Attach an IPv6 FIB entry to a nexthop group, reusing an existing
 * group with identical nexthops when one is found, or creating a new
 * one otherwise.
 */
static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}

	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &nh_grp->fib_list);
	fib6_entry->common.nh_group = nh_grp;

	return 0;
}
4922 
/* Detach a FIB entry from its nexthop group and destroy the group if
 * this was its last user.
 */
static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
}
4933 
/* Rebind an IPv6 FIB entry to a nexthop group matching its (changed)
 * route list: detach it from the old group, attach it to a new or
 * matching one and re-program the entry. On failure the entry is
 * restored to the old group; the old group is only destroyed once it
 * has no remaining users.
 */
static int
mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
	int err;

	fib6_entry->common.nh_group = NULL;
	list_del(&fib6_entry->common.nexthop_group_node);

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	/* In case this entry is offloaded, then the adjacency index
	 * currently associated with it in the device's table is that
	 * of the old group. Start using the new one instead.
	 */
	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	if (list_empty(&old_nh_grp->fib_list))
		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
err_nexthop6_group_get:
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &old_nh_grp->fib_list);
	fib6_entry->common.nh_group = old_nh_grp;
	return err;
}
4969 
/* Add a kernel route as an additional nexthop of an existing IPv6 FIB
 * entry and rebind the entry to a matching nexthop group. Rolls the
 * route back out of the entry if the group update fails.
 */
static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6))
		return PTR_ERR(mlxsw_sp_rt6);

	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6++;

	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_update;

	return 0;

err_nexthop6_group_update:
	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	return err;
}
4997 
/* Remove a kernel route from an IPv6 FIB entry's nexthop set and
 * rebind the entry to a nexthop group without it. The group update's
 * return value is intentionally ignored — the route must be released
 * regardless.
 */
static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
	if (WARN_ON(!mlxsw_sp_rt6))
		return;

	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
5014 
/* Derive the hardware entry type from the route's flags: local and
 * anycast routes are trapped to the CPU, gateway routes are forwarded
 * through a nexthop group, and everything else (including reject
 * routes, see below) uses the lower-priority local action.
 */
static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 const struct rt6_info *rt)
{
	/* Packets hitting RTF_REJECT routes need to be discarded by the
	 * stack. We can rely on their destination device not having a
	 * RIF (it's the loopback device) and can thus use action type
	 * local, which will cause them to be trapped with a lower
	 * priority than packets that need to be locally received.
	 */
	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	else if (rt->rt6i_flags & RTF_REJECT)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
	else
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
}
5034 
/* Release every rt6 tracked by @fib6_entry, dropping the entry's
 * nexthop count along the way. After this nrt6 must be zero, which
 * mlxsw_sp_fib6_entry_destroy() WARNs about otherwise.
 */
static void
mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;

	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
				 list) {
		fib6_entry->nrt6--;
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
}
5047 
/* Allocate a FIB6 entry for @rt under @fib_node: wrap the route in an
 * mlxsw_sp_rt6, derive the entry type from the route's flags and
 * attach the entry to a (possibly shared) nexthop group. Returns the
 * new entry or ERR_PTR(); on failure everything is unwound in reverse
 * order.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
	if (!fib6_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib6_entry->common;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6)) {
		err = PTR_ERR(mlxsw_sp_rt6);
		goto err_rt6_create;
	}

	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);

	/* The new entry starts out with this single route as its only
	 * nexthop.
	 */
	INIT_LIST_HEAD(&fib6_entry->rt6_list);
	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6 = 1;
	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	fib_entry->fib_node = fib_node;

	return fib6_entry;

err_nexthop6_group_get:
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
err_rt6_create:
	kfree(fib6_entry);
	return ERR_PTR(err);
}
5089 
/* Free a FIB6 entry: drop the nexthop group reference, release all
 * tracked routes (which must leave nrt6 at zero) and free the entry.
 */
static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
	WARN_ON(fib6_entry->nrt6);
	kfree(fib6_entry);
}
5098 
/* Find the entry before which @nrt should be inserted in the node's
 * entry list. The list is kept ordered by table ID (descending) and,
 * within a table, by metric (ascending) — the comparisons below rely
 * on that invariant. In replace mode, an entry with the same metric
 * whose multipath capability matches @nrt's is preferred; otherwise a
 * multipath-capable @nrt falls back to the first same-metric entry.
 * Returns NULL when @nrt sorts after all existing entries.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
			    mlxsw_sp_fib6_rt_can_mp(nrt))
				return fib6_entry;
			if (mlxsw_sp_fib6_rt_can_mp(nrt))
				fallback = fallback ?: fib6_entry;
		}
		if (rt->rt6i_metric > nrt->rt6i_metric)
			return fallback ?: fib6_entry;
	}

	return fallback;
}
5125 
/* Insert @new6_entry into its node's entry list while preserving the
 * ordering by table ID (descending) and metric (ascending). When an
 * insertion point is found, the new entry is placed just before it; in
 * replace mode the displaced entry is expected to exist and is later
 * removed by mlxsw_sp_fib6_entry_replace().
 */
static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
			       bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
	struct mlxsw_sp_fib6_entry *fib6_entry;

	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);

	if (replace && WARN_ON(!fib6_entry))
		return -EINVAL;

	if (fib6_entry) {
		/* Place the new entry immediately before the found one. */
		list_add_tail(&new6_entry->common.list,
			      &fib6_entry->common.list);
	} else {
		struct mlxsw_sp_fib6_entry *last;

		/* No entry sorts after the new one; append it after the
		 * last entry of a table with a higher or equal ID.
		 */
		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);

			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
				break;
			fib6_entry = last;
		}

		if (fib6_entry)
			list_add(&new6_entry->common.list,
				 &fib6_entry->common.list);
		else
			list_add(&new6_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
5163 
/* Unlink @fib6_entry from its node's ordered entry list. */
static void
mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	list_del(&fib6_entry->common.list);
}
5169 
/* Link @fib6_entry into its FIB node: insert it into the node's list
 * and program it into the device. On device failure the list insertion
 * is rolled back.
 */
static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib6_entry *fib6_entry,
					 bool replace)
{
	int err;

	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
	if (err)
		return err;

	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
	return err;
}
5190 
/* Counterpart of mlxsw_sp_fib6_node_entry_link(): remove the entry
 * from the device and then from the node's list.
 */
static void
mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
}
5198 
/* Look up the offloaded FIB6 entry that tracks @rt: resolve the
 * virtual router and FIB node from the route's table and destination
 * prefix, then match on table ID, metric and the presence of @rt among
 * the entry's tracked routes. Returns NULL if nothing matches.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
					    sizeof(rt->rt6i_dst.addr),
					    rt->rt6i_dst.plen);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
		    rt->rt6i_metric == iter_rt->rt6i_metric &&
		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
			return fib6_entry;
	}

	return NULL;
}
5230 
/* Finish a route replace: after mlxsw_sp_fib6_node_list_insert()
 * placed the new entry immediately before the one it replaces, the
 * replaced entry is the next one in the list. Unlink and destroy it
 * and drop its FIB node reference. No-op unless @replace is set.
 */
static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
	struct mlxsw_sp_fib6_entry *replaced;

	if (!replace)
		return;

	replaced = list_next_entry(fib6_entry, common.list);

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
5247 
/* Offload an IPv6 route: either append it as a nexthop to an existing
 * multipath entry, or create a new entry and link it into its FIB
 * node. Routes with a source prefix are rejected (-EINVAL) since the
 * device cannot match on them; nothing is done after FIB abort.
 */
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
				    struct rt6_info *rt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	/* Source-specific routing is not supported by the device. */
	if (rt->rt6i_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
					 &rt->rt6i_dst.addr,
					 sizeof(rt->rt6i_dst.addr),
					 rt->rt6i_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	/* Before creating a new entry, try to append route to an existing
	 * multipath entry.
	 */
	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
	if (fib6_entry) {
		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
		if (err)
			goto err_fib6_entry_nexthop_add;
		return 0;
	}

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
	if (err)
		goto err_fib6_node_entry_link;

	/* In replace mode, dispose of the entry that was displaced. */
	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);

	return 0;

err_fib6_node_entry_link:
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
err_fib6_entry_nexthop_add:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
5304 
/* Stop offloading an IPv6 route. If the route is only one of several
 * nexthops of a multipath entry, just shrink the nexthop group;
 * otherwise tear down the whole entry and drop its FIB node reference.
 */
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
				     struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return;

	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
	if (WARN_ON(!fib6_entry))
		return;

	/* If route is part of a multipath entry, but not the last one
	 * removed, then only reduce its nexthop group.
	 */
	if (!list_is_singular(&fib6_entry->rt6_list)) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
		return;
	}

	fib_node = fib6_entry->common.fib_node;

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
5335 
/* Install the abort catch-all for one protocol: allocate LPM tree
 * @tree_id (RALTA), set its structure (RALST), bind every virtual
 * router to it (RALTB) and program a zero-length default route whose
 * action is ip2me (RALUE), so that all packets are trapped to the CPU
 * for software forwarding.
 */
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
					    enum mlxsw_reg_ralxx_protocol proto,
					    u8 tree_id)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	int i, err;

	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		char raltb_pl[MLXSW_REG_RALTB_LEN];
		char ralue_pl[MLXSW_REG_RALUE_LEN];

		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
				      raltb_pl);
		if (err)
			return err;

		/* Prefix length 0: matches everything in this VR. */
		mlxsw_reg_ralue_pack(ralue_pl, proto,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
				      ralue_pl);
		if (err)
			return err;
	}

	return 0;
}
5376 
/* Offload an IPv4 multicast route into the VR's MR table, creating the
 * virtual router if needed. The VR reference taken here is dropped by
 * mlxsw_sp_router_fibmr_del().
 *
 * NOTE(review): if mlxsw_sp_mr_route4_add() fails, the reference from
 * mlxsw_sp_vr_get() is not released here; the caller reacts to the
 * error by aborting FIB offload — confirm the reference is reclaimed
 * by the abort flush.
 */
static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
				     struct mfc_entry_notifier_info *men_info,
				     bool replace)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return 0;

	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
}
5392 
/* Remove an offloaded IPv4 multicast route and drop the VR reference
 * taken by mlxsw_sp_router_fibmr_add().
 */
static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
				      struct mfc_entry_notifier_info *men_info)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
5408 
/* Add a multicast VIF to the VR's MR table. The RIF lookup may return
 * NULL (no router interface on the device yet); mlxsw_sp_mr_vif_add()
 * is still called in that case. The VR reference taken here is dropped
 * by mlxsw_sp_router_fibmr_vif_del().
 *
 * NOTE(review): on mlxsw_sp_mr_vif_add() failure the VR reference is
 * not released here; the caller aborts FIB offload on error — confirm
 * the reference is reclaimed by the abort flush.
 */
static int
mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return 0;

	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
				   ven_info->vif_index,
				   ven_info->vif_flags, rif);
}
5428 
/* Remove a multicast VIF from the VR's MR table and drop the VR
 * reference taken by mlxsw_sp_router_fibmr_vif_add().
 */
static void
mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
5445 
/* Install the abort catch-all for both unicast protocols, using two
 * adjacent LPM trees (IPv4 on MLXSW_SP_LPM_TREE_MIN, IPv6 on the next
 * one).
 */
static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
	int err;

	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
					       MLXSW_SP_LPM_TREE_MIN);
	if (err)
		return err;

	/* The multicast router code does not need an abort trap as by default,
	 * packets that don't match any routes are trapped to the CPU.
	 */

	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
						MLXSW_SP_LPM_TREE_MIN + 1);
}
5464 
/* Tear down all IPv4 entries of @fib_node. Dropping the node reference
 * for the last entry frees the node itself, hence the do_break dance.
 */
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;

	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}
5485 
/* IPv6 counterpart of mlxsw_sp_fib4_node_flush(); do_break guards
 * against touching the node after its last reference freed it.
 */
static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;

	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}
5502 
/* Dispatch a node flush to the protocol-specific helper. */
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_node *fib_node)
{
	switch (fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
		break;
	}
}
5515 
/* Flush all FIB nodes of one protocol within a virtual router. The
 * same do_break pattern as the node flushes: flushing the last node
 * may free the containing FIB, so stop before the next iteration.
 */
static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_vr *vr,
				  enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
	struct mlxsw_sp_fib_node *fib_node, *tmp;

	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
		bool do_break = &tmp->list == &fib->node_list;

		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}
5531 
/* Flush every in-use virtual router: multicast table first, then IPv4
 * and IPv6 unicast. The VR is re-checked between flushes because
 * releasing the IPv4 FIB may have destroyed it.
 */
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];

		if (!mlxsw_sp_vr_is_used(vr))
			continue;

		mlxsw_sp_mr_table_flush(vr->mr4_table);
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);

		/* If virtual router was only used for IPv4, then it's no
		 * longer used.
		 */
		if (!mlxsw_sp_vr_is_used(vr))
			continue;
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	}
}
5553 
/* Give up on FIB offload: flush everything from the device, mark the
 * router aborted (idempotent — later calls return early) and install
 * catch-all traps so all routed traffic goes to the CPU.
 */
static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router->aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router->aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}
5567 
/* Deferred FIB notification: the atomic notifier copies the event info
 * into this structure (taking references where needed) and the work
 * item processes it under RTNL. Only one union member is valid,
 * selected by @event and the notifier family.
 */
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	union {
		struct fib6_entry_notifier_info fen6_info;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
		struct mfc_entry_notifier_info men_info;
		struct vif_entry_notifier_info ven_info;
	};
	struct mlxsw_sp *mlxsw_sp;	/* owning device */
	unsigned long event;		/* FIB_EVENT_* value */
};
5581 
/* Process a deferred IPv4 FIB event under RTNL. Any offload failure
 * triggers a full FIB abort. References taken by
 * mlxsw_sp_router_fib4_event() are released here.
 */
static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace, append;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
					       replace, append);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
					fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5624 
/* Process a deferred IPv6 FIB event under RTNL; IPv6 counterpart of
 * mlxsw_sp_router_fib4_event_work(). Route references taken by
 * mlxsw_sp_router_fib6_event() are released here.
 */
static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
					       fib_work->fen6_info.rt, replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5658 
/* Process a deferred IPv4 multicast routing event (MFC entry or VIF)
 * under RTNL. References taken by mlxsw_sp_router_fibmr_event() are
 * released here.
 */
static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;

		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
						replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		ipmr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
		ipmr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD:
		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
						    &fib_work->ven_info);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_VIF_DEL:
		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
					      &fib_work->ven_info);
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5705 
/* Copy an IPv4 FIB notification into @fib_work and take the references
 * needed to keep the copied pointers alive until the work item runs.
 * Runs in the (atomic) notifier context.
 */
static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
}
5734 
/* Copy an IPv6 FIB notification into @fib_work and hold the route so
 * it survives until mlxsw_sp_router_fib6_event_work() releases it.
 */
static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen6_info;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen6_info = container_of(info, struct fib6_entry_notifier_info,
					 info);
		fib_work->fen6_info = *fen6_info;
		rt6_hold(fib_work->fen6_info.rt);
		break;
	}
}
5751 
/* Copy a multicast routing notification into @fib_work, holding the
 * MFC cache entry or the netdevice so the deferred work can use them.
 */
static void
mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
			    struct fib_notifier_info *info)
{
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
		ipmr_cache_hold(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD: /* fall through */
	case FIB_EVENT_VIF_DEL:
		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
		dev_hold(fib_work->ven_info.dev);
		break;
	}
}
5770 
/* Validate a FIB rule notification. Only default rules and l3mdev
 * rules are supported; anything else returns -1 so the caller queues
 * work that aborts FIB offload. Rule deletions and events after abort
 * are accepted as no-ops.
 */
static int mlxsw_sp_router_fib_rule_event(unsigned long event,
					  struct fib_notifier_info *info,
					  struct mlxsw_sp *mlxsw_sp)
{
	struct netlink_ext_ack *extack = info->extack;
	struct fib_rule_notifier_info *fr_info;
	struct fib_rule *rule;
	int err = 0;

	/* nothing to do at the moment */
	if (event == FIB_EVENT_RULE_DEL)
		return 0;

	if (mlxsw_sp->router->aborted)
		return 0;

	fr_info = container_of(info, struct fib_rule_notifier_info, info);
	rule = fr_info->rule;

	switch (info->family) {
	case AF_INET:
		if (!fib4_rule_default(rule) && !rule->l3mdev)
			err = -1;
		break;
	case AF_INET6:
		if (!fib6_rule_default(rule) && !rule->l3mdev)
			err = -1;
		break;
	case RTNL_FAMILY_IPMR:
		if (!ipmr_rule_default(rule) && !rule->l3mdev)
			err = -1;
		break;
	}

	if (err < 0)
		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload");

	return err;
}
5810 
/* Called with rcu_read_lock() */
/* FIB notifier callback: runs in atomic context, so it only validates
 * the event, copies the notification info (taking references) and
 * defers the real processing to a per-family work item that runs
 * under RTNL.
 */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlxsw_sp_router *router;
	int err;

	/* Only the init namespace and supported families are handled. */
	if (!net_eq(info->net, &init_net) ||
	    (info->family != AF_INET && info->family != AF_INET6 &&
	     info->family != RTNL_FAMILY_IPMR))
		return NOTIFY_DONE;

	router = container_of(nb, struct mlxsw_sp_router, fib_nb);

	switch (event) {
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		err = mlxsw_sp_router_fib_rule_event(event, info,
						     router->mlxsw_sp);
		/* On error, fall through and queue work that will
		 * trigger a FIB abort.
		 */
		if (!err)
			return NOTIFY_DONE;
	}

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	fib_work->mlxsw_sp = router->mlxsw_sp;
	fib_work->event = event;

	switch (info->family) {
	case AF_INET:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
		mlxsw_sp_router_fib4_event(fib_work, info);
		break;
	case AF_INET6:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
		mlxsw_sp_router_fib6_event(fib_work, info);
		break;
	case RTNL_FAMILY_IPMR:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
		mlxsw_sp_router_fibmr_event(fib_work, info);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;
}
5862 
5863 static struct mlxsw_sp_rif *
5864 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5865 			 const struct net_device *dev)
5866 {
5867 	int i;
5868 
5869 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5870 		if (mlxsw_sp->router->rifs[i] &&
5871 		    mlxsw_sp->router->rifs[i]->dev == dev)
5872 			return mlxsw_sp->router->rifs[i];
5873 
5874 	return NULL;
5875 }
5876 
/* Disable a router interface in the device: read back its current
 * RITR configuration, clear the enable bit and write it again.
 */
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
5890 
/* A RIF is going away: disable it in the device and flush the nexthops
 * and neighbour entries that were resolved through it.
 */
static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
}
5898 
/* Decide whether an inetaddr/inet6addr event should (re)configure the
 * RIF for @dev: on NETDEV_UP a RIF is created only if none exists; on
 * NETDEV_DOWN the RIF is removed only once both the IPv4 and IPv6
 * address lists are empty and the device is not an L3 slave.
 */
static bool
mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
			   unsigned long event)
{
	struct inet6_dev *inet6_dev;
	bool addr_list_empty = true;
	struct in_device *idev;

	switch (event) {
	case NETDEV_UP:
		return rif == NULL;
	case NETDEV_DOWN:
		idev = __in_dev_get_rtnl(dev);
		if (idev && idev->ifa_list)
			addr_list_empty = false;

		inet6_dev = __in6_dev_get(dev);
		if (addr_list_empty && inet6_dev &&
		    !list_empty(&inet6_dev->addr_list))
			addr_list_empty = false;

		if (rif && addr_list_empty &&
		    !netif_is_l3_slave(rif->dev))
			return true;
		/* It is possible we already removed the RIF ourselves
		 * if it was assigned to a netdev that is now a bridge
		 * or LAG slave.
		 */
		return false;
	}

	return false;
}
5932 
5933 static enum mlxsw_sp_rif_type
5934 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
5935 		      const struct net_device *dev)
5936 {
5937 	enum mlxsw_sp_fid_type type;
5938 
5939 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
5940 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
5941 
5942 	/* Otherwise RIF type is derived from the type of the underlying FID. */
5943 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
5944 		type = MLXSW_SP_FID_TYPE_8021Q;
5945 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
5946 		type = MLXSW_SP_FID_TYPE_8021Q;
5947 	else if (netif_is_bridge_master(dev))
5948 		type = MLXSW_SP_FID_TYPE_8021D;
5949 	else
5950 		type = MLXSW_SP_FID_TYPE_RFID;
5951 
5952 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
5953 }
5954 
5955 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5956 {
5957 	int i;
5958 
5959 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5960 		if (!mlxsw_sp->router->rifs[i]) {
5961 			*p_rif_index = i;
5962 			return 0;
5963 		}
5964 	}
5965 
5966 	return -ENOBUFS;
5967 }
5968 
5969 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5970 					       u16 vr_id,
5971 					       struct net_device *l3_dev)
5972 {
5973 	struct mlxsw_sp_rif *rif;
5974 
5975 	rif = kzalloc(rif_size, GFP_KERNEL);
5976 	if (!rif)
5977 		return NULL;
5978 
5979 	INIT_LIST_HEAD(&rif->nexthop_list);
5980 	INIT_LIST_HEAD(&rif->neigh_list);
5981 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
5982 	rif->mtu = l3_dev->mtu;
5983 	rif->vr_id = vr_id;
5984 	rif->dev = l3_dev;
5985 	rif->rif_index = rif_index;
5986 
5987 	return rif;
5988 }
5989 
/* Return the RIF registered at @rif_index (may be NULL). */
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
					   u16 rif_index)
{
	return mlxsw_sp->router->rifs[rif_index];
}
5995 
/* Return the device index of @rif. */
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
{
	return rif->rif_index;
}
6000 
/* Return the RIF index of an IP-in-IP loopback RIF. */
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->common.rif_index;
}
6005 
/* Return the underlay virtual router ID of an IP-in-IP loopback RIF. */
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->ul_vr_id;
}
6010 
/* Return the ifindex of the netdevice backing @rif. */
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
{
	return rif->dev->ifindex;
}
6015 
/* Return the netdevice backing @rif. */
const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
{
	return rif->dev;
}
6020 
/* Create a router interface (RIF) for @params->dev and publish it in
 * router->rifs[]. The RIF type (and thus its ops) is derived from the
 * kind of netdev, and the RIF is bound to the virtual router matching
 * the netdev's FIB table (falling back to the main table).
 * Returns an ERR_PTR() on failure; all steps are unwound in reverse
 * order via the goto chain.
 */
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack)
{
	u32 tb_id = l3mdev_fib_table(params->dev);
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp_fid *fid = NULL;
	enum mlxsw_sp_rif_type type;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;
	u16 rif_index;
	int err;

	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
	ops = mlxsw_sp->router->rif_ops_arr[type];

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	/* Keep the VR alive for as long as this RIF uses it. */
	vr->rif_count++;

	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
		goto err_rif_index_alloc;
	}

	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
	if (!rif) {
		err = -ENOMEM;
		goto err_rif_alloc;
	}
	rif->mlxsw_sp = mlxsw_sp;
	rif->ops = ops;

	/* fid_get is optional; RIF types without it (e.g. loopback) have
	 * no FID.
	 */
	if (ops->fid_get) {
		fid = ops->fid_get(rif);
		if (IS_ERR(fid)) {
			err = PTR_ERR(fid);
			goto err_fid_get;
		}
		rif->fid = fid;
	}

	/* Type-specific initialization before the RIF is programmed. */
	if (ops->setup)
		ops->setup(rif, params);

	err = ops->configure(rif);
	if (err)
		goto err_configure;

	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
	if (err)
		goto err_mr_rif_add;

	mlxsw_sp_rif_counters_alloc(rif);
	/* Publish only after the RIF is fully configured. */
	mlxsw_sp->router->rifs[rif_index] = rif;

	return rif;

err_mr_rif_add:
	ops->deconfigure(rif);
err_configure:
	if (fid)
		mlxsw_sp_fid_put(fid);
err_fid_get:
	kfree(rif);
err_rif_alloc:
err_rif_index_alloc:
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
6095 
/* Destroy a RIF, unwinding mlxsw_sp_rif_create() in reverse order. */
void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
{
	const struct mlxsw_sp_rif_ops *ops = rif->ops;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;
	struct mlxsw_sp_vr *vr;

	/* Detach nexthops and neighbour entries still using this RIF
	 * before it is unpublished.
	 */
	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
	vr = &mlxsw_sp->router->vrs[rif->vr_id];

	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
	mlxsw_sp_rif_counters_free(rif);
	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
	ops->deconfigure(rif);
	if (fid)
		/* Loopback RIFs are not associated with a FID. */
		mlxsw_sp_fid_put(fid);
	kfree(rif);
	/* Release the VR reference taken at creation time. */
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
6117 
6118 static void
6119 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6120 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6121 {
6122 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6123 
6124 	params->vid = mlxsw_sp_port_vlan->vid;
6125 	params->lag = mlxsw_sp_port->lagged;
6126 	if (params->lag)
6127 		params->lag_id = mlxsw_sp_port->lag_id;
6128 	else
6129 		params->system_port = mlxsw_sp_port->local_port;
6130 }
6131 
/* Make a {port, VLAN} a router port: ensure a sub-port RIF exists for
 * @l3_dev, map the {port, VID} to the RIF's FID, and put the VID into
 * a forwarding, non-learning state. On error all steps are undone in
 * reverse order.
 */
static int
mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
			       struct net_device *l3_dev,
			       struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	u16 vid = mlxsw_sp_port_vlan->vid;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_fid *fid;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (!rif) {
		struct mlxsw_sp_rif_params params = {
			.dev = l3_dev,
		};

		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
		if (IS_ERR(rif))
			return PTR_ERR(rif);
	}

	/* FID was already created, just take a reference */
	fid = rif->ops->fid_get(rif);
	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
	if (err)
		goto err_fid_port_vid_map;

	/* Router ports do not learn MACs and always forward. */
	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
	if (err)
		goto err_port_vid_learning_set;

	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
					BR_STATE_FORWARDING);
	if (err)
		goto err_port_vid_stp_set;

	mlxsw_sp_port_vlan->fid = fid;

	return 0;

err_port_vid_stp_set:
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
err_port_vid_learning_set:
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
err_fid_port_vid_map:
	mlxsw_sp_fid_put(fid);
	return err;
}
6183 
/* Reverse of mlxsw_sp_port_vlan_router_join(). */
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
	u16 vid = mlxsw_sp_port_vlan->vid;

	/* Only {port, VLAN}s mapped to a router FID are router ports. */
	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
		return;

	mlxsw_sp_port_vlan->fid = NULL;
	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
	/* If router port holds the last reference on the rFID, then the
	 * associated Sub-port RIF will be destroyed.
	 */
	mlxsw_sp_fid_put(fid);
}
6203 
6204 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6205 					     struct net_device *port_dev,
6206 					     unsigned long event, u16 vid,
6207 					     struct netlink_ext_ack *extack)
6208 {
6209 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6210 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6211 
6212 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6213 	if (WARN_ON(!mlxsw_sp_port_vlan))
6214 		return -EINVAL;
6215 
6216 	switch (event) {
6217 	case NETDEV_UP:
6218 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6219 						      l3_dev, extack);
6220 	case NETDEV_DOWN:
6221 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6222 		break;
6223 	}
6224 
6225 	return 0;
6226 }
6227 
6228 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6229 					unsigned long event,
6230 					struct netlink_ext_ack *extack)
6231 {
6232 	if (netif_is_bridge_port(port_dev) ||
6233 	    netif_is_lag_port(port_dev) ||
6234 	    netif_is_ovs_port(port_dev))
6235 		return 0;
6236 
6237 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6238 						 extack);
6239 }
6240 
6241 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6242 					 struct net_device *lag_dev,
6243 					 unsigned long event, u16 vid,
6244 					 struct netlink_ext_ack *extack)
6245 {
6246 	struct net_device *port_dev;
6247 	struct list_head *iter;
6248 	int err;
6249 
6250 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6251 		if (mlxsw_sp_port_dev_check(port_dev)) {
6252 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6253 								port_dev,
6254 								event, vid,
6255 								extack);
6256 			if (err)
6257 				return err;
6258 		}
6259 	}
6260 
6261 	return 0;
6262 }
6263 
/* Handle an inet address event on a LAG netdev. Bridged LAGs are
 * handled through the bridge device instead.
 */
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
				       unsigned long event,
				       struct netlink_ext_ack *extack)
{
	if (netif_is_bridge_port(lag_dev))
		return 0;

	/* A bare LAG routes on VID 1. */
	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
					     extack);
}
6274 
6275 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6276 					  unsigned long event,
6277 					  struct netlink_ext_ack *extack)
6278 {
6279 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6280 	struct mlxsw_sp_rif_params params = {
6281 		.dev = l3_dev,
6282 	};
6283 	struct mlxsw_sp_rif *rif;
6284 
6285 	switch (event) {
6286 	case NETDEV_UP:
6287 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6288 		if (IS_ERR(rif))
6289 			return PTR_ERR(rif);
6290 		break;
6291 	case NETDEV_DOWN:
6292 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6293 		mlxsw_sp_rif_destroy(rif);
6294 		break;
6295 	}
6296 
6297 	return 0;
6298 }
6299 
6300 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6301 					unsigned long event,
6302 					struct netlink_ext_ack *extack)
6303 {
6304 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6305 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6306 
6307 	if (netif_is_bridge_port(vlan_dev))
6308 		return 0;
6309 
6310 	if (mlxsw_sp_port_dev_check(real_dev))
6311 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6312 							 event, vid, extack);
6313 	else if (netif_is_lag_master(real_dev))
6314 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6315 						     vid, extack);
6316 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6317 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6318 
6319 	return 0;
6320 }
6321 
/* Dispatch an inet address event on the kind of netdev the address
 * was configured on. Unrecognized netdev types are ignored.
 */
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
	return 0;
}
6337 
6338 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6339 			    unsigned long event, void *ptr)
6340 {
6341 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6342 	struct net_device *dev = ifa->ifa_dev->dev;
6343 	struct mlxsw_sp *mlxsw_sp;
6344 	struct mlxsw_sp_rif *rif;
6345 	int err = 0;
6346 
6347 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6348 	if (event == NETDEV_UP)
6349 		goto out;
6350 
6351 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6352 	if (!mlxsw_sp)
6353 		goto out;
6354 
6355 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6356 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6357 		goto out;
6358 
6359 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6360 out:
6361 	return notifier_from_errno(err);
6362 }
6363 
6364 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6365 				  unsigned long event, void *ptr)
6366 {
6367 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6368 	struct net_device *dev = ivi->ivi_dev->dev;
6369 	struct mlxsw_sp *mlxsw_sp;
6370 	struct mlxsw_sp_rif *rif;
6371 	int err = 0;
6372 
6373 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6374 	if (!mlxsw_sp)
6375 		goto out;
6376 
6377 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6378 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6379 		goto out;
6380 
6381 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6382 out:
6383 	return notifier_from_errno(err);
6384 }
6385 
/* Deferred-work context for IPv6 address events, which arrive in
 * atomic context and are therefore handled in process context.
 */
struct mlxsw_sp_inet6addr_event_work {
	struct work_struct work;
	struct net_device *dev;	/* held via dev_hold() until the work runs */
	unsigned long event;
};
6391 
/* Process-context handler for a deferred IPv6 address event. */
static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
	struct net_device *dev = inet6addr_work->dev;
	unsigned long event = inet6addr_work->event;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;

	/* RTNL serializes RIF changes against other netdev events. */
	rtnl_lock();
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	__mlxsw_sp_inetaddr_event(dev, event, NULL);
out:
	rtnl_unlock();
	/* Drop the reference taken when the work item was scheduled. */
	dev_put(dev);
	kfree(inet6addr_work);
}
6416 
/* Called with rcu_read_lock() */
/* IPv6 address notifier. Runs in atomic context, so the actual RIF
 * work is deferred to a workqueue; the netdev is held until then.
 */
int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
			     unsigned long event, void *ptr)
{
	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
	struct net_device *dev = if6->idev->dev;

	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
	if (event == NETDEV_UP)
		return NOTIFY_DONE;

	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
		return NOTIFY_DONE;

	/* GFP_ATOMIC: we may be called from softirq context. */
	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
	if (!inet6addr_work)
		return NOTIFY_BAD;

	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
	inet6addr_work->dev = dev;
	inet6addr_work->event = event;
	/* Keep the netdev alive until the work item runs. */
	dev_hold(dev);
	mlxsw_core_schedule_work(&inet6addr_work->work);

	return NOTIFY_DONE;
}
6444 
6445 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6446 				   unsigned long event, void *ptr)
6447 {
6448 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6449 	struct net_device *dev = i6vi->i6vi_dev->dev;
6450 	struct mlxsw_sp *mlxsw_sp;
6451 	struct mlxsw_sp_rif *rif;
6452 	int err = 0;
6453 
6454 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6455 	if (!mlxsw_sp)
6456 		goto out;
6457 
6458 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6459 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6460 		goto out;
6461 
6462 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6463 out:
6464 	return notifier_from_errno(err);
6465 }
6466 
/* Update the MAC and MTU of an existing RIF via the RITR register:
 * read the current entry, patch the fields, and write it back with
 * the create/update opcode.
 */
static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
			     const char *mac, int mtu)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6483 
/* Handle a MAC or MTU change on a netdev that has a RIF: replace the
 * router FDB entry, update the RITR entry, and keep the multicast
 * router table in sync with the new MTU. On failure the previous
 * MAC/MTU are restored in reverse order.
 */
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
{
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	u16 fid_index;
	int err;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		return 0;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	fid_index = mlxsw_sp_fid_index(rif->fid);

	/* Remove the FDB entry for the old MAC before the update. */
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
	if (err)
		return err;

	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
				dev->mtu);
	if (err)
		goto err_rif_edit;

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
	if (err)
		goto err_rif_fdb_op;

	if (rif->mtu != dev->mtu) {
		struct mlxsw_sp_vr *vr;

		/* The RIF is relevant only to its mr_table instance, as unlike
		 * unicast routing, in multicast routing a RIF cannot be shared
		 * between several multicast routing tables.
		 */
		vr = &mlxsw_sp->router->vrs[rif->vr_id];
		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
	}

	/* Commit the new values to our cached copy only after all HW
	 * updates succeeded.
	 */
	ether_addr_copy(rif->addr, dev->dev_addr);
	rif->mtu = dev->mtu;

	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
	return err;
}
6537 
6538 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6539 				  struct net_device *l3_dev,
6540 				  struct netlink_ext_ack *extack)
6541 {
6542 	struct mlxsw_sp_rif *rif;
6543 
6544 	/* If netdev is already associated with a RIF, then we need to
6545 	 * destroy it and create a new one with the new virtual router ID.
6546 	 */
6547 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6548 	if (rif)
6549 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6550 
6551 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6552 }
6553 
6554 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6555 				    struct net_device *l3_dev)
6556 {
6557 	struct mlxsw_sp_rif *rif;
6558 
6559 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6560 	if (!rif)
6561 		return;
6562 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6563 }
6564 
6565 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6566 				 struct netdev_notifier_changeupper_info *info)
6567 {
6568 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6569 	int err = 0;
6570 
6571 	if (!mlxsw_sp)
6572 		return 0;
6573 
6574 	switch (event) {
6575 	case NETDEV_PRECHANGEUPPER:
6576 		return 0;
6577 	case NETDEV_CHANGEUPPER:
6578 		if (info->linking) {
6579 			struct netlink_ext_ack *extack;
6580 
6581 			extack = netdev_notifier_info_to_extack(&info->info);
6582 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6583 		} else {
6584 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6585 		}
6586 		break;
6587 	}
6588 
6589 	return err;
6590 }
6591 
/* Upcast a generic RIF to its sub-port container. */
static struct mlxsw_sp_rif_subport *
mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_subport, common);
}
6597 
6598 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6599 				       const struct mlxsw_sp_rif_params *params)
6600 {
6601 	struct mlxsw_sp_rif_subport *rif_subport;
6602 
6603 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6604 	rif_subport->vid = params->vid;
6605 	rif_subport->lag = params->lag;
6606 	if (params->lag)
6607 		rif_subport->lag_id = params->lag_id;
6608 	else
6609 		rif_subport->system_port = params->system_port;
6610 }
6611 
/* Write (or invalidate, when !enable) the sub-port RITR entry that
 * binds this RIF to its {port-or-LAG, VLAN}.
 */
static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_rif_subport *rif_subport;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	rif_subport = mlxsw_sp_rif_subport_rif(rif);
	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
			    rif->rif_index, rif->vr_id, rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
				  rif_subport->lag ? rif_subport->lag_id :
						     rif_subport->system_port,
				  rif_subport->vid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6629 
/* Program the sub-port RIF and install the router FDB entry for its
 * MAC. Undone in reverse order on error.
 */
static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
{
	int err;

	err = mlxsw_sp_rif_subport_op(rif, true);
	if (err)
		return err;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_subport_op(rif, false);
	return err;
}
6650 
/* Reverse of mlxsw_sp_rif_subport_configure(). */
static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_subport_op(rif, false);
}
6660 
/* Sub-port RIFs use a router FID (rFID) keyed by the RIF index. */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
}
6666 
/* RIF ops for RIFs on top of a single {port-or-LAG, VLAN}. */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
	.setup			= mlxsw_sp_rif_subport_setup,
	.configure		= mlxsw_sp_rif_subport_configure,
	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
	.fid_get		= mlxsw_sp_rif_subport_fid_get,
};
6675 
/* Program a VLAN or FID RITR entry for this RIF. @vid_fid is a VID
 * when @type is MLXSW_REG_RITR_VLAN_IF and a FID index when it is
 * MLXSW_REG_RITR_FID_IF.
 */
static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
				    enum mlxsw_reg_ritr_if_type type,
				    u16 vid_fid, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
			    rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6690 
/* The router port is the reserved local port one past the last
 * front-panel port; it is the flood target for traffic headed to the
 * router (see the FID flood_set calls below).
 */
u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}
6695 
/* Program a VLAN RIF: RITR entry, MC/BC flooding towards the router
 * port, and the router FDB entry for the RIF's MAC. Each step is
 * undone in reverse order on error.
 */
static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
	return err;
}
6734 
/* Reverse of mlxsw_sp_rif_vlan_configure(). */
static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
}
6750 
/* VLAN RIFs use the 802.1Q FID of the netdev's VID; a non-VLAN upper
 * (e.g. a VLAN-aware bridge itself) maps to VID 1.
 */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
{
	u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;

	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
}
6758 
/* RIF ops for VLAN (802.1Q FID) router interfaces; no type-specific
 * state, hence no .setup and the plain rif_size.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
	.type			= MLXSW_SP_RIF_TYPE_VLAN,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_vlan_configure,
	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
};
6766 
/* Program a FID RIF (mirrors mlxsw_sp_rif_vlan_configure(), but keyed
 * by FID index instead of VID): RITR entry, MC/BC flooding towards
 * the router port, and the router FDB entry for the RIF's MAC.
 */
static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
				       true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
	return err;
}
6806 
/* Reverse of mlxsw_sp_rif_fid_configure(). */
static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
}
6822 
/* FID RIFs use an 802.1D FID keyed by the netdev's ifindex. */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
}
6828 
/* RIF ops for FID (802.1D) router interfaces. */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
	.type			= MLXSW_SP_RIF_TYPE_FID,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_fid_configure,
	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
	.fid_get		= mlxsw_sp_rif_fid_fid_get,
};
6836 
/* Upcast a generic RIF to its IP-in-IP loopback container. */
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}
6842 
6843 static void
6844 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6845 			   const struct mlxsw_sp_rif_params *params)
6846 {
6847 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6848 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
6849 
6850 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6851 				 common);
6852 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6853 	rif_lb->lb_config = params_lb->lb_config;
6854 }
6855 
/* Program the loopback RITR entry that binds this RIF's IP-in-IP
 * decapsulation to the underlay VR @ul_vr. Only IPv4 underlays are
 * supported; IPv6 returns -EAFNOSUPPORT.
 */
static int
mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
			struct mlxsw_sp_vr *ul_vr, bool enable)
{
	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
	struct mlxsw_sp_rif *rif = &lb_rif->common;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u32 saddr4;

	switch (lb_cf.ul_protocol) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
			    ul_vr->id, saddr4, lb_cf.okey);
		break;

	case MLXSW_SP_L3_PROTO_IPV6:
		return -EAFNOSUPPORT;
	}

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6882 
/* Bind the loopback RIF to its underlay VR (derived from the tunnel
 * netdev's underlay FIB table) and program it. The VR reference is
 * held for the lifetime of the RIF.
 */
static int
mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;
	int err;

	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
	if (IS_ERR(ul_vr))
		return PTR_ERR(ul_vr);

	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
	if (err)
		goto err_loopback_op;

	lb_rif->ul_vr_id = ul_vr->id;
	++ul_vr->rif_count;
	return 0;

err_loopback_op:
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
	return err;
}
6908 
/* Reverse of mlxsw_sp_rif_ipip_lb_configure(): unprogram the loopback
 * RIF and drop the underlay VR reference.
 */
static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;

	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);

	--ul_vr->rif_count;
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
}
6921 
/* RIF ops for IP-in-IP loopback interfaces. No .fid_get: loopback
 * RIFs are not associated with a FID.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
	.configure		= mlxsw_sp_rif_ipip_lb_configure,
	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
};
6929 
/* Per-type RIF operations, indexed by enum mlxsw_sp_rif_type. */
static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
};
6936 
6937 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6938 {
6939 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6940 
6941 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
6942 					 sizeof(struct mlxsw_sp_rif *),
6943 					 GFP_KERNEL);
6944 	if (!mlxsw_sp->router->rifs)
6945 		return -ENOMEM;
6946 
6947 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
6948 
6949 	return 0;
6950 }
6951 
6952 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6953 {
6954 	int i;
6955 
6956 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6957 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6958 
6959 	kfree(mlxsw_sp->router->rifs);
6960 }
6961 
/* Set global tunneling parameters via the TIGCR register. */
static int
mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
{
	char tigcr_pl[MLXSW_REG_TIGCR_LEN];

	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
}
6970 
/* Initialize IP-in-IP support: per-type ops table, the list of
 * offloaded tunnels, and global tunneling configuration.
 */
static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
}
6977 
/* All offloaded tunnels should have been removed by now. */
static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
{
	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
}
6982 
/* Quiesce the FIB offload pipeline before the core re-dumps the FIB. */
static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp_router *router;

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}
6995 
6996 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Enable hashing of an outer-header type in the RECR2 register. */
static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
{
	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
}
7001 
/* Enable hashing of an outer-header field in the RECR2 register. */
static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
{
	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
}
7006 
/* Program the IPv4 ECMP hash inputs. SIP/DIP are always hashed; L4
 * protocol and ports are added only when the kernel's multipath hash
 * policy sysctl requests L4 hashing, so HW and SW hash consistently.
 */
static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
{
	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;

	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
	if (only_l3)
		return;
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
}
7023 
7024 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7025 {
7026 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7027 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7028 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7029 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7030 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7031 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7032 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7033 }
7034 
7035 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7036 {
7037 	char recr2_pl[MLXSW_REG_RECR2_LEN];
7038 	u32 seed;
7039 
7040 	get_random_bytes(&seed, sizeof(seed));
7041 	mlxsw_reg_recr2_pack(recr2_pl, seed);
7042 	mlxsw_sp_mp4_hash_init(recr2_pl);
7043 	mlxsw_sp_mp6_hash_init(recr2_pl);
7044 
7045 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7046 }
7047 #else
/* Kernel built without CONFIG_IP_ROUTE_MULTIPATH: nothing to program. */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
7052 #endif
7053 
7054 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7055 {
7056 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
7057 	unsigned int i;
7058 
7059 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
7060 
7061 	/* HW is determining switch priority based on DSCP-bits, but the
7062 	 * kernel is still doing that based on the ToS. Since there's a
7063 	 * mismatch in bits we need to make sure to translate the right
7064 	 * value ToS would observe, skipping the 2 least-significant ECN bits.
7065 	 */
7066 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7067 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7068 
7069 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7070 }
7071 
7072 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7073 {
7074 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7075 	u64 max_rifs;
7076 	int err;
7077 
7078 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7079 		return -EIO;
7080 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7081 
7082 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7083 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7084 	mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
7085 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7086 	if (err)
7087 		return err;
7088 	return 0;
7089 }
7090 
/* Disable the hardware router, reverting __mlxsw_sp_router_init().
 * The register write's return value is intentionally ignored; there is
 * nothing useful to do about a failure during teardown.
 */
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	/* (false, false) here versus (true, true) in init — presumably the
	 * IPv4/IPv6 router enable bits; confirm in mlxsw_reg_rgcr_pack().
	 */
	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
7098 
7099 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7100 {
7101 	struct mlxsw_sp_router *router;
7102 	int err;
7103 
7104 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7105 	if (!router)
7106 		return -ENOMEM;
7107 	mlxsw_sp->router = router;
7108 	router->mlxsw_sp = mlxsw_sp;
7109 
7110 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7111 	err = __mlxsw_sp_router_init(mlxsw_sp);
7112 	if (err)
7113 		goto err_router_init;
7114 
7115 	err = mlxsw_sp_rifs_init(mlxsw_sp);
7116 	if (err)
7117 		goto err_rifs_init;
7118 
7119 	err = mlxsw_sp_ipips_init(mlxsw_sp);
7120 	if (err)
7121 		goto err_ipips_init;
7122 
7123 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7124 			      &mlxsw_sp_nexthop_ht_params);
7125 	if (err)
7126 		goto err_nexthop_ht_init;
7127 
7128 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7129 			      &mlxsw_sp_nexthop_group_ht_params);
7130 	if (err)
7131 		goto err_nexthop_group_ht_init;
7132 
7133 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7134 	err = mlxsw_sp_lpm_init(mlxsw_sp);
7135 	if (err)
7136 		goto err_lpm_init;
7137 
7138 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7139 	if (err)
7140 		goto err_mr_init;
7141 
7142 	err = mlxsw_sp_vrs_init(mlxsw_sp);
7143 	if (err)
7144 		goto err_vrs_init;
7145 
7146 	err = mlxsw_sp_neigh_init(mlxsw_sp);
7147 	if (err)
7148 		goto err_neigh_init;
7149 
7150 	mlxsw_sp->router->netevent_nb.notifier_call =
7151 		mlxsw_sp_router_netevent_event;
7152 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7153 	if (err)
7154 		goto err_register_netevent_notifier;
7155 
7156 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7157 	if (err)
7158 		goto err_mp_hash_init;
7159 
7160 	err = mlxsw_sp_dscp_init(mlxsw_sp);
7161 	if (err)
7162 		goto err_dscp_init;
7163 
7164 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7165 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7166 				    mlxsw_sp_router_fib_dump_flush);
7167 	if (err)
7168 		goto err_register_fib_notifier;
7169 
7170 	return 0;
7171 
7172 err_register_fib_notifier:
7173 err_dscp_init:
7174 err_mp_hash_init:
7175 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7176 err_register_netevent_notifier:
7177 	mlxsw_sp_neigh_fini(mlxsw_sp);
7178 err_neigh_init:
7179 	mlxsw_sp_vrs_fini(mlxsw_sp);
7180 err_vrs_init:
7181 	mlxsw_sp_mr_fini(mlxsw_sp);
7182 err_mr_init:
7183 	mlxsw_sp_lpm_fini(mlxsw_sp);
7184 err_lpm_init:
7185 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7186 err_nexthop_group_ht_init:
7187 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7188 err_nexthop_ht_init:
7189 	mlxsw_sp_ipips_fini(mlxsw_sp);
7190 err_ipips_init:
7191 	mlxsw_sp_rifs_fini(mlxsw_sp);
7192 err_rifs_init:
7193 	__mlxsw_sp_router_fini(mlxsw_sp);
7194 err_router_init:
7195 	kfree(mlxsw_sp->router);
7196 	return err;
7197 }
7198 
7199 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7200 {
7201 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7202 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7203 	mlxsw_sp_neigh_fini(mlxsw_sp);
7204 	mlxsw_sp_vrs_fini(mlxsw_sp);
7205 	mlxsw_sp_mr_fini(mlxsw_sp);
7206 	mlxsw_sp_lpm_fini(mlxsw_sp);
7207 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7208 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7209 	mlxsw_sp_ipips_fini(mlxsw_sp);
7210 	mlxsw_sp_rifs_fini(mlxsw_sp);
7211 	__mlxsw_sp_router_fini(mlxsw_sp);
7212 	kfree(mlxsw_sp->router);
7213 }
7214