xref: /linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c (revision 3494bec0f6ac8ac06e0ad7c35933db345b2c5a83)
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3 
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/if_macvlan.h>
17 #include <linux/refcount.h>
18 #include <linux/jhash.h>
19 #include <linux/net_namespace.h>
20 #include <net/netevent.h>
21 #include <net/neighbour.h>
22 #include <net/arp.h>
23 #include <net/ip_fib.h>
24 #include <net/ip6_fib.h>
25 #include <net/nexthop.h>
26 #include <net/fib_rules.h>
27 #include <net/ip_tunnels.h>
28 #include <net/l3mdev.h>
29 #include <net/addrconf.h>
30 #include <net/ndisc.h>
31 #include <net/ipv6.h>
32 #include <net/fib_notifier.h>
33 #include <net/switchdev.h>
34 
35 #include "spectrum.h"
36 #include "core.h"
37 #include "reg.h"
38 #include "spectrum_cnt.h"
39 #include "spectrum_dpipe.h"
40 #include "spectrum_ipip.h"
41 #include "spectrum_mr.h"
42 #include "spectrum_mr_tcam.h"
43 #include "spectrum_router.h"
44 #include "spectrum_span.h"
45 
46 struct mlxsw_sp_fib;
47 struct mlxsw_sp_vr;
48 struct mlxsw_sp_lpm_tree;
49 struct mlxsw_sp_rif_ops;
50 
51 struct mlxsw_sp_router {
52 	struct mlxsw_sp *mlxsw_sp;
53 	struct mlxsw_sp_rif **rifs;
54 	struct mlxsw_sp_vr *vrs;
55 	struct rhashtable neigh_ht;
56 	struct rhashtable nexthop_group_ht;
57 	struct rhashtable nexthop_ht;
58 	struct list_head nexthop_list;
59 	struct {
60 		/* One tree for each protocol: IPv4 and IPv6 */
61 		struct mlxsw_sp_lpm_tree *proto_trees[2];
62 		struct mlxsw_sp_lpm_tree *trees;
63 		unsigned int tree_count;
64 	} lpm;
65 	struct {
66 		struct delayed_work dw;
67 		unsigned long interval;	/* ms */
68 	} neighs_update;
69 	struct delayed_work nexthop_probe_dw;
70 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
71 	struct list_head nexthop_neighs_list;
72 	struct list_head ipip_list;
73 	bool aborted;
74 	struct notifier_block fib_nb;
75 	struct notifier_block netevent_nb;
76 	struct notifier_block inetaddr_nb;
77 	struct notifier_block inet6addr_nb;
78 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
79 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
80 	u32 adj_discard_index;
81 	bool adj_discard_index_valid;
82 };
83 
84 struct mlxsw_sp_rif {
85 	struct list_head nexthop_list;
86 	struct list_head neigh_list;
87 	struct net_device *dev; /* NULL for underlay RIF */
88 	struct mlxsw_sp_fid *fid;
89 	unsigned char addr[ETH_ALEN];
90 	int mtu;
91 	u16 rif_index;
92 	u16 vr_id;
93 	const struct mlxsw_sp_rif_ops *ops;
94 	struct mlxsw_sp *mlxsw_sp;
95 
96 	unsigned int counter_ingress;
97 	bool counter_ingress_valid;
98 	unsigned int counter_egress;
99 	bool counter_egress_valid;
100 };
101 
102 struct mlxsw_sp_rif_params {
103 	struct net_device *dev;
104 	union {
105 		u16 system_port;
106 		u16 lag_id;
107 	};
108 	u16 vid;
109 	bool lag;
110 };
111 
112 struct mlxsw_sp_rif_subport {
113 	struct mlxsw_sp_rif common;
114 	refcount_t ref_count;
115 	union {
116 		u16 system_port;
117 		u16 lag_id;
118 	};
119 	u16 vid;
120 	bool lag;
121 };
122 
123 struct mlxsw_sp_rif_ipip_lb {
124 	struct mlxsw_sp_rif common;
125 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
126 	u16 ul_vr_id; /* Reserved for Spectrum-2. */
127 	u16 ul_rif_id; /* Reserved for Spectrum. */
128 };
129 
130 struct mlxsw_sp_rif_params_ipip_lb {
131 	struct mlxsw_sp_rif_params common;
132 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
133 };
134 
135 struct mlxsw_sp_rif_ops {
136 	enum mlxsw_sp_rif_type type;
137 	size_t rif_size;
138 
139 	void (*setup)(struct mlxsw_sp_rif *rif,
140 		      const struct mlxsw_sp_rif_params *params);
141 	int (*configure)(struct mlxsw_sp_rif *rif);
142 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
143 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
144 					 struct netlink_ext_ack *extack);
145 	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
146 };
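
/* Editorial note: per-RIF-type operations. On the creation path, setup()
 * is presumably called with the requested parameters before configure()
 * programs the RIF; deconfigure() reverses that on destruction.
 * fid_get() resolves the FID backing the RIF and fdb_del() removes a
 * stale FDB entry for it.
 */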
147 
148 static struct mlxsw_sp_rif *
149 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
150 			 const struct net_device *dev);
151 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
152 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
153 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
154 				  struct mlxsw_sp_lpm_tree *lpm_tree);
155 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
156 				     const struct mlxsw_sp_fib *fib,
157 				     u8 tree_id);
158 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
159 				       const struct mlxsw_sp_fib *fib);
160 
161 static unsigned int *
162 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
163 			   enum mlxsw_sp_rif_counter_dir dir)
164 {
165 	switch (dir) {
166 	case MLXSW_SP_RIF_COUNTER_EGRESS:
167 		return &rif->counter_egress;
168 	case MLXSW_SP_RIF_COUNTER_INGRESS:
169 		return &rif->counter_ingress;
170 	}
171 	return NULL;
172 }
173 
174 static bool
175 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
176 			       enum mlxsw_sp_rif_counter_dir dir)
177 {
178 	switch (dir) {
179 	case MLXSW_SP_RIF_COUNTER_EGRESS:
180 		return rif->counter_egress_valid;
181 	case MLXSW_SP_RIF_COUNTER_INGRESS:
182 		return rif->counter_ingress_valid;
183 	}
184 	return false;
185 }
186 
187 static void
188 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
189 			       enum mlxsw_sp_rif_counter_dir dir,
190 			       bool valid)
191 {
192 	switch (dir) {
193 	case MLXSW_SP_RIF_COUNTER_EGRESS:
194 		rif->counter_egress_valid = valid;
195 		break;
196 	case MLXSW_SP_RIF_COUNTER_INGRESS:
197 		rif->counter_ingress_valid = valid;
198 		break;
199 	}
200 }
201 
202 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
203 				     unsigned int counter_index, bool enable,
204 				     enum mlxsw_sp_rif_counter_dir dir)
205 {
206 	char ritr_pl[MLXSW_REG_RITR_LEN];
207 	bool is_egress = false;
208 	int err;
209 
210 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
211 		is_egress = true;
212 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
213 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
214 	if (err)
215 		return err;
216 
217 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
218 				    is_egress);
219 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
220 }
221 
222 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
223 				   struct mlxsw_sp_rif *rif,
224 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
225 {
226 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
227 	unsigned int *p_counter_index;
228 	bool valid;
229 	int err;
230 
231 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
232 	if (!valid)
233 		return -EINVAL;
234 
235 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
236 	if (!p_counter_index)
237 		return -EINVAL;
238 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
239 			     MLXSW_REG_RICNT_OPCODE_NOP);
240 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
241 	if (err)
242 		return err;
243 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
244 	return 0;
245 }
246 
247 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
248 				      unsigned int counter_index)
249 {
250 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
251 
252 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
253 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
254 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
255 }
256 
257 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
258 			       struct mlxsw_sp_rif *rif,
259 			       enum mlxsw_sp_rif_counter_dir dir)
260 {
261 	unsigned int *p_counter_index;
262 	int err;
263 
264 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
265 	if (!p_counter_index)
266 		return -EINVAL;
267 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
268 				     p_counter_index);
269 	if (err)
270 		return err;
271 
272 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
273 	if (err)
274 		goto err_counter_clear;
275 
276 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
277 					*p_counter_index, true, dir);
278 	if (err)
279 		goto err_counter_edit;
280 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
281 	return 0;
282 
283 err_counter_edit:
284 err_counter_clear:
285 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
286 			      *p_counter_index);
287 	return err;
288 }
289 
290 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
291 			       struct mlxsw_sp_rif *rif,
292 			       enum mlxsw_sp_rif_counter_dir dir)
293 {
294 	unsigned int *p_counter_index;
295 
296 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
297 		return;
298 
299 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
300 	if (WARN_ON(!p_counter_index))
301 		return;
302 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
303 				  *p_counter_index, false, dir);
304 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
305 			      *p_counter_index);
306 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
307 }
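
/* Illustrative sketch (not part of the original file): typical use of the
 * RIF counter helpers above, assuming a valid rif. Allocate an egress
 * counter, read it, then release it.
 *
 *	u64 packets;
 *
 *	if (!mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
 *					MLXSW_SP_RIF_COUNTER_EGRESS) &&
 *	    !mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif,
 *					    MLXSW_SP_RIF_COUNTER_EGRESS,
 *					    &packets))
 *		pr_debug("RIF %u egress packets: %llu\n", rif->rif_index,
 *			 packets);
 *	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
 */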
308 
309 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
310 {
311 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
312 	struct devlink *devlink;
313 
314 	devlink = priv_to_devlink(mlxsw_sp->core);
315 	if (!devlink_dpipe_table_counter_enabled(devlink,
316 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
317 		return;
318 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
319 }
320 
321 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
322 {
323 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
324 
325 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
326 }
327 
328 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
329 
330 struct mlxsw_sp_prefix_usage {
331 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
332 };
333 
334 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
335 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
336 
337 static bool
338 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
339 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
340 {
341 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
342 }
343 
344 static void
345 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
346 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
347 {
348 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
349 }
350 
351 static void
352 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
353 			  unsigned char prefix_len)
354 {
355 	set_bit(prefix_len, prefix_usage->b);
356 }
357 
358 static void
359 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
360 			    unsigned char prefix_len)
361 {
362 	clear_bit(prefix_len, prefix_usage->b);
363 }
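
/* Illustrative sketch (not part of the original file): exercising the
 * prefix-usage bitmap above. Mark /24 and /32 as used and walk the set
 * bits in ascending order.
 *
 *	struct mlxsw_sp_prefix_usage usage = {};
 *	unsigned char prefix;
 *
 *	mlxsw_sp_prefix_usage_set(&usage, 24);
 *	mlxsw_sp_prefix_usage_set(&usage, 32);
 *	mlxsw_sp_prefix_usage_for_each(prefix, &usage)
 *		pr_debug("prefix length %u in use\n", prefix);
 */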
364 
365 struct mlxsw_sp_fib_key {
366 	unsigned char addr[sizeof(struct in6_addr)];
367 	unsigned char prefix_len;
368 };
369 
370 enum mlxsw_sp_fib_entry_type {
371 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
372 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
373 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
374 	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
375 	MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,
376 
377 	/* This is a special case of local delivery, where a packet should be
378 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
379 	 * because that's a type of next hop, not of FIB entry. (There can be
380 	 * several next hops in a REMOTE entry, and some of them may be
381 	 * encapsulating entries.)
382 	 */
383 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
384 	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
385 };
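
/* Editorial note: behaviour inferred from the names, not verified against
 * the dispatch code (which lies outside this excerpt): REMOTE forwards
 * via a next-hop group, LOCAL/TRAP deliver toward the local router/CPU,
 * BLACKHOLE drops silently and UNREACHABLE drops while signalling an
 * error to the sender.
 */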
386 
387 struct mlxsw_sp_nexthop_group;
388 struct mlxsw_sp_fib_entry;
389 
390 struct mlxsw_sp_fib_node {
391 	struct mlxsw_sp_fib_entry *fib_entry;
392 	struct list_head list;
393 	struct rhash_head ht_node;
394 	struct mlxsw_sp_fib *fib;
395 	struct mlxsw_sp_fib_key key;
396 };
397 
398 struct mlxsw_sp_fib_entry_decap {
399 	struct mlxsw_sp_ipip_entry *ipip_entry;
400 	u32 tunnel_index;
401 };
402 
403 struct mlxsw_sp_fib_entry {
404 	struct mlxsw_sp_fib_node *fib_node;
405 	enum mlxsw_sp_fib_entry_type type;
406 	struct list_head nexthop_group_node;
407 	struct mlxsw_sp_nexthop_group *nh_group;
408 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
409 };
410 
411 struct mlxsw_sp_fib4_entry {
412 	struct mlxsw_sp_fib_entry common;
413 	u32 tb_id;
414 	u32 prio;
415 	u8 tos;
416 	u8 type;
417 };
418 
419 struct mlxsw_sp_fib6_entry {
420 	struct mlxsw_sp_fib_entry common;
421 	struct list_head rt6_list;
422 	unsigned int nrt6;
423 };
424 
425 struct mlxsw_sp_rt6 {
426 	struct list_head list;
427 	struct fib6_info *rt;
428 };
429 
430 struct mlxsw_sp_lpm_tree {
431 	u8 id; /* tree ID */
432 	unsigned int ref_count;
433 	enum mlxsw_sp_l3proto proto;
434 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
435 	struct mlxsw_sp_prefix_usage prefix_usage;
436 };
437 
438 struct mlxsw_sp_fib {
439 	struct rhashtable ht;
440 	struct list_head node_list;
441 	struct mlxsw_sp_vr *vr;
442 	struct mlxsw_sp_lpm_tree *lpm_tree;
443 	enum mlxsw_sp_l3proto proto;
444 };
445 
446 struct mlxsw_sp_vr {
447 	u16 id; /* virtual router ID */
448 	u32 tb_id; /* kernel fib table id */
449 	unsigned int rif_count;
450 	struct mlxsw_sp_fib *fib4;
451 	struct mlxsw_sp_fib *fib6;
452 	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
453 	struct mlxsw_sp_rif *ul_rif;
454 	refcount_t ul_rif_refcnt;
455 };
456 
457 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
458 
459 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
460 						struct mlxsw_sp_vr *vr,
461 						enum mlxsw_sp_l3proto proto)
462 {
463 	struct mlxsw_sp_lpm_tree *lpm_tree;
464 	struct mlxsw_sp_fib *fib;
465 	int err;
466 
467 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
468 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
469 	if (!fib)
470 		return ERR_PTR(-ENOMEM);
471 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
472 	if (err)
473 		goto err_rhashtable_init;
474 	INIT_LIST_HEAD(&fib->node_list);
475 	fib->proto = proto;
476 	fib->vr = vr;
477 	fib->lpm_tree = lpm_tree;
478 	mlxsw_sp_lpm_tree_hold(lpm_tree);
479 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
480 	if (err)
481 		goto err_lpm_tree_bind;
482 	return fib;
483 
484 err_lpm_tree_bind:
485 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
486 err_rhashtable_init:
487 	kfree(fib);
488 	return ERR_PTR(err);
489 }
490 
491 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
492 				 struct mlxsw_sp_fib *fib)
493 {
494 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
495 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
496 	WARN_ON(!list_empty(&fib->node_list));
497 	rhashtable_destroy(&fib->ht);
498 	kfree(fib);
499 }
500 
501 static struct mlxsw_sp_lpm_tree *
502 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
503 {
504 	struct mlxsw_sp_lpm_tree *lpm_tree;
505 	int i;
506 
507 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
508 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
509 		if (lpm_tree->ref_count == 0)
510 			return lpm_tree;
511 	}
512 	return NULL;
513 }
514 
515 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
516 				   struct mlxsw_sp_lpm_tree *lpm_tree)
517 {
518 	char ralta_pl[MLXSW_REG_RALTA_LEN];
519 
520 	mlxsw_reg_ralta_pack(ralta_pl, true,
521 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
522 			     lpm_tree->id);
523 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
524 }
525 
526 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
527 				   struct mlxsw_sp_lpm_tree *lpm_tree)
528 {
529 	char ralta_pl[MLXSW_REG_RALTA_LEN];
530 
531 	mlxsw_reg_ralta_pack(ralta_pl, false,
532 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
533 			     lpm_tree->id);
534 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
535 }
536 
537 static int
538 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
539 				  struct mlxsw_sp_prefix_usage *prefix_usage,
540 				  struct mlxsw_sp_lpm_tree *lpm_tree)
541 {
542 	char ralst_pl[MLXSW_REG_RALST_LEN];
543 	u8 root_bin = 0;
544 	u8 prefix;
545 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
546 
547 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
548 		root_bin = prefix;
549 
550 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
551 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
552 		if (prefix == 0)
553 			continue;
554 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
555 					 MLXSW_REG_RALST_BIN_NO_CHILD);
556 		last_prefix = prefix;
557 	}
558 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
559 }
560 
561 static struct mlxsw_sp_lpm_tree *
562 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
563 			 struct mlxsw_sp_prefix_usage *prefix_usage,
564 			 enum mlxsw_sp_l3proto proto)
565 {
566 	struct mlxsw_sp_lpm_tree *lpm_tree;
567 	int err;
568 
569 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
570 	if (!lpm_tree)
571 		return ERR_PTR(-EBUSY);
572 	lpm_tree->proto = proto;
573 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
574 	if (err)
575 		return ERR_PTR(err);
576 
577 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
578 						lpm_tree);
579 	if (err)
580 		goto err_left_struct_set;
581 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
582 	       sizeof(lpm_tree->prefix_usage));
583 	memset(&lpm_tree->prefix_ref_count, 0,
584 	       sizeof(lpm_tree->prefix_ref_count));
585 	lpm_tree->ref_count = 1;
586 	return lpm_tree;
587 
588 err_left_struct_set:
589 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
590 	return ERR_PTR(err);
591 }
592 
593 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
594 				      struct mlxsw_sp_lpm_tree *lpm_tree)
595 {
596 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
597 }
598 
599 static struct mlxsw_sp_lpm_tree *
600 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
601 		      struct mlxsw_sp_prefix_usage *prefix_usage,
602 		      enum mlxsw_sp_l3proto proto)
603 {
604 	struct mlxsw_sp_lpm_tree *lpm_tree;
605 	int i;
606 
607 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
608 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
609 		if (lpm_tree->ref_count != 0 &&
610 		    lpm_tree->proto == proto &&
611 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
612 					     prefix_usage)) {
613 			mlxsw_sp_lpm_tree_hold(lpm_tree);
614 			return lpm_tree;
615 		}
616 	}
617 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
618 }
619 
620 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
621 {
622 	lpm_tree->ref_count++;
623 }
624 
625 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
626 				  struct mlxsw_sp_lpm_tree *lpm_tree)
627 {
628 	if (--lpm_tree->ref_count == 0)
629 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
630 }
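
/* Editorial note: mlxsw_sp_lpm_tree_get() reuses an in-use tree whose
 * protocol and prefix usage match, bumping its reference count;
 * otherwise it claims an unused tree slot (RALTA) and programs the tree
 * structure (RALST). mlxsw_sp_lpm_tree_put() frees the hardware tree
 * once the last reference is dropped.
 */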
631 
632 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
633 
634 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
635 {
636 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
637 	struct mlxsw_sp_lpm_tree *lpm_tree;
638 	u64 max_trees;
639 	int err, i;
640 
641 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
642 		return -EIO;
643 
644 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
645 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
646 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
647 					     sizeof(struct mlxsw_sp_lpm_tree),
648 					     GFP_KERNEL);
649 	if (!mlxsw_sp->router->lpm.trees)
650 		return -ENOMEM;
651 
652 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
653 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
654 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
655 	}
656 
657 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
658 					 MLXSW_SP_L3_PROTO_IPV4);
659 	if (IS_ERR(lpm_tree)) {
660 		err = PTR_ERR(lpm_tree);
661 		goto err_ipv4_tree_get;
662 	}
663 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
664 
665 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
666 					 MLXSW_SP_L3_PROTO_IPV6);
667 	if (IS_ERR(lpm_tree)) {
668 		err = PTR_ERR(lpm_tree);
669 		goto err_ipv6_tree_get;
670 	}
671 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
672 
673 	return 0;
674 
675 err_ipv6_tree_get:
676 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
677 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
678 err_ipv4_tree_get:
679 	kfree(mlxsw_sp->router->lpm.trees);
680 	return err;
681 }
682 
683 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
684 {
685 	struct mlxsw_sp_lpm_tree *lpm_tree;
686 
687 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
688 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
689 
690 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
691 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
692 
693 	kfree(mlxsw_sp->router->lpm.trees);
694 }
695 
696 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
697 {
698 	return !!vr->fib4 || !!vr->fib6 ||
699 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
700 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
701 }
702 
703 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
704 {
705 	struct mlxsw_sp_vr *vr;
706 	int i;
707 
708 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
709 		vr = &mlxsw_sp->router->vrs[i];
710 		if (!mlxsw_sp_vr_is_used(vr))
711 			return vr;
712 	}
713 	return NULL;
714 }
715 
716 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
717 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
718 {
719 	char raltb_pl[MLXSW_REG_RALTB_LEN];
720 
721 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
722 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
723 			     tree_id);
724 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
725 }
726 
727 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
728 				       const struct mlxsw_sp_fib *fib)
729 {
730 	char raltb_pl[MLXSW_REG_RALTB_LEN];
731 
732 	/* Bind to tree 0, which is the default. */
733 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
734 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
735 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
736 }
737 
738 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
739 {
740 	/* For our purpose, squash main, default and local tables into one */
741 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
742 		tb_id = RT_TABLE_MAIN;
743 	return tb_id;
744 }
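
/* Editorial example: RT_TABLE_LOCAL (255) and RT_TABLE_DEFAULT (253) are
 * both mapped to RT_TABLE_MAIN (254), so the three kernel tables share a
 * single virtual router; any other table ID is returned unchanged.
 */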
745 
746 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
747 					    u32 tb_id)
748 {
749 	struct mlxsw_sp_vr *vr;
750 	int i;
751 
752 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
753 
754 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
755 		vr = &mlxsw_sp->router->vrs[i];
756 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
757 			return vr;
758 	}
759 	return NULL;
760 }
761 
762 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
763 				u16 *vr_id)
764 {
765 	struct mlxsw_sp_vr *vr;
766 
767 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
768 	if (!vr)
769 		return -ESRCH;
770 	*vr_id = vr->id;
771 
772 	return 0;
773 }
774 
775 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
776 					    enum mlxsw_sp_l3proto proto)
777 {
778 	switch (proto) {
779 	case MLXSW_SP_L3_PROTO_IPV4:
780 		return vr->fib4;
781 	case MLXSW_SP_L3_PROTO_IPV6:
782 		return vr->fib6;
783 	}
784 	return NULL;
785 }
786 
787 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
788 					      u32 tb_id,
789 					      struct netlink_ext_ack *extack)
790 {
791 	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
792 	struct mlxsw_sp_fib *fib4;
793 	struct mlxsw_sp_fib *fib6;
794 	struct mlxsw_sp_vr *vr;
795 	int err;
796 
797 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
798 	if (!vr) {
799 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
800 		return ERR_PTR(-EBUSY);
801 	}
802 	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
803 	if (IS_ERR(fib4))
804 		return ERR_CAST(fib4);
805 	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
806 	if (IS_ERR(fib6)) {
807 		err = PTR_ERR(fib6);
808 		goto err_fib6_create;
809 	}
810 	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
811 					     MLXSW_SP_L3_PROTO_IPV4);
812 	if (IS_ERR(mr4_table)) {
813 		err = PTR_ERR(mr4_table);
814 		goto err_mr4_table_create;
815 	}
816 	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
817 					     MLXSW_SP_L3_PROTO_IPV6);
818 	if (IS_ERR(mr6_table)) {
819 		err = PTR_ERR(mr6_table);
820 		goto err_mr6_table_create;
821 	}
822 
823 	vr->fib4 = fib4;
824 	vr->fib6 = fib6;
825 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
826 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
827 	vr->tb_id = tb_id;
828 	return vr;
829 
830 err_mr6_table_create:
831 	mlxsw_sp_mr_table_destroy(mr4_table);
832 err_mr4_table_create:
833 	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
834 err_fib6_create:
835 	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
836 	return ERR_PTR(err);
837 }
838 
839 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
840 				struct mlxsw_sp_vr *vr)
841 {
842 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
843 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
844 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
845 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
846 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
847 	vr->fib6 = NULL;
848 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
849 	vr->fib4 = NULL;
850 }
851 
852 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
853 					   struct netlink_ext_ack *extack)
854 {
855 	struct mlxsw_sp_vr *vr;
856 
857 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
858 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
859 	if (!vr)
860 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
861 	return vr;
862 }
863 
864 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
865 {
866 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
867 	    list_empty(&vr->fib6->node_list) &&
868 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
869 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
870 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
871 }
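
/* Editorial note: a virtual router is reference-managed implicitly:
 * mlxsw_sp_vr_get() creates the VR (with its IPv4/IPv6 FIBs and
 * multicast tables) on first use, and mlxsw_sp_vr_put() destroys it only
 * once no RIFs, FIB nodes or multicast routes reference it.
 */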
872 
873 static bool
874 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
875 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
876 {
877 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
878 
879 	if (!mlxsw_sp_vr_is_used(vr))
880 		return false;
881 	if (fib->lpm_tree->id == tree_id)
882 		return true;
883 	return false;
884 }
885 
886 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
887 					struct mlxsw_sp_fib *fib,
888 					struct mlxsw_sp_lpm_tree *new_tree)
889 {
890 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
891 	int err;
892 
893 	fib->lpm_tree = new_tree;
894 	mlxsw_sp_lpm_tree_hold(new_tree);
895 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
896 	if (err)
897 		goto err_tree_bind;
898 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
899 	return 0;
900 
901 err_tree_bind:
902 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
903 	fib->lpm_tree = old_tree;
904 	return err;
905 }
906 
907 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
908 					 struct mlxsw_sp_fib *fib,
909 					 struct mlxsw_sp_lpm_tree *new_tree)
910 {
911 	enum mlxsw_sp_l3proto proto = fib->proto;
912 	struct mlxsw_sp_lpm_tree *old_tree;
913 	u8 old_id, new_id = new_tree->id;
914 	struct mlxsw_sp_vr *vr;
915 	int i, err;
916 
917 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
918 	old_id = old_tree->id;
919 
920 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
921 		vr = &mlxsw_sp->router->vrs[i];
922 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
923 			continue;
924 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
925 						   mlxsw_sp_vr_fib(vr, proto),
926 						   new_tree);
927 		if (err)
928 			goto err_tree_replace;
929 	}
930 
931 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
932 	       sizeof(new_tree->prefix_ref_count));
933 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
934 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
935 
936 	return 0;
937 
938 err_tree_replace:
939 	for (i--; i >= 0; i--) {
940 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
941 			continue;
942 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
943 					     mlxsw_sp_vr_fib(vr, proto),
944 					     old_tree);
945 	}
946 	return err;
947 }
948 
949 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
950 {
951 	struct mlxsw_sp_vr *vr;
952 	u64 max_vrs;
953 	int i;
954 
955 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
956 		return -EIO;
957 
958 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
959 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
960 					GFP_KERNEL);
961 	if (!mlxsw_sp->router->vrs)
962 		return -ENOMEM;
963 
964 	for (i = 0; i < max_vrs; i++) {
965 		vr = &mlxsw_sp->router->vrs[i];
966 		vr->id = i;
967 	}
968 
969 	return 0;
970 }
971 
972 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
973 
974 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
975 {
976 	/* At this stage we're guaranteed not to have new incoming
977 	 * FIB notifications and the work queue is free from FIBs
978 	 * sitting on top of mlxsw netdevs. However, we can still
979 	 * have other FIBs queued. Flush the queue before flushing
980 	 * the device's tables. No need for locks, as we're the only
981 	 * writer.
982 	 */
983 	mlxsw_core_flush_owq();
984 	mlxsw_sp_router_fib_flush(mlxsw_sp);
985 	kfree(mlxsw_sp->router->vrs);
986 }
987 
988 static struct net_device *
989 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
990 {
991 	struct ip_tunnel *tun = netdev_priv(ol_dev);
992 	struct net *net = dev_net(ol_dev);
993 
994 	return dev_get_by_index_rcu(net, tun->parms.link);
995 }
996 
997 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
998 {
999 	struct net_device *d;
1000 	u32 tb_id;
1001 
1002 	rcu_read_lock();
1003 	d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1004 	if (d)
1005 		tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1006 	else
1007 		tb_id = RT_TABLE_MAIN;
1008 	rcu_read_unlock();
1009 
1010 	return tb_id;
1011 }
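
/* Editorial note: the underlay table of a tunnel is the FIB table of the
 * netdevice the tunnel is bound to (tun->parms.link). When there is no
 * bound device, or it is not enslaved to an L3 master device, the main
 * table is used.
 */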
1012 
1013 static struct mlxsw_sp_rif *
1014 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1015 		    const struct mlxsw_sp_rif_params *params,
1016 		    struct netlink_ext_ack *extack);
1017 
1018 static struct mlxsw_sp_rif_ipip_lb *
1019 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1020 				enum mlxsw_sp_ipip_type ipipt,
1021 				struct net_device *ol_dev,
1022 				struct netlink_ext_ack *extack)
1023 {
1024 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
1025 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1026 	struct mlxsw_sp_rif *rif;
1027 
1028 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1029 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1030 		.common.dev = ol_dev,
1031 		.common.lag = false,
1032 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1033 	};
1034 
1035 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1036 	if (IS_ERR(rif))
1037 		return ERR_CAST(rif);
1038 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1039 }
1040 
1041 static struct mlxsw_sp_ipip_entry *
1042 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1043 			  enum mlxsw_sp_ipip_type ipipt,
1044 			  struct net_device *ol_dev)
1045 {
1046 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1047 	struct mlxsw_sp_ipip_entry *ipip_entry;
1048 	struct mlxsw_sp_ipip_entry *ret = NULL;
1049 
1050 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1051 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1052 	if (!ipip_entry)
1053 		return ERR_PTR(-ENOMEM);
1054 
1055 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1056 							    ol_dev, NULL);
1057 	if (IS_ERR(ipip_entry->ol_lb)) {
1058 		ret = ERR_CAST(ipip_entry->ol_lb);
1059 		goto err_ol_ipip_lb_create;
1060 	}
1061 
1062 	ipip_entry->ipipt = ipipt;
1063 	ipip_entry->ol_dev = ol_dev;
1064 
1065 	switch (ipip_ops->ul_proto) {
1066 	case MLXSW_SP_L3_PROTO_IPV4:
1067 		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1068 		break;
1069 	case MLXSW_SP_L3_PROTO_IPV6:
1070 		WARN_ON(1);
1071 		break;
1072 	}
1073 
1074 	return ipip_entry;
1075 
1076 err_ol_ipip_lb_create:
1077 	kfree(ipip_entry);
1078 	return ret;
1079 }
1080 
1081 static void
1082 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1083 {
1084 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1085 	kfree(ipip_entry);
1086 }
1087 
1088 static bool
1089 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1090 				  const enum mlxsw_sp_l3proto ul_proto,
1091 				  union mlxsw_sp_l3addr saddr,
1092 				  u32 ul_tb_id,
1093 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1094 {
1095 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1096 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1097 	union mlxsw_sp_l3addr tun_saddr;
1098 
1099 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1100 		return false;
1101 
1102 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1103 	return tun_ul_tb_id == ul_tb_id &&
1104 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1105 }
1106 
1107 static int
1108 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1109 			      struct mlxsw_sp_fib_entry *fib_entry,
1110 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1111 {
1112 	u32 tunnel_index;
1113 	int err;
1114 
1115 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1116 				  1, &tunnel_index);
1117 	if (err)
1118 		return err;
1119 
1120 	ipip_entry->decap_fib_entry = fib_entry;
1121 	fib_entry->decap.ipip_entry = ipip_entry;
1122 	fib_entry->decap.tunnel_index = tunnel_index;
1123 	return 0;
1124 }
1125 
1126 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1127 					  struct mlxsw_sp_fib_entry *fib_entry)
1128 {
1129 	/* Unlink this FIB entry from the IPIP entry whose decap entry it is. */
1130 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1131 	fib_entry->decap.ipip_entry = NULL;
1132 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1133 			   1, fib_entry->decap.tunnel_index);
1134 }
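
/* Editorial note: mlxsw_sp_fib_entry_decap_init() reserves a single KVDL
 * adjacency entry, records its index in fib_entry->decap.tunnel_index and
 * cross-links the FIB entry with its IPIP entry;
 * mlxsw_sp_fib_entry_decap_fini() unlinks them and returns the entry to
 * the KVDL.
 */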
1135 
1136 static struct mlxsw_sp_fib_node *
1137 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1138 			 size_t addr_len, unsigned char prefix_len);
1139 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1140 				     struct mlxsw_sp_fib_entry *fib_entry);
1141 
1142 static void
1143 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1144 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1145 {
1146 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1147 
1148 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1149 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1150 
1151 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1152 }
1153 
1154 static void
1155 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1156 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1157 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1158 {
1159 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1160 					  ipip_entry))
1161 		return;
1162 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1163 
1164 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1165 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1166 }
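
/* Editorial note: promotion and demotion flip the matching local route
 * between MLXSW_SP_FIB_ENTRY_TYPE_TRAP (packets punted to the CPU) and
 * MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP (packets decapsulated in hardware).
 * Promotion undoes itself via demote if the hardware update fails.
 */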
1167 
1168 static struct mlxsw_sp_fib_entry *
1169 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1170 				     enum mlxsw_sp_l3proto proto,
1171 				     const union mlxsw_sp_l3addr *addr,
1172 				     enum mlxsw_sp_fib_entry_type type)
1173 {
1174 	struct mlxsw_sp_fib_node *fib_node;
1175 	unsigned char addr_prefix_len;
1176 	struct mlxsw_sp_fib *fib;
1177 	struct mlxsw_sp_vr *vr;
1178 	const void *addrp;
1179 	size_t addr_len;
1180 	u32 addr4;
1181 
1182 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1183 	if (!vr)
1184 		return NULL;
1185 	fib = mlxsw_sp_vr_fib(vr, proto);
1186 
1187 	switch (proto) {
1188 	case MLXSW_SP_L3_PROTO_IPV4:
1189 		addr4 = be32_to_cpu(addr->addr4);
1190 		addrp = &addr4;
1191 		addr_len = 4;
1192 		addr_prefix_len = 32;
1193 		break;
1194 	case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
1195 	default:
1196 		WARN_ON(1);
1197 		return NULL;
1198 	}
1199 
1200 	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1201 					    addr_prefix_len);
1202 	if (!fib_node || fib_node->fib_entry->type != type)
1203 		return NULL;
1204 
1205 	return fib_node->fib_entry;
1206 }
1207 
1208 /* Given an IPIP entry, find the corresponding decap route. */
1209 static struct mlxsw_sp_fib_entry *
1210 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1211 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1212 {
1213 	struct mlxsw_sp_fib_node *fib_node;
1214 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1215 	unsigned char saddr_prefix_len;
1216 	union mlxsw_sp_l3addr saddr;
1217 	struct mlxsw_sp_fib *ul_fib;
1218 	struct mlxsw_sp_vr *ul_vr;
1219 	const void *saddrp;
1220 	size_t saddr_len;
1221 	u32 ul_tb_id;
1222 	u32 saddr4;
1223 
1224 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1225 
1226 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1227 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1228 	if (!ul_vr)
1229 		return NULL;
1230 
1231 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1232 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1233 					   ipip_entry->ol_dev);
1234 
1235 	switch (ipip_ops->ul_proto) {
1236 	case MLXSW_SP_L3_PROTO_IPV4:
1237 		saddr4 = be32_to_cpu(saddr.addr4);
1238 		saddrp = &saddr4;
1239 		saddr_len = 4;
1240 		saddr_prefix_len = 32;
1241 		break;
1242 	case MLXSW_SP_L3_PROTO_IPV6:
1243 		WARN_ON(1);
1244 		return NULL;
1245 	}
1246 
1247 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1248 					    saddr_prefix_len);
1249 	if (!fib_node ||
1250 	    fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1251 		return NULL;
1252 
1253 	return fib_node->fib_entry;
1254 }
1255 
1256 static struct mlxsw_sp_ipip_entry *
1257 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1258 			   enum mlxsw_sp_ipip_type ipipt,
1259 			   struct net_device *ol_dev)
1260 {
1261 	struct mlxsw_sp_ipip_entry *ipip_entry;
1262 
1263 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1264 	if (IS_ERR(ipip_entry))
1265 		return ipip_entry;
1266 
1267 	list_add_tail(&ipip_entry->ipip_list_node,
1268 		      &mlxsw_sp->router->ipip_list);
1269 
1270 	return ipip_entry;
1271 }
1272 
1273 static void
1274 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1275 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1276 {
1277 	list_del(&ipip_entry->ipip_list_node);
1278 	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1279 }
1280 
1281 static bool
1282 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1283 				  const struct net_device *ul_dev,
1284 				  enum mlxsw_sp_l3proto ul_proto,
1285 				  union mlxsw_sp_l3addr ul_dip,
1286 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1287 {
1288 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1289 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1290 
1291 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1292 		return false;
1293 
1294 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1295 						 ul_tb_id, ipip_entry);
1296 }
1297 
1298 /* Given decap parameters, find the corresponding IPIP entry. */
1299 static struct mlxsw_sp_ipip_entry *
1300 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1301 				  const struct net_device *ul_dev,
1302 				  enum mlxsw_sp_l3proto ul_proto,
1303 				  union mlxsw_sp_l3addr ul_dip)
1304 {
1305 	struct mlxsw_sp_ipip_entry *ipip_entry;
1306 
1307 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1308 			    ipip_list_node)
1309 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1310 						      ul_proto, ul_dip,
1311 						      ipip_entry))
1312 			return ipip_entry;
1313 
1314 	return NULL;
1315 }
1316 
1317 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1318 				      const struct net_device *dev,
1319 				      enum mlxsw_sp_ipip_type *p_type)
1320 {
1321 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1322 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1323 	enum mlxsw_sp_ipip_type ipipt;
1324 
1325 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1326 		ipip_ops = router->ipip_ops_arr[ipipt];
1327 		if (dev->type == ipip_ops->dev_type) {
1328 			if (p_type)
1329 				*p_type = ipipt;
1330 			return true;
1331 		}
1332 	}
1333 	return false;
1334 }
1335 
1336 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1337 				const struct net_device *dev)
1338 {
1339 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1340 }
1341 
1342 static struct mlxsw_sp_ipip_entry *
1343 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1344 				   const struct net_device *ol_dev)
1345 {
1346 	struct mlxsw_sp_ipip_entry *ipip_entry;
1347 
1348 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1349 			    ipip_list_node)
1350 		if (ipip_entry->ol_dev == ol_dev)
1351 			return ipip_entry;
1352 
1353 	return NULL;
1354 }
1355 
1356 static struct mlxsw_sp_ipip_entry *
1357 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1358 				   const struct net_device *ul_dev,
1359 				   struct mlxsw_sp_ipip_entry *start)
1360 {
1361 	struct mlxsw_sp_ipip_entry *ipip_entry;
1362 
1363 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1364 					ipip_list_node);
1365 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1366 				     ipip_list_node) {
1367 		struct net_device *ol_dev = ipip_entry->ol_dev;
1368 		struct net_device *ipip_ul_dev;
1369 
1370 		rcu_read_lock();
1371 		ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1372 		rcu_read_unlock();
1373 
1374 		if (ipip_ul_dev == ul_dev)
1375 			return ipip_entry;
1376 	}
1377 
1378 	return NULL;
1379 }
1380 
1381 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1382 				const struct net_device *dev)
1383 {
1384 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1385 }
1386 
1387 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1388 						const struct net_device *ol_dev,
1389 						enum mlxsw_sp_ipip_type ipipt)
1390 {
1391 	const struct mlxsw_sp_ipip_ops *ops
1392 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1393 
1394 	/* For deciding whether decap should be offloaded, we don't care about
1395 	 * overlay protocol, so ask whether either one is supported.
1396 	 */
1397 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1398 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1399 }
1400 
1401 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1402 						struct net_device *ol_dev)
1403 {
1404 	struct mlxsw_sp_ipip_entry *ipip_entry;
1405 	enum mlxsw_sp_l3proto ul_proto;
1406 	enum mlxsw_sp_ipip_type ipipt;
1407 	union mlxsw_sp_l3addr saddr;
1408 	u32 ul_tb_id;
1409 
1410 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1411 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1412 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1413 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1414 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1415 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1416 							  saddr, ul_tb_id,
1417 							  NULL)) {
1418 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1419 								ol_dev);
1420 			if (IS_ERR(ipip_entry))
1421 				return PTR_ERR(ipip_entry);
1422 		}
1423 	}
1424 
1425 	return 0;
1426 }
1427 
1428 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1429 						   struct net_device *ol_dev)
1430 {
1431 	struct mlxsw_sp_ipip_entry *ipip_entry;
1432 
1433 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1434 	if (ipip_entry)
1435 		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1436 }
1437 
1438 static void
1439 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1440 				struct mlxsw_sp_ipip_entry *ipip_entry)
1441 {
1442 	struct mlxsw_sp_fib_entry *decap_fib_entry;
1443 
1444 	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1445 	if (decap_fib_entry)
1446 		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1447 						  decap_fib_entry);
1448 }
1449 
1450 static int
1451 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1452 			u16 ul_rif_id, bool enable)
1453 {
1454 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1455 	struct mlxsw_sp_rif *rif = &lb_rif->common;
1456 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1457 	char ritr_pl[MLXSW_REG_RITR_LEN];
1458 	u32 saddr4;
1459 
1460 	switch (lb_cf.ul_protocol) {
1461 	case MLXSW_SP_L3_PROTO_IPV4:
1462 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1463 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1464 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
1465 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1466 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1467 			    ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1468 		break;
1469 
1470 	case MLXSW_SP_L3_PROTO_IPV6:
1471 		return -EAFNOSUPPORT;
1472 	}
1473 
1474 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1475 }
1476 
1477 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1478 						 struct net_device *ol_dev)
1479 {
1480 	struct mlxsw_sp_ipip_entry *ipip_entry;
1481 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1482 	int err = 0;
1483 
1484 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1485 	if (ipip_entry) {
1486 		lb_rif = ipip_entry->ol_lb;
1487 		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1488 					      lb_rif->ul_rif_id, true);
1489 		if (err)
1490 			goto out;
1491 		lb_rif->common.mtu = ol_dev->mtu;
1492 	}
1493 
1494 out:
1495 	return err;
1496 }
1497 
1498 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1499 						struct net_device *ol_dev)
1500 {
1501 	struct mlxsw_sp_ipip_entry *ipip_entry;
1502 
1503 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1504 	if (ipip_entry)
1505 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1506 }
1507 
1508 static void
1509 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1510 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1511 {
1512 	if (ipip_entry->decap_fib_entry)
1513 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1514 }
1515 
1516 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1517 						  struct net_device *ol_dev)
1518 {
1519 	struct mlxsw_sp_ipip_entry *ipip_entry;
1520 
1521 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1522 	if (ipip_entry)
1523 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1524 }
1525 
1526 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1527 					 struct mlxsw_sp_rif *old_rif,
1528 					 struct mlxsw_sp_rif *new_rif);
1529 static int
1530 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1531 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1532 				 bool keep_encap,
1533 				 struct netlink_ext_ack *extack)
1534 {
1535 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1536 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1537 
1538 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1539 						     ipip_entry->ipipt,
1540 						     ipip_entry->ol_dev,
1541 						     extack);
1542 	if (IS_ERR(new_lb_rif))
1543 		return PTR_ERR(new_lb_rif);
1544 	ipip_entry->ol_lb = new_lb_rif;
1545 
1546 	if (keep_encap)
1547 		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1548 					     &new_lb_rif->common);
1549 
1550 	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1551 
1552 	return 0;
1553 }
1554 
1555 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1556 					struct mlxsw_sp_rif *rif);
1557 
1558 /**
1559  * __mlxsw_sp_ipip_entry_update_tunnel - Update the offload related to an
1560  *	IPIP entry. This always updates decap, and in addition it also:
1561  * @recreate_loopback: recreates the associated loopback RIF
1562  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1563  *              relevant when recreate_loopback is true.
1564  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1565  *                   is only relevant when recreate_loopback is false.
1566  */
1567 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1568 					struct mlxsw_sp_ipip_entry *ipip_entry,
1569 					bool recreate_loopback,
1570 					bool keep_encap,
1571 					bool update_nexthops,
1572 					struct netlink_ext_ack *extack)
1573 {
1574 	int err;
1575 
1576 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1577 	 * recreate it. That creates a window of opportunity where RALUE and
1578 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1579 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1580 	 * of RALUE, demote the decap route back.
1581 	 */
1582 	if (ipip_entry->decap_fib_entry)
1583 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1584 
1585 	if (recreate_loopback) {
1586 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1587 						       keep_encap, extack);
1588 		if (err)
1589 			return err;
1590 	} else if (update_nexthops) {
1591 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1592 					    &ipip_entry->ol_lb->common);
1593 	}
1594 
1595 	if (ipip_entry->ol_dev->flags & IFF_UP)
1596 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1597 
1598 	return 0;
1599 }
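
/* Editorial summary of the flag combinations used by the callers below:
 * an overlay VRF move passes (recreate_loopback, !keep_encap), an
 * underlay VRF move passes (recreate_loopback, keep_encap), and underlay
 * up/down events pass only update_nexthops.
 */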
1600 
1601 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1602 						struct net_device *ol_dev,
1603 						struct netlink_ext_ack *extack)
1604 {
1605 	struct mlxsw_sp_ipip_entry *ipip_entry =
1606 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1607 
1608 	if (!ipip_entry)
1609 		return 0;
1610 
1611 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1612 						   true, false, false, extack);
1613 }
1614 
1615 static int
1616 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1617 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1618 				     struct net_device *ul_dev,
1619 				     bool *demote_this,
1620 				     struct netlink_ext_ack *extack)
1621 {
1622 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1623 	enum mlxsw_sp_l3proto ul_proto;
1624 	union mlxsw_sp_l3addr saddr;
1625 
1626 	/* Moving underlay to a different VRF might cause local address
1627 	 * conflict, and the conflicting tunnels need to be demoted.
1628 	 */
1629 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1630 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1631 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1632 						 saddr, ul_tb_id,
1633 						 ipip_entry)) {
1634 		*demote_this = true;
1635 		return 0;
1636 	}
1637 
1638 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1639 						   true, true, false, extack);
1640 }
1641 
1642 static int
1643 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1644 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1645 				    struct net_device *ul_dev)
1646 {
1647 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1648 						   false, false, true, NULL);
1649 }
1650 
1651 static int
1652 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1653 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1654 				      struct net_device *ul_dev)
1655 {
1656 	/* A down underlay device causes encapsulated packets to not be
1657 	 * forwarded, but decap still works. So refresh next hops without
1658 	 * touching anything else.
1659 	 */
1660 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1661 						   false, false, true, NULL);
1662 }
1663 
1664 static int
1665 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1666 					struct net_device *ol_dev,
1667 					struct netlink_ext_ack *extack)
1668 {
1669 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1670 	struct mlxsw_sp_ipip_entry *ipip_entry;
1671 	int err;
1672 
1673 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1674 	if (!ipip_entry)
1675 		/* A change might make a tunnel eligible for offloading, but
1676 		 * that is currently not implemented. What falls to slow path
1677 		 * stays there.
1678 		 */
1679 		return 0;
1680 
1681 	/* A change might make a tunnel not eligible for offloading. */
1682 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1683 						 ipip_entry->ipipt)) {
1684 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1685 		return 0;
1686 	}
1687 
1688 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1689 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1690 	return err;
1691 }
1692 
1693 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1694 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1695 {
1696 	struct net_device *ol_dev = ipip_entry->ol_dev;
1697 
1698 	if (ol_dev->flags & IFF_UP)
1699 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1700 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1701 }
1702 
1703 /* The configuration where several tunnels have the same local address in the
1704  * same underlay table needs special treatment in the HW. That is currently not
1705  * implemented in the driver. This function finds and demotes the first tunnel
1706  * with a given source address, except the one passed in via the argument
1707  * `except'.
1708  */
1709 bool
1710 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1711 				     enum mlxsw_sp_l3proto ul_proto,
1712 				     union mlxsw_sp_l3addr saddr,
1713 				     u32 ul_tb_id,
1714 				     const struct mlxsw_sp_ipip_entry *except)
1715 {
1716 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1717 
1718 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1719 				 ipip_list_node) {
1720 		if (ipip_entry != except &&
1721 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1722 						      ul_tb_id, ipip_entry)) {
1723 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1724 			return true;
1725 		}
1726 	}
1727 
1728 	return false;
1729 }
1730 
1731 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1732 						     struct net_device *ul_dev)
1733 {
1734 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1735 
1736 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1737 				 ipip_list_node) {
1738 		struct net_device *ol_dev = ipip_entry->ol_dev;
1739 		struct net_device *ipip_ul_dev;
1740 
1741 		rcu_read_lock();
1742 		ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1743 		rcu_read_unlock();
1744 		if (ipip_ul_dev == ul_dev)
1745 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1746 	}
1747 }
1748 
1749 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1750 				     struct net_device *ol_dev,
1751 				     unsigned long event,
1752 				     struct netdev_notifier_info *info)
1753 {
1754 	struct netdev_notifier_changeupper_info *chup;
1755 	struct netlink_ext_ack *extack;
1756 
1757 	switch (event) {
1758 	case NETDEV_REGISTER:
1759 		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1760 	case NETDEV_UNREGISTER:
1761 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1762 		return 0;
1763 	case NETDEV_UP:
1764 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1765 		return 0;
1766 	case NETDEV_DOWN:
1767 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1768 		return 0;
1769 	case NETDEV_CHANGEUPPER:
1770 		chup = container_of(info, typeof(*chup), info);
1771 		extack = info->extack;
1772 		if (netif_is_l3_master(chup->upper_dev))
1773 			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1774 								    ol_dev,
1775 								    extack);
1776 		return 0;
1777 	case NETDEV_CHANGE:
1778 		extack = info->extack;
1779 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1780 							       ol_dev, extack);
1781 	case NETDEV_CHANGEMTU:
1782 		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1783 	}
1784 	return 0;
1785 }
1786 
1787 static int
1788 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1789 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1790 				   struct net_device *ul_dev,
1791 				   bool *demote_this,
1792 				   unsigned long event,
1793 				   struct netdev_notifier_info *info)
1794 {
1795 	struct netdev_notifier_changeupper_info *chup;
1796 	struct netlink_ext_ack *extack;
1797 
1798 	switch (event) {
1799 	case NETDEV_CHANGEUPPER:
1800 		chup = container_of(info, typeof(*chup), info);
1801 		extack = info->extack;
1802 		if (netif_is_l3_master(chup->upper_dev))
1803 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1804 								    ipip_entry,
1805 								    ul_dev,
1806 								    demote_this,
1807 								    extack);
1808 		break;
1809 
1810 	case NETDEV_UP:
1811 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1812 							   ul_dev);
1813 	case NETDEV_DOWN:
1814 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1815 							     ipip_entry,
1816 							     ul_dev);
1817 	}
1818 	return 0;
1819 }
1820 
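/* Walk all IPIP entries whose underlay device is ul_dev and let each one
 * react to the event. When handling an entry requires demoting it, remember
 * the previous list entry as the iteration cursor, since the demoted entry
 * is freed and can no longer be used to continue the walk.
 */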
1821 int
1822 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1823 				 struct net_device *ul_dev,
1824 				 unsigned long event,
1825 				 struct netdev_notifier_info *info)
1826 {
1827 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1828 	int err;
1829 
1830 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1831 								ul_dev,
1832 								ipip_entry))) {
1833 		struct mlxsw_sp_ipip_entry *prev;
1834 		bool demote_this = false;
1835 
1836 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1837 							 ul_dev, &demote_this,
1838 							 event, info);
1839 		if (err) {
1840 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1841 								 ul_dev);
1842 			return err;
1843 		}
1844 
1845 		if (demote_this) {
1846 			if (list_is_first(&ipip_entry->ipip_list_node,
1847 					  &mlxsw_sp->router->ipip_list))
1848 				prev = NULL;
1849 			else
1850 				/* This can't be cached from the previous
1851 				 * iteration, because that entry could be gone now.
1852 				 */
1853 				prev = list_prev_entry(ipip_entry,
1854 						       ipip_list_node);
1855 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1856 			ipip_entry = prev;
1857 		}
1858 	}
1859 
1860 	return 0;
1861 }
1862 
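/* Promote a local route from trapping packets to the CPU to decapsulating
 * NVE traffic in HW. If the FIB entry for the tunnel's local address does not
 * exist yet, this is a no-op. If the update fails, the entry is restored to
 * trapping packets to the CPU.
 */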
1863 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1864 				      enum mlxsw_sp_l3proto ul_proto,
1865 				      const union mlxsw_sp_l3addr *ul_sip,
1866 				      u32 tunnel_index)
1867 {
1868 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1869 	struct mlxsw_sp_fib_entry *fib_entry;
1870 	int err;
1871 
1872 	/* It is valid to create a tunnel with a local IP and only later
1873 	 * assign this IP address to a local interface.
1874 	 */
1875 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1876 							 ul_proto, ul_sip,
1877 							 type);
1878 	if (!fib_entry)
1879 		return 0;
1880 
1881 	fib_entry->decap.tunnel_index = tunnel_index;
1882 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1883 
1884 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1885 	if (err)
1886 		goto err_fib_entry_update;
1887 
1888 	return 0;
1889 
1890 err_fib_entry_update:
1891 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1892 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1893 	return err;
1894 }
1895 
1896 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1897 				      enum mlxsw_sp_l3proto ul_proto,
1898 				      const union mlxsw_sp_l3addr *ul_sip)
1899 {
1900 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1901 	struct mlxsw_sp_fib_entry *fib_entry;
1902 
1903 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1904 							 ul_proto, ul_sip,
1905 							 type);
1906 	if (!fib_entry)
1907 		return;
1908 
1909 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1910 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1911 }
1912 
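/* Neighbour entries mirror resolved kernel neighbours into the device's host
 * table. They are keyed by the kernel's struct neighbour pointer and kept
 * both in a rhashtable for lookup and on the per-RIF neigh_list.
 */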
1913 struct mlxsw_sp_neigh_key {
1914 	struct neighbour *n;
1915 };
1916 
1917 struct mlxsw_sp_neigh_entry {
1918 	struct list_head rif_list_node;
1919 	struct rhash_head ht_node;
1920 	struct mlxsw_sp_neigh_key key;
1921 	u16 rif;
1922 	bool connected;
1923 	unsigned char ha[ETH_ALEN];
1924 	struct list_head nexthop_list; /* list of nexthops using
1925 					* this neigh entry
1926 					*/
1927 	struct list_head nexthop_neighs_list_node;
1928 	unsigned int counter_index;
1929 	bool counter_valid;
1930 };
1931 
1932 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1933 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1934 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1935 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1936 };
1937 
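/* Iterate over the neighbour entries associated with a RIF. Passing a NULL
 * neigh_entry returns the first entry; a NULL return value marks the end.
 */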
1938 struct mlxsw_sp_neigh_entry *
1939 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1940 			struct mlxsw_sp_neigh_entry *neigh_entry)
1941 {
1942 	if (!neigh_entry) {
1943 		if (list_empty(&rif->neigh_list))
1944 			return NULL;
1945 		else
1946 			return list_first_entry(&rif->neigh_list,
1947 						typeof(*neigh_entry),
1948 						rif_list_node);
1949 	}
1950 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1951 		return NULL;
1952 	return list_next_entry(neigh_entry, rif_list_node);
1953 }
1954 
1955 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1956 {
1957 	return neigh_entry->key.n->tbl->family;
1958 }
1959 
1960 unsigned char *
1961 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1962 {
1963 	return neigh_entry->ha;
1964 }
1965 
1966 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1967 {
1968 	struct neighbour *n;
1969 
1970 	n = neigh_entry->key.n;
1971 	return ntohl(*((__be32 *) n->primary_key));
1972 }
1973 
1974 struct in6_addr *
1975 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1976 {
1977 	struct neighbour *n;
1978 
1979 	n = neigh_entry->key.n;
1980 	return (struct in6_addr *) &n->primary_key;
1981 }
1982 
1983 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1984 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1985 			       u64 *p_counter)
1986 {
1987 	if (!neigh_entry->counter_valid)
1988 		return -EINVAL;
1989 
1990 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1991 					 p_counter, NULL);
1992 }
1993 
1994 static struct mlxsw_sp_neigh_entry *
1995 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1996 			   u16 rif)
1997 {
1998 	struct mlxsw_sp_neigh_entry *neigh_entry;
1999 
2000 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2001 	if (!neigh_entry)
2002 		return NULL;
2003 
2004 	neigh_entry->key.n = n;
2005 	neigh_entry->rif = rif;
2006 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
2007 
2008 	return neigh_entry;
2009 }
2010 
2011 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
2012 {
2013 	kfree(neigh_entry);
2014 }
2015 
2016 static int
2017 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2018 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2019 {
2020 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2021 				      &neigh_entry->ht_node,
2022 				      mlxsw_sp_neigh_ht_params);
2023 }
2024 
2025 static void
2026 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2027 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2028 {
2029 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2030 			       &neigh_entry->ht_node,
2031 			       mlxsw_sp_neigh_ht_params);
2032 }
2033 
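/* Only allocate a flow counter for the neighbour if counters were enabled on
 * the corresponding dpipe host table (host4 or host6).
 */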
2034 static bool
2035 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2036 				    struct mlxsw_sp_neigh_entry *neigh_entry)
2037 {
2038 	struct devlink *devlink;
2039 	const char *table_name;
2040 
2041 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2042 	case AF_INET:
2043 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2044 		break;
2045 	case AF_INET6:
2046 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2047 		break;
2048 	default:
2049 		WARN_ON(1);
2050 		return false;
2051 	}
2052 
2053 	devlink = priv_to_devlink(mlxsw_sp->core);
2054 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2055 }
2056 
2057 static void
2058 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2059 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2060 {
2061 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2062 		return;
2063 
2064 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2065 		return;
2066 
2067 	neigh_entry->counter_valid = true;
2068 }
2069 
2070 static void
2071 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2072 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2073 {
2074 	if (!neigh_entry->counter_valid)
2075 		return;
2076 	mlxsw_sp_flow_counter_free(mlxsw_sp,
2077 				   neigh_entry->counter_index);
2078 	neigh_entry->counter_valid = false;
2079 }
2080 
2081 static struct mlxsw_sp_neigh_entry *
2082 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2083 {
2084 	struct mlxsw_sp_neigh_entry *neigh_entry;
2085 	struct mlxsw_sp_rif *rif;
2086 	int err;
2087 
2088 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2089 	if (!rif)
2090 		return ERR_PTR(-EINVAL);
2091 
2092 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2093 	if (!neigh_entry)
2094 		return ERR_PTR(-ENOMEM);
2095 
2096 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2097 	if (err)
2098 		goto err_neigh_entry_insert;
2099 
2100 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2101 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2102 
2103 	return neigh_entry;
2104 
2105 err_neigh_entry_insert:
2106 	mlxsw_sp_neigh_entry_free(neigh_entry);
2107 	return ERR_PTR(err);
2108 }
2109 
2110 static void
2111 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2112 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2113 {
2114 	list_del(&neigh_entry->rif_list_node);
2115 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2116 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2117 	mlxsw_sp_neigh_entry_free(neigh_entry);
2118 }
2119 
2120 static struct mlxsw_sp_neigh_entry *
2121 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2122 {
2123 	struct mlxsw_sp_neigh_key key;
2124 
2125 	key.n = n;
2126 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2127 				      &key, mlxsw_sp_neigh_ht_params);
2128 }
2129 
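/* Base the polling interval on DELAY_PROBE_TIME, taking the minimum of the
 * ARP and ND table defaults when IPv6 is enabled.
 */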
2130 static void
2131 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2132 {
2133 	unsigned long interval;
2134 
2135 #if IS_ENABLED(CONFIG_IPV6)
2136 	interval = min_t(unsigned long,
2137 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2138 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2139 #else
2140 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2141 #endif
2142 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2143 }
2144 
2145 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2146 						   char *rauhtd_pl,
2147 						   int ent_index)
2148 {
2149 	struct net_device *dev;
2150 	struct neighbour *n;
2151 	__be32 dipn;
2152 	u32 dip;
2153 	u16 rif;
2154 
2155 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2156 
2157 	if (!mlxsw_sp->router->rifs[rif]) {
2158 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2159 		return;
2160 	}
2161 
2162 	dipn = htonl(dip);
2163 	dev = mlxsw_sp->router->rifs[rif]->dev;
2164 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2165 	if (!n)
2166 		return;
2167 
2168 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2169 	neigh_event_send(n, NULL);
2170 	neigh_release(n);
2171 }
2172 
2173 #if IS_ENABLED(CONFIG_IPV6)
2174 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2175 						   char *rauhtd_pl,
2176 						   int rec_index)
2177 {
2178 	struct net_device *dev;
2179 	struct neighbour *n;
2180 	struct in6_addr dip;
2181 	u16 rif;
2182 
2183 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2184 					 (char *) &dip);
2185 
2186 	if (!mlxsw_sp->router->rifs[rif]) {
2187 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2188 		return;
2189 	}
2190 
2191 	dev = mlxsw_sp->router->rifs[rif]->dev;
2192 	n = neigh_lookup(&nd_tbl, &dip, dev);
2193 	if (!n)
2194 		return;
2195 
2196 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2197 	neigh_event_send(n, NULL);
2198 	neigh_release(n);
2199 }
2200 #else
2201 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2202 						   char *rauhtd_pl,
2203 						   int rec_index)
2204 {
2205 }
2206 #endif
2207 
2208 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2209 						   char *rauhtd_pl,
2210 						   int rec_index)
2211 {
2212 	u8 num_entries;
2213 	int i;
2214 
2215 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2216 								rec_index);
2217 	/* Hardware starts counting at 0, so add 1. */
2218 	num_entries++;
2219 
2220 	/* Each record consists of several neighbour entries. */
2221 	for (i = 0; i < num_entries; i++) {
2222 		int ent_index;
2223 
2224 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2225 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2226 						       ent_index);
2227 	}
2228 
2229 }
2230 
2231 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2232 						   char *rauhtd_pl,
2233 						   int rec_index)
2234 {
2235 	/* One record contains one entry. */
2236 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2237 					       rec_index);
2238 }
2239 
2240 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2241 					      char *rauhtd_pl, int rec_index)
2242 {
2243 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2244 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2245 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2246 						       rec_index);
2247 		break;
2248 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2249 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2250 						       rec_index);
2251 		break;
2252 	}
2253 }
2254 
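/* A dump is considered full when the response holds the maximum number of
 * records and the last record has no room left: an IPv6 record always holds
 * a single entry, while an IPv4 record is only full when all of its entry
 * slots are used.
 */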
2255 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2256 {
2257 	u8 num_rec, last_rec_index, num_entries;
2258 
2259 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2260 	last_rec_index = num_rec - 1;
2261 
2262 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2263 		return false;
2264 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2265 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2266 		return true;
2267 
2268 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2269 								last_rec_index);
2270 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2271 		return true;
2272 	return false;
2273 }
2274 
2275 static int
2276 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2277 				       char *rauhtd_pl,
2278 				       enum mlxsw_reg_rauhtd_type type)
2279 {
2280 	int i, num_rec;
2281 	int err;
2282 
2283 	/* Make sure the neighbour's netdev isn't removed in the
2284 	 * process.
2285 	 */
2286 	rtnl_lock();
2287 	do {
2288 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2289 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2290 				      rauhtd_pl);
2291 		if (err) {
2292 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2293 			break;
2294 		}
2295 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2296 		for (i = 0; i < num_rec; i++)
2297 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2298 							  i);
2299 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2300 	rtnl_unlock();
2301 
2302 	return err;
2303 }
2304 
2305 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2306 {
2307 	enum mlxsw_reg_rauhtd_type type;
2308 	char *rauhtd_pl;
2309 	int err;
2310 
2311 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2312 	if (!rauhtd_pl)
2313 		return -ENOMEM;
2314 
2315 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2316 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2317 	if (err)
2318 		goto out;
2319 
2320 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2321 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2322 out:
2323 	kfree(rauhtd_pl);
2324 	return err;
2325 }
2326 
2327 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2328 {
2329 	struct mlxsw_sp_neigh_entry *neigh_entry;
2330 
2331 	/* Take the RTNL mutex here to prevent the lists from changing. */
2332 	rtnl_lock();
2333 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2334 			    nexthop_neighs_list_node)
2335 		/* If this neigh has nexthops, make the kernel think this neigh
2336 		 * is active regardless of the traffic.
2337 		 */
2338 		neigh_event_send(neigh_entry->key.n, NULL);
2339 	rtnl_unlock();
2340 }
2341 
2342 static void
2343 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2344 {
2345 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2346 
2347 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2348 			       msecs_to_jiffies(interval));
2349 }
2350 
2351 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2352 {
2353 	struct mlxsw_sp_router *router;
2354 	int err;
2355 
2356 	router = container_of(work, struct mlxsw_sp_router,
2357 			      neighs_update.dw.work);
2358 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2359 	if (err)
2360 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
2361 
2362 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2363 
2364 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2365 }
2366 
2367 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2368 {
2369 	struct mlxsw_sp_neigh_entry *neigh_entry;
2370 	struct mlxsw_sp_router *router;
2371 
2372 	router = container_of(work, struct mlxsw_sp_router,
2373 			      nexthop_probe_dw.work);
2374 	/* Iterate over nexthop neighbours, find those that are unresolved
2375 	 * and send ARP requests to them. This solves the chicken-and-egg
2376 	 * problem: a nexthop would not get offloaded until its neighbour
2377 	 * is resolved, but the neighbour would never get resolved if
2378 	 * traffic is flowing in HW using a different nexthop.
2379 	 *
2380 	 * Take the RTNL mutex here to prevent the lists from changing.
2381 	 */
2382 	rtnl_lock();
2383 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2384 			    nexthop_neighs_list_node)
2385 		if (!neigh_entry->connected)
2386 			neigh_event_send(neigh_entry->key.n, NULL);
2387 	rtnl_unlock();
2388 
2389 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2390 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2391 }
2392 
2393 static void
2394 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2395 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2396 			      bool removing, bool dead);
2397 
2398 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2399 {
2400 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2401 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2402 }
2403 
2404 static int
2405 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2406 				struct mlxsw_sp_neigh_entry *neigh_entry,
2407 				enum mlxsw_reg_rauht_op op)
2408 {
2409 	struct neighbour *n = neigh_entry->key.n;
2410 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2411 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2412 
2413 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2414 			      dip);
2415 	if (neigh_entry->counter_valid)
2416 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2417 					     neigh_entry->counter_index);
2418 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2419 }
2420 
2421 static int
2422 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2423 				struct mlxsw_sp_neigh_entry *neigh_entry,
2424 				enum mlxsw_reg_rauht_op op)
2425 {
2426 	struct neighbour *n = neigh_entry->key.n;
2427 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2428 	const char *dip = n->primary_key;
2429 
2430 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2431 			      dip);
2432 	if (neigh_entry->counter_valid)
2433 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2434 					     neigh_entry->counter_index);
2435 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2436 }
2437 
2438 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2439 {
2440 	struct neighbour *n = neigh_entry->key.n;
2441 
2442 	/* Packets with a link-local destination address are trapped
2443 	 * after LPM lookup and never reach the neighbour table, so
2444 	 * there is no need to program such neighbours to the device.
2445 	 */
2446 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2447 	    IPV6_ADDR_LINKLOCAL)
2448 		return true;
2449 	return false;
2450 }
2451 
2452 static void
2453 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2454 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2455 			    bool adding)
2456 {
2457 	enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2458 	int err;
2459 
2460 	if (!adding && !neigh_entry->connected)
2461 		return;
2462 	neigh_entry->connected = adding;
2463 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2464 		err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2465 						      op);
2466 		if (err)
2467 			return;
2468 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2469 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2470 			return;
2471 		err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2472 						      op);
2473 		if (err)
2474 			return;
2475 	} else {
2476 		WARN_ON_ONCE(1);
2477 		return;
2478 	}
2479 
2480 	if (adding)
2481 		neigh_entry->key.n->flags |= NTF_OFFLOADED;
2482 	else
2483 		neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2484 }
2485 
2486 void
2487 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2488 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2489 				    bool adding)
2490 {
2491 	if (adding)
2492 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2493 	else
2494 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2495 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2496 }
2497 
2498 struct mlxsw_sp_netevent_work {
2499 	struct work_struct work;
2500 	struct mlxsw_sp *mlxsw_sp;
2501 	struct neighbour *n;
2502 };
2503 
2504 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2505 {
2506 	struct mlxsw_sp_netevent_work *net_work =
2507 		container_of(work, struct mlxsw_sp_netevent_work, work);
2508 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2509 	struct mlxsw_sp_neigh_entry *neigh_entry;
2510 	struct neighbour *n = net_work->n;
2511 	unsigned char ha[ETH_ALEN];
2512 	bool entry_connected;
2513 	u8 nud_state, dead;
2514 
2515 	/* If these parameters are changed after we release the lock,
2516 	 * then we are guaranteed to receive another event letting us
2517 	 * know about it.
2518 	 */
2519 	read_lock_bh(&n->lock);
2520 	memcpy(ha, n->ha, ETH_ALEN);
2521 	nud_state = n->nud_state;
2522 	dead = n->dead;
2523 	read_unlock_bh(&n->lock);
2524 
2525 	rtnl_lock();
2526 	mlxsw_sp_span_respin(mlxsw_sp);
2527 
2528 	entry_connected = nud_state & NUD_VALID && !dead;
2529 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2530 	if (!entry_connected && !neigh_entry)
2531 		goto out;
2532 	if (!neigh_entry) {
2533 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2534 		if (IS_ERR(neigh_entry))
2535 			goto out;
2536 	}
2537 
2538 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2539 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2540 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2541 				      dead);
2542 
2543 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2544 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2545 
2546 out:
2547 	rtnl_unlock();
2548 	neigh_release(n);
2549 	kfree(net_work);
2550 }
2551 
2552 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2553 
2554 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2555 {
2556 	struct mlxsw_sp_netevent_work *net_work =
2557 		container_of(work, struct mlxsw_sp_netevent_work, work);
2558 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2559 
2560 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2561 	kfree(net_work);
2562 }
2563 
2564 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2565 
2566 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2567 {
2568 	struct mlxsw_sp_netevent_work *net_work =
2569 		container_of(work, struct mlxsw_sp_netevent_work, work);
2570 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2571 
2572 	__mlxsw_sp_router_init(mlxsw_sp);
2573 	kfree(net_work);
2574 }
2575 
2576 static int mlxsw_sp_router_schedule_work(struct net *net,
2577 					 struct notifier_block *nb,
2578 					 void (*cb)(struct work_struct *))
2579 {
2580 	struct mlxsw_sp_netevent_work *net_work;
2581 	struct mlxsw_sp_router *router;
2582 
2583 	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2584 	if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2585 		return NOTIFY_DONE;
2586 
2587 	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2588 	if (!net_work)
2589 		return NOTIFY_BAD;
2590 
2591 	INIT_WORK(&net_work->work, cb);
2592 	net_work->mlxsw_sp = router->mlxsw_sp;
2593 	mlxsw_core_schedule_work(&net_work->work);
2594 	return NOTIFY_DONE;
2595 }
2596 
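/* Netevent notifications arrive in an atomic context, so the actual
 * processing of neighbour and multipath-hash updates is deferred to work
 * items; only the polling interval update is handled synchronously here.
 */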
2597 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2598 					  unsigned long event, void *ptr)
2599 {
2600 	struct mlxsw_sp_netevent_work *net_work;
2601 	struct mlxsw_sp_port *mlxsw_sp_port;
2602 	struct mlxsw_sp *mlxsw_sp;
2603 	unsigned long interval;
2604 	struct neigh_parms *p;
2605 	struct neighbour *n;
2606 
2607 	switch (event) {
2608 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2609 		p = ptr;
2610 
2611 		/* We don't care about changes in the default table. */
2612 		if (!p->dev || (p->tbl->family != AF_INET &&
2613 				p->tbl->family != AF_INET6))
2614 			return NOTIFY_DONE;
2615 
2616 		/* We are in an atomic context and can't take the RTNL mutex,
2617 		 * so use the RCU variant to walk the device chain.
2618 		 */
2619 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2620 		if (!mlxsw_sp_port)
2621 			return NOTIFY_DONE;
2622 
2623 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2624 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2625 		mlxsw_sp->router->neighs_update.interval = interval;
2626 
2627 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2628 		break;
2629 	case NETEVENT_NEIGH_UPDATE:
2630 		n = ptr;
2631 
2632 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2633 			return NOTIFY_DONE;
2634 
2635 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2636 		if (!mlxsw_sp_port)
2637 			return NOTIFY_DONE;
2638 
2639 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2640 		if (!net_work) {
2641 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2642 			return NOTIFY_BAD;
2643 		}
2644 
2645 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2646 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2647 		net_work->n = n;
2648 
2649 		/* Take a reference to ensure the neighbour won't be
2650 		 * destroyed until we drop the reference in the delayed
2651 		 * work.
2652 		 */
2653 		neigh_clone(n);
2654 		mlxsw_core_schedule_work(&net_work->work);
2655 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2656 		break;
2657 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2658 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2659 		return mlxsw_sp_router_schedule_work(ptr, nb,
2660 				mlxsw_sp_router_mp_hash_event_work);
2661 
2662 	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2663 		return mlxsw_sp_router_schedule_work(ptr, nb,
2664 				mlxsw_sp_router_update_priority_work);
2665 	}
2666 
2667 	return NOTIFY_DONE;
2668 }
2669 
2670 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2671 {
2672 	int err;
2673 
2674 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2675 			      &mlxsw_sp_neigh_ht_params);
2676 	if (err)
2677 		return err;
2678 
2679 	/* Initialize the polling interval according to the default
2680 	 * table.
2681 	 */
2682 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2683 
2684 	/* Create the delayed works for neighbour activity update and nexthop probing */
2685 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2686 			  mlxsw_sp_router_neighs_update_work);
2687 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2688 			  mlxsw_sp_router_probe_unresolved_nexthops);
2689 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2690 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2691 	return 0;
2692 }
2693 
2694 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2695 {
2696 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2697 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2698 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2699 }
2700 
2701 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2702 					 struct mlxsw_sp_rif *rif)
2703 {
2704 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2705 
2706 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2707 				 rif_list_node) {
2708 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2709 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2710 	}
2711 }
2712 
2713 enum mlxsw_sp_nexthop_type {
2714 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2715 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2716 };
2717 
2718 struct mlxsw_sp_nexthop_key {
2719 	struct fib_nh *fib_nh;
2720 };
2721 
2722 struct mlxsw_sp_nexthop {
2723 	struct list_head neigh_list_node; /* member of neigh entry list */
2724 	struct list_head rif_list_node;
2725 	struct list_head router_list_node;
2726 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2727 						* this belongs to
2728 						*/
2729 	struct rhash_head ht_node;
2730 	struct mlxsw_sp_nexthop_key key;
2731 	unsigned char gw_addr[sizeof(struct in6_addr)];
2732 	int ifindex;
2733 	int nh_weight;
2734 	int norm_nh_weight;
2735 	int num_adj_entries;
2736 	struct mlxsw_sp_rif *rif;
2737 	u8 should_offload:1, /* set indicates this neigh is connected and
2738 			      * should be put into the KVD linear area of this group.
2739 			      */
2740 	   offloaded:1, /* set in case the neigh is actually put into the
2741 			 * KVD linear area of this group.
2742 			 */
2743 	   update:1; /* set indicates that the MAC of this neigh should be
2744 		      * updated in HW.
2745 		      */
2746 	enum mlxsw_sp_nexthop_type type;
2747 	union {
2748 		struct mlxsw_sp_neigh_entry *neigh_entry;
2749 		struct mlxsw_sp_ipip_entry *ipip_entry;
2750 	};
2751 	unsigned int counter_index;
2752 	bool counter_valid;
2753 };
2754 
2755 struct mlxsw_sp_nexthop_group {
2756 	void *priv;
2757 	struct rhash_head ht_node;
2758 	struct list_head fib_list; /* list of fib entries that use this group */
2759 	struct neigh_table *neigh_tbl;
2760 	u8 adj_index_valid:1,
2761 	   gateway:1; /* routes using the group use a gateway */
2762 	u32 adj_index;
2763 	u16 ecmp_size;
2764 	u16 count;
2765 	int sum_norm_weight;
2766 	struct mlxsw_sp_nexthop nexthops[0];
2767 #define nh_rif	nexthops[0].rif
2768 };
2769 
2770 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2771 				    struct mlxsw_sp_nexthop *nh)
2772 {
2773 	struct devlink *devlink;
2774 
2775 	devlink = priv_to_devlink(mlxsw_sp->core);
2776 	if (!devlink_dpipe_table_counter_enabled(devlink,
2777 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2778 		return;
2779 
2780 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2781 		return;
2782 
2783 	nh->counter_valid = true;
2784 }
2785 
2786 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2787 				   struct mlxsw_sp_nexthop *nh)
2788 {
2789 	if (!nh->counter_valid)
2790 		return;
2791 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2792 	nh->counter_valid = false;
2793 }
2794 
2795 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2796 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2797 {
2798 	if (!nh->counter_valid)
2799 		return -EINVAL;
2800 
2801 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2802 					 p_counter, NULL);
2803 }
2804 
2805 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2806 					       struct mlxsw_sp_nexthop *nh)
2807 {
2808 	if (!nh) {
2809 		if (list_empty(&router->nexthop_list))
2810 			return NULL;
2811 		else
2812 			return list_first_entry(&router->nexthop_list,
2813 						typeof(*nh), router_list_node);
2814 	}
2815 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2816 		return NULL;
2817 	return list_next_entry(nh, router_list_node);
2818 }
2819 
2820 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2821 {
2822 	return nh->offloaded;
2823 }
2824 
2825 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2826 {
2827 	if (!nh->offloaded)
2828 		return NULL;
2829 	return nh->neigh_entry->ha;
2830 }
2831 
2832 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2833 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2834 {
2835 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2836 	u32 adj_hash_index = 0;
2837 	int i;
2838 
2839 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2840 		return -EINVAL;
2841 
2842 	*p_adj_index = nh_grp->adj_index;
2843 	*p_adj_size = nh_grp->ecmp_size;
2844 
2845 	for (i = 0; i < nh_grp->count; i++) {
2846 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2847 
2848 		if (nh_iter == nh)
2849 			break;
2850 		if (nh_iter->offloaded)
2851 			adj_hash_index += nh_iter->num_adj_entries;
2852 	}
2853 
2854 	*p_adj_hash_index = adj_hash_index;
2855 	return 0;
2856 }
2857 
2858 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2859 {
2860 	return nh->rif;
2861 }
2862 
2863 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2864 {
2865 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2866 	int i;
2867 
2868 	for (i = 0; i < nh_grp->count; i++) {
2869 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2870 
2871 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2872 			return true;
2873 	}
2874 	return false;
2875 }
2876 
2877 static struct fib_info *
2878 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2879 {
2880 	return nh_grp->priv;
2881 }
2882 
2883 struct mlxsw_sp_nexthop_group_cmp_arg {
2884 	enum mlxsw_sp_l3proto proto;
2885 	union {
2886 		struct fib_info *fi;
2887 		struct mlxsw_sp_fib6_entry *fib6_entry;
2888 	};
2889 };
2890 
2891 static bool
2892 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2893 				    const struct in6_addr *gw, int ifindex,
2894 				    int weight)
2895 {
2896 	int i;
2897 
2898 	for (i = 0; i < nh_grp->count; i++) {
2899 		const struct mlxsw_sp_nexthop *nh;
2900 
2901 		nh = &nh_grp->nexthops[i];
2902 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2903 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2904 			return true;
2905 	}
2906 
2907 	return false;
2908 }
2909 
2910 static bool
2911 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2912 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2913 {
2914 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2915 
2916 	if (nh_grp->count != fib6_entry->nrt6)
2917 		return false;
2918 
2919 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2920 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
2921 		struct in6_addr *gw;
2922 		int ifindex, weight;
2923 
2924 		ifindex = fib6_nh->fib_nh_dev->ifindex;
2925 		weight = fib6_nh->fib_nh_weight;
2926 		gw = &fib6_nh->fib_nh_gw6;
2927 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2928 							 weight))
2929 			return false;
2930 	}
2931 
2932 	return true;
2933 }
2934 
2935 static int
2936 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2937 {
2938 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2939 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2940 
2941 	switch (cmp_arg->proto) {
2942 	case MLXSW_SP_L3_PROTO_IPV4:
2943 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2944 	case MLXSW_SP_L3_PROTO_IPV6:
2945 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2946 						    cmp_arg->fib6_entry);
2947 	default:
2948 		WARN_ON(1);
2949 		return 1;
2950 	}
2951 }
2952 
2953 static int
2954 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2955 {
2956 	return nh_grp->neigh_tbl->family;
2957 }
2958 
2959 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2960 {
2961 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2962 	const struct mlxsw_sp_nexthop *nh;
2963 	struct fib_info *fi;
2964 	unsigned int val;
2965 	int i;
2966 
2967 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2968 	case AF_INET:
2969 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2970 		return jhash(&fi, sizeof(fi), seed);
2971 	case AF_INET6:
2972 		val = nh_grp->count;
2973 		for (i = 0; i < nh_grp->count; i++) {
2974 			nh = &nh_grp->nexthops[i];
2975 			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
2976 		}
2977 		return jhash(&val, sizeof(val), seed);
2978 	default:
2979 		WARN_ON(1);
2980 		return 0;
2981 	}
2982 }
2983 
2984 static u32
2985 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2986 {
2987 	unsigned int val = fib6_entry->nrt6;
2988 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2989 	struct net_device *dev;
2990 
2991 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2992 		dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev;
2993 		val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
2994 	}
2995 
2996 	return jhash(&val, sizeof(val), seed);
2997 }
2998 
2999 static u32
3000 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3001 {
3002 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3003 
3004 	switch (cmp_arg->proto) {
3005 	case MLXSW_SP_L3_PROTO_IPV4:
3006 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3007 	case MLXSW_SP_L3_PROTO_IPV6:
3008 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3009 	default:
3010 		WARN_ON(1);
3011 		return 0;
3012 	}
3013 }
3014 
3015 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3016 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3017 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
3018 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
3019 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
3020 };
3021 
3022 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3023 					 struct mlxsw_sp_nexthop_group *nh_grp)
3024 {
3025 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
3026 	    !nh_grp->gateway)
3027 		return 0;
3028 
3029 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3030 				      &nh_grp->ht_node,
3031 				      mlxsw_sp_nexthop_group_ht_params);
3032 }
3033 
3034 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3035 					  struct mlxsw_sp_nexthop_group *nh_grp)
3036 {
3037 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
3038 	    !nh_grp->gateway)
3039 		return;
3040 
3041 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3042 			       &nh_grp->ht_node,
3043 			       mlxsw_sp_nexthop_group_ht_params);
3044 }
3045 
3046 static struct mlxsw_sp_nexthop_group *
3047 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3048 			       struct fib_info *fi)
3049 {
3050 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3051 
3052 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3053 	cmp_arg.fi = fi;
3054 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3055 				      &cmp_arg,
3056 				      mlxsw_sp_nexthop_group_ht_params);
3057 }
3058 
3059 static struct mlxsw_sp_nexthop_group *
3060 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3061 			       struct mlxsw_sp_fib6_entry *fib6_entry)
3062 {
3063 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3064 
3065 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3066 	cmp_arg.fib6_entry = fib6_entry;
3067 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3068 				      &cmp_arg,
3069 				      mlxsw_sp_nexthop_group_ht_params);
3070 }
3071 
3072 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3073 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3074 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3075 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
3076 };
3077 
3078 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3079 				   struct mlxsw_sp_nexthop *nh)
3080 {
3081 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3082 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3083 }
3084 
3085 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3086 				    struct mlxsw_sp_nexthop *nh)
3087 {
3088 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3089 			       mlxsw_sp_nexthop_ht_params);
3090 }
3091 
3092 static struct mlxsw_sp_nexthop *
3093 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3094 			struct mlxsw_sp_nexthop_key key)
3095 {
3096 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3097 				      mlxsw_sp_nexthop_ht_params);
3098 }
3099 
3100 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3101 					     const struct mlxsw_sp_fib *fib,
3102 					     u32 adj_index, u16 ecmp_size,
3103 					     u32 new_adj_index,
3104 					     u16 new_ecmp_size)
3105 {
3106 	char raleu_pl[MLXSW_REG_RALEU_LEN];
3107 
3108 	mlxsw_reg_raleu_pack(raleu_pl,
3109 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
3110 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
3111 			     new_ecmp_size);
3112 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3113 }
3114 
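/* After a nexthop group moves to a new adjacency index, use RALEU to rewrite
 * all routes that still reference the old index. The update is issued per
 * FIB (virtual router), skipping consecutive entries that belong to the same
 * FIB.
 */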
3115 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3116 					  struct mlxsw_sp_nexthop_group *nh_grp,
3117 					  u32 old_adj_index, u16 old_ecmp_size)
3118 {
3119 	struct mlxsw_sp_fib_entry *fib_entry;
3120 	struct mlxsw_sp_fib *fib = NULL;
3121 	int err;
3122 
3123 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3124 		if (fib == fib_entry->fib_node->fib)
3125 			continue;
3126 		fib = fib_entry->fib_node->fib;
3127 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3128 							old_adj_index,
3129 							old_ecmp_size,
3130 							nh_grp->adj_index,
3131 							nh_grp->ecmp_size);
3132 		if (err)
3133 			return err;
3134 	}
3135 	return 0;
3136 }
3137 
3138 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3139 				     struct mlxsw_sp_nexthop *nh)
3140 {
3141 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3142 	char ratr_pl[MLXSW_REG_RATR_LEN];
3143 
3144 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3145 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
3146 			    adj_index, neigh_entry->rif);
3147 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3148 	if (nh->counter_valid)
3149 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3150 	else
3151 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3152 
3153 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3154 }
3155 
3156 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3157 			    struct mlxsw_sp_nexthop *nh)
3158 {
3159 	int i;
3160 
3161 	for (i = 0; i < nh->num_adj_entries; i++) {
3162 		int err;
3163 
3164 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3165 		if (err)
3166 			return err;
3167 	}
3168 
3169 	return 0;
3170 }
3171 
3172 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3173 					  u32 adj_index,
3174 					  struct mlxsw_sp_nexthop *nh)
3175 {
3176 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3177 
3178 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3179 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3180 }
3181 
3182 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3183 					u32 adj_index,
3184 					struct mlxsw_sp_nexthop *nh)
3185 {
3186 	int i;
3187 
3188 	for (i = 0; i < nh->num_adj_entries; i++) {
3189 		int err;
3190 
3191 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3192 						     nh);
3193 		if (err)
3194 			return err;
3195 	}
3196 
3197 	return 0;
3198 }
3199 
3200 static int
3201 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3202 			      struct mlxsw_sp_nexthop_group *nh_grp,
3203 			      bool reallocate)
3204 {
3205 	u32 adj_index = nh_grp->adj_index; /* base */
3206 	struct mlxsw_sp_nexthop *nh;
3207 	int i;
3208 	int err;
3209 
3210 	for (i = 0; i < nh_grp->count; i++) {
3211 		nh = &nh_grp->nexthops[i];
3212 
3213 		if (!nh->should_offload) {
3214 			nh->offloaded = 0;
3215 			continue;
3216 		}
3217 
3218 		if (nh->update || reallocate) {
3219 			switch (nh->type) {
3220 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
3221 				err = mlxsw_sp_nexthop_update
3222 					    (mlxsw_sp, adj_index, nh);
3223 				break;
3224 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3225 				err = mlxsw_sp_nexthop_ipip_update
3226 					    (mlxsw_sp, adj_index, nh);
3227 				break;
3228 			}
3229 			if (err)
3230 				return err;
3231 			nh->update = 0;
3232 			nh->offloaded = 1;
3233 		}
3234 		adj_index += nh->num_adj_entries;
3235 	}
3236 	return 0;
3237 }
3238 
3239 static int
3240 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3241 				    struct mlxsw_sp_nexthop_group *nh_grp)
3242 {
3243 	struct mlxsw_sp_fib_entry *fib_entry;
3244 	int err;
3245 
3246 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3247 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3248 		if (err)
3249 			return err;
3250 	}
3251 	return 0;
3252 }
3253 
3254 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3255 {
3256 	/* Valid sizes for an adjacency group are:
3257 	 * 1-64, 512, 1024, 2048 and 4096.
3258 	 */
3259 	if (*p_adj_grp_size <= 64)
3260 		return;
3261 	else if (*p_adj_grp_size <= 512)
3262 		*p_adj_grp_size = 512;
3263 	else if (*p_adj_grp_size <= 1024)
3264 		*p_adj_grp_size = 1024;
3265 	else if (*p_adj_grp_size <= 2048)
3266 		*p_adj_grp_size = 2048;
3267 	else
3268 		*p_adj_grp_size = 4096;
3269 }
3270 
3271 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3272 					     unsigned int alloc_size)
3273 {
3274 	if (alloc_size >= 4096)
3275 		*p_adj_grp_size = 4096;
3276 	else if (alloc_size >= 2048)
3277 		*p_adj_grp_size = 2048;
3278 	else if (alloc_size >= 1024)
3279 		*p_adj_grp_size = 1024;
3280 	else if (alloc_size >= 512)
3281 		*p_adj_grp_size = 512;
3282 }
3283 
3284 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3285 				     u16 *p_adj_grp_size)
3286 {
3287 	unsigned int alloc_size;
3288 	int err;
3289 
3290 	/* Round up the requested group size to the next size supported
3291 	 * by the device and make sure the request can be satisfied.
3292 	 */
3293 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3294 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3295 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3296 					      *p_adj_grp_size, &alloc_size);
3297 	if (err)
3298 		return err;
3299 	/* It is possible the allocation results in more allocated
3300 	 * entries than requested. Try to use as many of them as
3301 	 * possible.
3302 	 */
3303 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3304 
3305 	return 0;
3306 }
3307 
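/* Normalize the nexthop weights by dividing them by their greatest common
 * divisor, so that the smallest possible number of adjacency entries can
 * represent the configured ratios. Only nexthops that should be offloaded
 * participate.
 */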
3308 static void
3309 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3310 {
3311 	int i, g = 0, sum_norm_weight = 0;
3312 	struct mlxsw_sp_nexthop *nh;
3313 
3314 	for (i = 0; i < nh_grp->count; i++) {
3315 		nh = &nh_grp->nexthops[i];
3316 
3317 		if (!nh->should_offload)
3318 			continue;
3319 		if (g > 0)
3320 			g = gcd(nh->nh_weight, g);
3321 		else
3322 			g = nh->nh_weight;
3323 	}
3324 
3325 	for (i = 0; i < nh_grp->count; i++) {
3326 		nh = &nh_grp->nexthops[i];
3327 
3328 		if (!nh->should_offload)
3329 			continue;
3330 		nh->norm_nh_weight = nh->nh_weight / g;
3331 		sum_norm_weight += nh->norm_nh_weight;
3332 	}
3333 
3334 	nh_grp->sum_norm_weight = sum_norm_weight;
3335 }
3336 
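/* Distribute the group's adjacency entries among the offloaded nexthops in
 * proportion to their normalized weights, using a running rounded upper
 * bound so that the per-nexthop entry counts sum exactly to ecmp_size.
 */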
3337 static void
3338 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3339 {
3340 	int total = nh_grp->sum_norm_weight;
3341 	u16 ecmp_size = nh_grp->ecmp_size;
3342 	int i, weight = 0, lower_bound = 0;
3343 
3344 	for (i = 0; i < nh_grp->count; i++) {
3345 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3346 		int upper_bound;
3347 
3348 		if (!nh->should_offload)
3349 			continue;
3350 		weight += nh->norm_nh_weight;
3351 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3352 		nh->num_adj_entries = upper_bound - lower_bound;
3353 		lower_bound = upper_bound;
3354 	}
3355 }
3356 
3357 static struct mlxsw_sp_nexthop *
3358 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3359 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
3360 
3361 static void
3362 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3363 					struct mlxsw_sp_nexthop_group *nh_grp)
3364 {
3365 	int i;
3366 
3367 	for (i = 0; i < nh_grp->count; i++) {
3368 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3369 
3370 		if (nh->offloaded)
3371 			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3372 		else
3373 			nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3374 	}
3375 }
3376 
3377 static void
3378 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3379 					  struct mlxsw_sp_fib6_entry *fib6_entry)
3380 {
3381 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3382 
3383 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3384 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3385 		struct mlxsw_sp_nexthop *nh;
3386 
3387 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3388 		if (nh && nh->offloaded)
3389 			fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3390 		else
3391 			fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3392 	}
3393 }
3394 
3395 static void
3396 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3397 					struct mlxsw_sp_nexthop_group *nh_grp)
3398 {
3399 	struct mlxsw_sp_fib6_entry *fib6_entry;
3400 
3401 	/* Unfortunately, in IPv6 the route and the nexthop are described by
3402 	 * the same struct, so we need to iterate over all the routes using the
3403 	 * nexthop group and set / clear the offload indication for them.
3404 	 */
3405 	list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3406 			    common.nexthop_group_node)
3407 		__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
3408 }
3409 
3410 static void
3411 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3412 				       struct mlxsw_sp_nexthop_group *nh_grp)
3413 {
3414 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
3415 	case AF_INET:
3416 		mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
3417 		break;
3418 	case AF_INET6:
3419 		mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
3420 		break;
3421 	}
3422 }
3423 
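/* Re-sync a nexthop group with the device: if nothing was added or removed,
 * only refresh the adjacency entries in place. Otherwise normalize the
 * weights, allocate a new KVD linear area sized to the sum of the normalized
 * weights, write the adjacency entries and repoint the routes using the
 * group, either individually or via a mass update. On any failure, fall back
 * to trapping the traffic to the kernel.
 */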
3424 static void
3425 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3426 			       struct mlxsw_sp_nexthop_group *nh_grp)
3427 {
3428 	u16 ecmp_size, old_ecmp_size;
3429 	struct mlxsw_sp_nexthop *nh;
3430 	bool offload_change = false;
3431 	u32 adj_index;
3432 	bool old_adj_index_valid;
3433 	u32 old_adj_index;
3434 	int i;
3435 	int err;
3436 
3437 	if (!nh_grp->gateway) {
3438 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3439 		return;
3440 	}
3441 
3442 	for (i = 0; i < nh_grp->count; i++) {
3443 		nh = &nh_grp->nexthops[i];
3444 
3445 		if (nh->should_offload != nh->offloaded) {
3446 			offload_change = true;
3447 			if (nh->should_offload)
3448 				nh->update = 1;
3449 		}
3450 	}
3451 	if (!offload_change) {
3452 		/* Nothing was added or removed, so no need to reallocate. Just
3453 		 * update the MAC on the existing adjacency indexes.
3454 		 */
3455 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3456 		if (err) {
3457 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3458 			goto set_trap;
3459 		}
3460 		return;
3461 	}
3462 	mlxsw_sp_nexthop_group_normalize(nh_grp);
3463 	if (!nh_grp->sum_norm_weight)
3464 		/* No neigh of this group is connected, so we just set
3465 		 * the trap and let everything flow through the kernel.
3466 		 */
3467 		goto set_trap;
3468 
3469 	ecmp_size = nh_grp->sum_norm_weight;
3470 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3471 	if (err)
3472 		/* No valid allocation size available. */
3473 		goto set_trap;
3474 
3475 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3476 				  ecmp_size, &adj_index);
3477 	if (err) {
3478 		/* We ran out of KVD linear space, so just set the
3479 		 * trap and let everything flow through the kernel.
3480 		 */
3481 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3482 		goto set_trap;
3483 	}
3484 	old_adj_index_valid = nh_grp->adj_index_valid;
3485 	old_adj_index = nh_grp->adj_index;
3486 	old_ecmp_size = nh_grp->ecmp_size;
3487 	nh_grp->adj_index_valid = 1;
3488 	nh_grp->adj_index = adj_index;
3489 	nh_grp->ecmp_size = ecmp_size;
3490 	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3491 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3492 	if (err) {
3493 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3494 		goto set_trap;
3495 	}
3496 
3497 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3498 
3499 	if (!old_adj_index_valid) {
3500 		/* The trap was set for the fib entries, so we have to call
3501 		 * fib entry update to unset it and use the adjacency index.
3502 		 */
3503 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3504 		if (err) {
3505 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3506 			goto set_trap;
3507 		}
3508 		return;
3509 	}
3510 
3511 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3512 					     old_adj_index, old_ecmp_size);
3513 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3514 			   old_ecmp_size, old_adj_index);
3515 	if (err) {
3516 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3517 		goto set_trap;
3518 	}
3519 
3520 	return;
3521 
3522 set_trap:
3523 	old_adj_index_valid = nh_grp->adj_index_valid;
3524 	nh_grp->adj_index_valid = 0;
3525 	for (i = 0; i < nh_grp->count; i++) {
3526 		nh = &nh_grp->nexthops[i];
3527 		nh->offloaded = 0;
3528 	}
3529 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3530 	if (err)
3531 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3532 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3533 	if (old_adj_index_valid)
3534 		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3535 				   nh_grp->ecmp_size, nh_grp->adj_index);
3536 }
3537 
3538 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3539 					    bool removing)
3540 {
3541 	if (!removing)
3542 		nh->should_offload = 1;
3543 	else
3544 		nh->should_offload = 0;
3545 	nh->update = 1;
3546 }
3547 
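/* A dead neighbour can no longer be re-resolved, but nexthops may still be
 * using its entry. Look up or create a live neighbour for the same address,
 * re-key the entry to it and transfer the nexthop references from the old
 * neighbour to the new one.
 */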
3548 static int
3549 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
3550 				    struct mlxsw_sp_neigh_entry *neigh_entry)
3551 {
3552 	struct neighbour *n, *old_n = neigh_entry->key.n;
3553 	struct mlxsw_sp_nexthop *nh;
3554 	bool entry_connected;
3555 	u8 nud_state, dead;
3556 	int err;
3557 
3558 	nh = list_first_entry(&neigh_entry->nexthop_list,
3559 			      struct mlxsw_sp_nexthop, neigh_list_node);
3560 
3561 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3562 	if (!n) {
3563 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3564 				 nh->rif->dev);
3565 		if (IS_ERR(n))
3566 			return PTR_ERR(n);
3567 		neigh_event_send(n, NULL);
3568 	}
3569 
3570 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
3571 	neigh_entry->key.n = n;
3572 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3573 	if (err)
3574 		goto err_neigh_entry_insert;
3575 
3576 	read_lock_bh(&n->lock);
3577 	nud_state = n->nud_state;
3578 	dead = n->dead;
3579 	read_unlock_bh(&n->lock);
3580 	entry_connected = nud_state & NUD_VALID && !dead;
3581 
3582 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3583 			    neigh_list_node) {
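		/* Transfer the nexthop's reference from the dead neighbour
		 * to the new one.
		 */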
3584 		neigh_release(old_n);
3585 		neigh_clone(n);
3586 		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
3587 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3588 	}
3589 
3590 	neigh_release(n);
3591 
3592 	return 0;
3593 
3594 err_neigh_entry_insert:
3595 	neigh_entry->key.n = old_n;
3596 	mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3597 	neigh_release(n);
3598 	return err;
3599 }
3600 
3601 static void
3602 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3603 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3604 			      bool removing, bool dead)
3605 {
3606 	struct mlxsw_sp_nexthop *nh;
3607 
3608 	if (list_empty(&neigh_entry->nexthop_list))
3609 		return;
3610 
3611 	if (dead) {
3612 		int err;
3613 
3614 		err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
3615 							  neigh_entry);
3616 		if (err)
3617 			dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
3618 		return;
3619 	}
3620 
3621 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3622 			    neigh_list_node) {
3623 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3624 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3625 	}
3626 }
3627 
3628 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3629 				      struct mlxsw_sp_rif *rif)
3630 {
3631 	if (nh->rif)
3632 		return;
3633 
3634 	nh->rif = rif;
3635 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3636 }
3637 
3638 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3639 {
3640 	if (!nh->rif)
3641 		return;
3642 
3643 	list_del(&nh->rif_list_node);
3644 	nh->rif = NULL;
3645 }
3646 
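/* Bind the nexthop to a neigh entry, creating the kernel neighbour and
 * the driver's tracking entry if they do not exist yet. Gateway-less
 * groups do not need neighbour resolution and are skipped.
 */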
3647 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3648 				       struct mlxsw_sp_nexthop *nh)
3649 {
3650 	struct mlxsw_sp_neigh_entry *neigh_entry;
3651 	struct neighbour *n;
3652 	u8 nud_state, dead;
3653 	int err;
3654 
3655 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3656 		return 0;
3657 
3658 	/* Take a reference on the neighbour here, ensuring that it is
3659 	 * not destroyed before the nexthop entry is finished with it.
3660 	 * The reference is taken either by neigh_lookup() or by
3661 	 * neigh_create() in case n is not found.
3662 	 */
3663 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3664 	if (!n) {
3665 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3666 				 nh->rif->dev);
3667 		if (IS_ERR(n))
3668 			return PTR_ERR(n);
3669 		neigh_event_send(n, NULL);
3670 	}
3671 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3672 	if (!neigh_entry) {
3673 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3674 		if (IS_ERR(neigh_entry)) {
3675 			err = PTR_ERR(neigh_entry);
3676 			goto err_neigh_entry_create;
3677 		}
3678 	}
3679 
3680 	/* If that is the first nexthop connected to that neigh, add it to
3681 	 * nexthop_neighs_list.
3682 	 */
3683 	if (list_empty(&neigh_entry->nexthop_list))
3684 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3685 			      &mlxsw_sp->router->nexthop_neighs_list);
3686 
3687 	nh->neigh_entry = neigh_entry;
3688 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3689 	read_lock_bh(&n->lock);
3690 	nud_state = n->nud_state;
3691 	dead = n->dead;
3692 	read_unlock_bh(&n->lock);
3693 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3694 
3695 	return 0;
3696 
3697 err_neigh_entry_create:
3698 	neigh_release(n);
3699 	return err;
3700 }
3701 
3702 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3703 					struct mlxsw_sp_nexthop *nh)
3704 {
3705 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3706 	struct neighbour *n;
3707 
3708 	if (!neigh_entry)
3709 		return;
3710 	n = neigh_entry->key.n;
3711 
3712 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3713 	list_del(&nh->neigh_list_node);
3714 	nh->neigh_entry = NULL;
3715 
3716 	/* If that is the last nexthop connected to that neigh, remove it
3717 	 * from nexthop_neighs_list.
3718 	 */
3719 	if (list_empty(&neigh_entry->nexthop_list))
3720 		list_del(&neigh_entry->nexthop_neighs_list_node);
3721 
3722 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3723 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3724 
3725 	neigh_release(n);
3726 }
3727 
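/* Return whether the underlay device of the tunnel is up. Tunnels bound
 * to no specific underlay device are considered up.
 */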
3728 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3729 {
3730 	struct net_device *ul_dev;
3731 	bool is_up;
3732 
3733 	rcu_read_lock();
3734 	ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3735 	is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
3736 	rcu_read_unlock();
3737 
3738 	return is_up;
3739 }
3740 
3741 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3742 				       struct mlxsw_sp_nexthop *nh,
3743 				       struct mlxsw_sp_ipip_entry *ipip_entry)
3744 {
3745 	bool removing;
3746 
3747 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3748 		return;
3749 
3750 	nh->ipip_entry = ipip_entry;
3751 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3752 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3753 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3754 }
3755 
3756 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3757 				       struct mlxsw_sp_nexthop *nh)
3758 {
3759 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3760 
3761 	if (!ipip_entry)
3762 		return;
3763 
3764 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3765 	nh->ipip_entry = NULL;
3766 }
3767 
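/* Check whether the nexthop egresses through a netdev of a supported
 * IP-in-IP tunnel type and, if so, report the tunnel type via p_ipipt.
 */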
3768 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3769 					const struct fib_nh *fib_nh,
3770 					enum mlxsw_sp_ipip_type *p_ipipt)
3771 {
3772 	struct net_device *dev = fib_nh->fib_nh_dev;
3773 
3774 	return dev &&
3775 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3776 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3777 }
3778 
3779 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3780 				       struct mlxsw_sp_nexthop *nh)
3781 {
3782 	switch (nh->type) {
3783 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3784 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3785 		mlxsw_sp_nexthop_rif_fini(nh);
3786 		break;
3787 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3788 		mlxsw_sp_nexthop_rif_fini(nh);
3789 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3790 		break;
3791 	}
3792 }
3793 
3794 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3795 				       struct mlxsw_sp_nexthop *nh,
3796 				       struct fib_nh *fib_nh)
3797 {
3798 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3799 	struct net_device *dev = fib_nh->fib_nh_dev;
3800 	struct mlxsw_sp_ipip_entry *ipip_entry;
3801 	struct mlxsw_sp_rif *rif;
3802 	int err;
3803 
3804 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3805 	if (ipip_entry) {
3806 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3807 		if (ipip_ops->can_offload(mlxsw_sp, dev,
3808 					  MLXSW_SP_L3_PROTO_IPV4)) {
3809 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3810 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3811 			return 0;
3812 		}
3813 	}
3814 
3815 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3816 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3817 	if (!rif)
3818 		return 0;
3819 
3820 	mlxsw_sp_nexthop_rif_init(nh, rif);
3821 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3822 	if (err)
3823 		goto err_neigh_init;
3824 
3825 	return 0;
3826 
3827 err_neigh_init:
3828 	mlxsw_sp_nexthop_rif_fini(nh);
3829 	return err;
3830 }
3831 
3832 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3833 					struct mlxsw_sp_nexthop *nh)
3834 {
3835 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3836 }
3837 
3838 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3839 				  struct mlxsw_sp_nexthop_group *nh_grp,
3840 				  struct mlxsw_sp_nexthop *nh,
3841 				  struct fib_nh *fib_nh)
3842 {
3843 	struct net_device *dev = fib_nh->fib_nh_dev;
3844 	struct in_device *in_dev;
3845 	int err;
3846 
3847 	nh->nh_grp = nh_grp;
3848 	nh->key.fib_nh = fib_nh;
3849 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3850 	nh->nh_weight = fib_nh->fib_nh_weight;
3851 #else
3852 	nh->nh_weight = 1;
3853 #endif
3854 	memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
3855 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3856 	if (err)
3857 		return err;
3858 
3859 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3860 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3861 
3862 	if (!dev)
3863 		return 0;
3864 
3865 	rcu_read_lock();
3866 	in_dev = __in_dev_get_rcu(dev);
3867 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3868 	    fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
3869 		rcu_read_unlock();
3870 		return 0;
3871 	}
3872 	rcu_read_unlock();
3873 
3874 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3875 	if (err)
3876 		goto err_nexthop_neigh_init;
3877 
3878 	return 0;
3879 
3880 err_nexthop_neigh_init:
3881 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3882 	return err;
3883 }
3884 
3885 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3886 				   struct mlxsw_sp_nexthop *nh)
3887 {
3888 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3889 	list_del(&nh->router_list_node);
3890 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3891 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3892 }
3893 
3894 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3895 				    unsigned long event, struct fib_nh *fib_nh)
3896 {
3897 	struct mlxsw_sp_nexthop_key key;
3898 	struct mlxsw_sp_nexthop *nh;
3899 
3900 	if (mlxsw_sp->router->aborted)
3901 		return;
3902 
3903 	key.fib_nh = fib_nh;
3904 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3905 	if (!nh)
3906 		return;
3907 
3908 	switch (event) {
3909 	case FIB_EVENT_NH_ADD:
3910 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3911 		break;
3912 	case FIB_EVENT_NH_DEL:
3913 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3914 		break;
3915 	}
3916 
3917 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3918 }
3919 
3920 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3921 					struct mlxsw_sp_rif *rif)
3922 {
3923 	struct mlxsw_sp_nexthop *nh;
3924 	bool removing;
3925 
3926 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3927 		switch (nh->type) {
3928 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3929 			removing = false;
3930 			break;
3931 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3932 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3933 			break;
3934 		default:
3935 			WARN_ON(1);
3936 			continue;
3937 		}
3938 
3939 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3940 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3941 	}
3942 }
3943 
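/* Move all nexthops from the old RIF to the new one and re-evaluate
 * their offload state against the new RIF.
 */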
3944 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3945 					 struct mlxsw_sp_rif *old_rif,
3946 					 struct mlxsw_sp_rif *new_rif)
3947 {
3948 	struct mlxsw_sp_nexthop *nh;
3949 
3950 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3951 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3952 		nh->rif = new_rif;
3953 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3954 }
3955 
3956 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3957 					   struct mlxsw_sp_rif *rif)
3958 {
3959 	struct mlxsw_sp_nexthop *nh, *tmp;
3960 
3961 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3962 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3963 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3964 	}
3965 }
3966 
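/* A route is a gateway route if its first nexthop is reached via a
 * gateway (link scope) or egresses through an IP-in-IP tunnel.
 */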
3967 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3968 				   struct fib_info *fi)
3969 {
3970 	const struct fib_nh *nh = fib_info_nh(fi, 0);
3971 
3972 	return nh->fib_nh_scope == RT_SCOPE_LINK ||
3973 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
3974 }
3975 
3976 static struct mlxsw_sp_nexthop_group *
3977 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3978 {
3979 	unsigned int nhs = fib_info_num_path(fi);
3980 	struct mlxsw_sp_nexthop_group *nh_grp;
3981 	struct mlxsw_sp_nexthop *nh;
3982 	struct fib_nh *fib_nh;
3983 	int i;
3984 	int err;
3985 
3986 	nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL);
3987 	if (!nh_grp)
3988 		return ERR_PTR(-ENOMEM);
3989 	nh_grp->priv = fi;
3990 	INIT_LIST_HEAD(&nh_grp->fib_list);
3991 	nh_grp->neigh_tbl = &arp_tbl;
3992 
3993 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3994 	nh_grp->count = nhs;
3995 	fib_info_hold(fi);
3996 	for (i = 0; i < nh_grp->count; i++) {
3997 		nh = &nh_grp->nexthops[i];
3998 		fib_nh = fib_info_nh(fi, i);
3999 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
4000 		if (err)
4001 			goto err_nexthop4_init;
4002 	}
4003 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4004 	if (err)
4005 		goto err_nexthop_group_insert;
4006 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4007 	return nh_grp;
4008 
4009 err_nexthop_group_insert:
4010 err_nexthop4_init:
4011 	for (i--; i >= 0; i--) {
4012 		nh = &nh_grp->nexthops[i];
4013 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
4014 	}
4015 	fib_info_put(fi);
4016 	kfree(nh_grp);
4017 	return ERR_PTR(err);
4018 }
4019 
4020 static void
4021 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
4022 				struct mlxsw_sp_nexthop_group *nh_grp)
4023 {
4024 	struct mlxsw_sp_nexthop *nh;
4025 	int i;
4026 
4027 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4028 	for (i = 0; i < nh_grp->count; i++) {
4029 		nh = &nh_grp->nexthops[i];
4030 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
4031 	}
4032 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4033 	WARN_ON_ONCE(nh_grp->adj_index_valid);
4034 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
4035 	kfree(nh_grp);
4036 }
4037 
4038 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
4039 				       struct mlxsw_sp_fib_entry *fib_entry,
4040 				       struct fib_info *fi)
4041 {
4042 	struct mlxsw_sp_nexthop_group *nh_grp;
4043 
4044 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
4045 	if (!nh_grp) {
4046 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
4047 		if (IS_ERR(nh_grp))
4048 			return PTR_ERR(nh_grp);
4049 	}
4050 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
4051 	fib_entry->nh_group = nh_grp;
4052 	return 0;
4053 }
4054 
4055 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
4056 					struct mlxsw_sp_fib_entry *fib_entry)
4057 {
4058 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4059 
4060 	list_del(&fib_entry->nexthop_group_node);
4061 	if (!list_empty(&nh_grp->fib_list))
4062 		return;
4063 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
4064 }
4065 
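/* Only routes with the default TOS are offloaded; routes with a non-zero
 * TOS are left to the kernel.
 */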
4066 static bool
4067 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
4068 {
4069 	struct mlxsw_sp_fib4_entry *fib4_entry;
4070 
4071 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4072 				  common);
4073 	return !fib4_entry->tos;
4074 }
4075 
4076 static bool
4077 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
4078 {
4079 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
4080 
4081 	switch (fib_entry->fib_node->fib->proto) {
4082 	case MLXSW_SP_L3_PROTO_IPV4:
4083 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
4084 			return false;
4085 		break;
4086 	case MLXSW_SP_L3_PROTO_IPV6:
4087 		break;
4088 	}
4089 
4090 	switch (fib_entry->type) {
4091 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4092 		return !!nh_group->adj_index_valid;
4093 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4094 		return !!nh_group->nh_rif;
4095 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4096 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4097 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4098 		return true;
4099 	default:
4100 		return false;
4101 	}
4102 }
4103 
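/* Find the nexthop in the group that corresponds to the given rt6 by
 * matching both the egress device and the gateway address.
 */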
4104 static struct mlxsw_sp_nexthop *
4105 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
4106 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4107 {
4108 	int i;
4109 
4110 	for (i = 0; i < nh_grp->count; i++) {
4111 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4112 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
4113 
4114 		if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
4115 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
4116 				    &rt->fib6_nh->fib_nh_gw6))
4117 			return nh;
4119 	}
4120 
4121 	return NULL;
4122 }
4123 
4124 static void
4125 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4126 				 struct mlxsw_sp_fib_entry *fib_entry)
4127 {
4128 	struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
4129 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
4130 	int dst_len = fib_entry->fib_node->key.prefix_len;
4131 	struct mlxsw_sp_fib4_entry *fib4_entry;
4132 	struct fib_rt_info fri;
4133 	bool should_offload;
4134 
4135 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
4136 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4137 				  common);
4138 	fri.fi = fi;
4139 	fri.tb_id = fib4_entry->tb_id;
4140 	fri.dst = cpu_to_be32(*p_dst);
4141 	fri.dst_len = dst_len;
4142 	fri.tos = fib4_entry->tos;
4143 	fri.type = fib4_entry->type;
4144 	fri.offload = should_offload;
4145 	fri.trap = !should_offload;
4146 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
4147 }
4148 
4149 static void
4150 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
4151 				   struct mlxsw_sp_fib_entry *fib_entry)
4152 {
4153 	struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
4154 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
4155 	int dst_len = fib_entry->fib_node->key.prefix_len;
4156 	struct mlxsw_sp_fib4_entry *fib4_entry;
4157 	struct fib_rt_info fri;
4158 
4159 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4160 				  common);
4161 	fri.fi = fi;
4162 	fri.tb_id = fib4_entry->tb_id;
4163 	fri.dst = cpu_to_be32(*p_dst);
4164 	fri.dst_len = dst_len;
4165 	fri.tos = fib4_entry->tos;
4166 	fri.type = fib4_entry->type;
4167 	fri.offload = false;
4168 	fri.trap = false;
4169 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
4170 }
4171 
4172 static void
4173 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4174 				 struct mlxsw_sp_fib_entry *fib_entry)
4175 {
4176 	struct mlxsw_sp_fib6_entry *fib6_entry;
4177 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4178 	bool should_offload;
4179 
4180 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
4181 
4182 	/* In IPv6 a multipath route is represented using multiple routes, so
4183 	 * we need to set the flags on all of them.
4184 	 */
4185 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4186 				  common);
4187 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
4188 		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload,
4189 				       !should_offload);
4190 }
4191 
4192 static void
4193 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
4194 				   struct mlxsw_sp_fib_entry *fib_entry)
4195 {
4196 	struct mlxsw_sp_fib6_entry *fib6_entry;
4197 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4198 
4199 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4200 				  common);
4201 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
4202 		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false);
4203 }
4204 
4205 static void
4206 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4207 				struct mlxsw_sp_fib_entry *fib_entry)
4208 {
4209 	switch (fib_entry->fib_node->fib->proto) {
4210 	case MLXSW_SP_L3_PROTO_IPV4:
4211 		mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
4212 		break;
4213 	case MLXSW_SP_L3_PROTO_IPV6:
4214 		mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
4215 		break;
4216 	}
4217 }
4218 
4219 static void
4220 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
4221 				  struct mlxsw_sp_fib_entry *fib_entry)
4222 {
4223 	switch (fib_entry->fib_node->fib->proto) {
4224 	case MLXSW_SP_L3_PROTO_IPV4:
4225 		mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
4226 		break;
4227 	case MLXSW_SP_L3_PROTO_IPV6:
4228 		mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
4229 		break;
4230 	}
4231 }
4232 
4233 static void
4234 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
4235 				    struct mlxsw_sp_fib_entry *fib_entry,
4236 				    enum mlxsw_reg_ralue_op op)
4237 {
4238 	switch (op) {
4239 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4240 		mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
4241 		break;
4242 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4243 		mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
4244 		break;
4245 	default:
4246 		break;
4247 	}
4248 }
4249 
4250 static void
4251 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4252 			      const struct mlxsw_sp_fib_entry *fib_entry,
4253 			      enum mlxsw_reg_ralue_op op)
4254 {
4255 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4256 	enum mlxsw_reg_ralxx_protocol proto;
4257 	u32 *p_dip;
4258 
4259 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4260 
4261 	switch (fib->proto) {
4262 	case MLXSW_SP_L3_PROTO_IPV4:
4263 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
4264 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4265 				      fib_entry->fib_node->key.prefix_len,
4266 				      *p_dip);
4267 		break;
4268 	case MLXSW_SP_L3_PROTO_IPV6:
4269 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4270 				      fib_entry->fib_node->key.prefix_len,
4271 				      fib_entry->fib_node->key.addr);
4272 		break;
4273 	}
4274 }
4275 
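/* Lazily allocate and program a single discard adjacency entry. It is
 * shared by all remote-type routes whose nexthop group has no valid
 * adjacency index but does have a RIF, so that such routes drop packets
 * in hardware instead of trapping them to the CPU.
 */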
4276 static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
4277 {
4278 	enum mlxsw_reg_ratr_trap_action trap_action;
4279 	char ratr_pl[MLXSW_REG_RATR_LEN];
4280 	int err;
4281 
4282 	if (mlxsw_sp->router->adj_discard_index_valid)
4283 		return 0;
4284 
4285 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4286 				  &mlxsw_sp->router->adj_discard_index);
4287 	if (err)
4288 		return err;
4289 
4290 	trap_action = MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS;
4291 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
4292 			    MLXSW_REG_RATR_TYPE_ETHERNET,
4293 			    mlxsw_sp->router->adj_discard_index, rif_index);
4294 	mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
4295 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
4296 	if (err)
4297 		goto err_ratr_write;
4298 
4299 	mlxsw_sp->router->adj_discard_index_valid = true;
4300 
4301 	return 0;
4302 
4303 err_ratr_write:
4304 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4305 			   mlxsw_sp->router->adj_discard_index);
4306 	return err;
4307 }
4308 
4309 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4310 					struct mlxsw_sp_fib_entry *fib_entry,
4311 					enum mlxsw_reg_ralue_op op)
4312 {
4313 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
4314 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4315 	enum mlxsw_reg_ralue_trap_action trap_action;
4316 	u16 trap_id = 0;
4317 	u32 adjacency_index = 0;
4318 	u16 ecmp_size = 0;
4319 	int err;
4320 
4321 	/* In case the nexthop group adjacency index is valid, use it
4322 	 * with the provided ECMP size. Otherwise, set up a trap and pass
4323 	 * the traffic to the kernel.
4324 	 */
4325 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4326 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4327 		adjacency_index = fib_entry->nh_group->adj_index;
4328 		ecmp_size = fib_entry->nh_group->ecmp_size;
4329 	} else if (!nh_group->adj_index_valid && nh_group->count &&
4330 		   nh_group->nh_rif) {
4331 		err = mlxsw_sp_adj_discard_write(mlxsw_sp,
4332 						 nh_group->nh_rif->rif_index);
4333 		if (err)
4334 			return err;
4335 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4336 		adjacency_index = mlxsw_sp->router->adj_discard_index;
4337 		ecmp_size = 1;
4338 	} else {
4339 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4340 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4341 	}
4342 
4343 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4344 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4345 					adjacency_index, ecmp_size);
4346 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4347 }
4348 
4349 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4350 				       struct mlxsw_sp_fib_entry *fib_entry,
4351 				       enum mlxsw_reg_ralue_op op)
4352 {
4353 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4354 	enum mlxsw_reg_ralue_trap_action trap_action;
4355 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4356 	u16 trap_id = 0;
4357 	u16 rif_index = 0;
4358 
4359 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4360 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4361 		rif_index = rif->rif_index;
4362 	} else {
4363 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4364 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4365 	}
4366 
4367 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4368 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4369 				       rif_index);
4370 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4371 }
4372 
4373 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4374 				      struct mlxsw_sp_fib_entry *fib_entry,
4375 				      enum mlxsw_reg_ralue_op op)
4376 {
4377 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4378 
4379 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4380 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4381 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4382 }
4383 
4384 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
4385 					   struct mlxsw_sp_fib_entry *fib_entry,
4386 					   enum mlxsw_reg_ralue_op op)
4387 {
4388 	enum mlxsw_reg_ralue_trap_action trap_action;
4389 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4390 
4391 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
4392 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4393 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
4394 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4395 }
4396 
4397 static int
4398 mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
4399 				  struct mlxsw_sp_fib_entry *fib_entry,
4400 				  enum mlxsw_reg_ralue_op op)
4401 {
4402 	enum mlxsw_reg_ralue_trap_action trap_action;
4403 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4404 	u16 trap_id;
4405 
4406 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4407 	trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
4408 
4409 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4410 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
4411 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4412 }
4413 
4414 static int
4415 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4416 				 struct mlxsw_sp_fib_entry *fib_entry,
4417 				 enum mlxsw_reg_ralue_op op)
4418 {
4419 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4420 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4421 
4422 	if (WARN_ON(!ipip_entry))
4423 		return -EINVAL;
4424 
4425 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4426 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4427 				      fib_entry->decap.tunnel_index);
4428 }
4429 
4430 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4431 					   struct mlxsw_sp_fib_entry *fib_entry,
4432 					   enum mlxsw_reg_ralue_op op)
4433 {
4434 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4435 
4436 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4437 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4438 					   fib_entry->decap.tunnel_index);
4439 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4440 }
4441 
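/* Dispatch the routing table write according to the entry type. The
 * handlers pack a RALUE register with the matching action and write it
 * to the device; IP-in-IP decap entries are delegated to the tunnel ops.
 */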
4442 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4443 				   struct mlxsw_sp_fib_entry *fib_entry,
4444 				   enum mlxsw_reg_ralue_op op)
4445 {
4446 	switch (fib_entry->type) {
4447 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4448 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4449 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4450 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4451 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4452 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4453 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4454 		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
4455 	case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
4456 		return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
4457 							 op);
4458 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4459 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4460 							fib_entry, op);
4461 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4462 		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4463 	}
4464 	return -EINVAL;
4465 }
4466 
4467 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4468 				 struct mlxsw_sp_fib_entry *fib_entry,
4469 				 enum mlxsw_reg_ralue_op op)
4470 {
4471 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4472 
4473 	if (err)
4474 		return err;
4475 
4476 	mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
4477 
4478 	return 0;
4479 }
4480 
4481 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4482 				     struct mlxsw_sp_fib_entry *fib_entry)
4483 {
4484 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4485 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4486 }
4487 
4488 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4489 				  struct mlxsw_sp_fib_entry *fib_entry)
4490 {
4491 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4492 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4493 }
4494 
4495 static int
4496 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4497 			     const struct fib_entry_notifier_info *fen_info,
4498 			     struct mlxsw_sp_fib_entry *fib_entry)
4499 {
4500 	struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
4501 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4502 	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4503 	struct mlxsw_sp_ipip_entry *ipip_entry;
4504 	struct fib_info *fi = fen_info->fi;
4505 
4506 	switch (fen_info->type) {
4507 	case RTN_LOCAL:
4508 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4509 						 MLXSW_SP_L3_PROTO_IPV4, dip);
4510 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4511 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4512 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4513 							     fib_entry,
4514 							     ipip_entry);
4515 		}
4516 		if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
4517 						     dip.addr4)) {
4518 			u32 t_index;
4519 
4520 			t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
4521 			fib_entry->decap.tunnel_index = t_index;
4522 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4523 			return 0;
4524 		}
4525 		/* fall through */
4526 	case RTN_BROADCAST:
4527 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4528 		return 0;
4529 	case RTN_BLACKHOLE:
4530 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
4531 		return 0;
4532 	case RTN_UNREACHABLE: /* fall through */
4533 	case RTN_PROHIBIT:
4534 		/* Packets hitting these routes need to be trapped, but
4535 		 * can be trapped with a lower priority than packets directed
4536 		 * at the host, so use action type local instead of trap.
4537 		 */
4538 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
4539 		return 0;
4540 	case RTN_UNICAST:
4541 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4542 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4543 		else
4544 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4545 		return 0;
4546 	default:
4547 		return -EINVAL;
4548 	}
4549 }
4550 
4551 static void
4552 mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
4553 			       struct mlxsw_sp_fib_entry *fib_entry)
4554 {
4555 	switch (fib_entry->type) {
4556 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4557 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
4558 		break;
4559 	default:
4560 		break;
4561 	}
4562 }
4563 
4564 static struct mlxsw_sp_fib4_entry *
4565 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4566 			   struct mlxsw_sp_fib_node *fib_node,
4567 			   const struct fib_entry_notifier_info *fen_info)
4568 {
4569 	struct mlxsw_sp_fib4_entry *fib4_entry;
4570 	struct mlxsw_sp_fib_entry *fib_entry;
4571 	int err;
4572 
4573 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4574 	if (!fib4_entry)
4575 		return ERR_PTR(-ENOMEM);
4576 	fib_entry = &fib4_entry->common;
4577 
4578 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4579 	if (err)
4580 		goto err_fib4_entry_type_set;
4581 
4582 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4583 	if (err)
4584 		goto err_nexthop4_group_get;
4585 
4586 	fib4_entry->prio = fen_info->fi->fib_priority;
4587 	fib4_entry->tb_id = fen_info->tb_id;
4588 	fib4_entry->type = fen_info->type;
4589 	fib4_entry->tos = fen_info->tos;
4590 
4591 	fib_entry->fib_node = fib_node;
4592 
4593 	return fib4_entry;
4594 
4595 err_nexthop4_group_get:
4596 	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib_entry);
4597 err_fib4_entry_type_set:
4598 	kfree(fib4_entry);
4599 	return ERR_PTR(err);
4600 }
4601 
4602 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4603 					struct mlxsw_sp_fib4_entry *fib4_entry)
4604 {
4605 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4606 	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common);
4607 	kfree(fib4_entry);
4608 }
4609 
4610 static struct mlxsw_sp_fib4_entry *
4611 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4612 			   const struct fib_entry_notifier_info *fen_info)
4613 {
4614 	struct mlxsw_sp_fib4_entry *fib4_entry;
4615 	struct mlxsw_sp_fib_node *fib_node;
4616 	struct mlxsw_sp_fib *fib;
4617 	struct mlxsw_sp_vr *vr;
4618 
4619 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4620 	if (!vr)
4621 		return NULL;
4622 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4623 
4624 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4625 					    sizeof(fen_info->dst),
4626 					    fen_info->dst_len);
4627 	if (!fib_node)
4628 		return NULL;
4629 
4630 	fib4_entry = container_of(fib_node->fib_entry,
4631 				  struct mlxsw_sp_fib4_entry, common);
4632 	if (fib4_entry->tb_id == fen_info->tb_id &&
4633 	    fib4_entry->tos == fen_info->tos &&
4634 	    fib4_entry->type == fen_info->type &&
4635 	    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4636 	    fen_info->fi)
4637 		return fib4_entry;
4638 
4639 	return NULL;
4640 }
4641 
4642 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4643 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4644 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4645 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4646 	.automatic_shrinking = true,
4647 };
4648 
4649 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4650 				    struct mlxsw_sp_fib_node *fib_node)
4651 {
4652 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4653 				      mlxsw_sp_fib_ht_params);
4654 }
4655 
4656 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4657 				     struct mlxsw_sp_fib_node *fib_node)
4658 {
4659 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4660 			       mlxsw_sp_fib_ht_params);
4661 }
4662 
4663 static struct mlxsw_sp_fib_node *
4664 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4665 			 size_t addr_len, unsigned char prefix_len)
4666 {
4667 	struct mlxsw_sp_fib_key key;
4668 
4669 	memset(&key, 0, sizeof(key));
4670 	memcpy(key.addr, addr, addr_len);
4671 	key.prefix_len = prefix_len;
4672 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4673 }
4674 
4675 static struct mlxsw_sp_fib_node *
4676 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4677 			 size_t addr_len, unsigned char prefix_len)
4678 {
4679 	struct mlxsw_sp_fib_node *fib_node;
4680 
4681 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4682 	if (!fib_node)
4683 		return NULL;
4684 
4685 	list_add(&fib_node->list, &fib->node_list);
4686 	memcpy(fib_node->key.addr, addr, addr_len);
4687 	fib_node->key.prefix_len = prefix_len;
4688 
4689 	return fib_node;
4690 }
4691 
4692 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4693 {
4694 	list_del(&fib_node->list);
4695 	kfree(fib_node);
4696 }
4697 
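/* Account the node's prefix length in the LPM tree bound to the FIB. If
 * the prefix length is not yet described by the tree, get a tree that
 * includes it and rebind all virtual routers using this FIB to the new
 * tree.
 */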
4698 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4699 				      struct mlxsw_sp_fib_node *fib_node)
4700 {
4701 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4702 	struct mlxsw_sp_fib *fib = fib_node->fib;
4703 	struct mlxsw_sp_lpm_tree *lpm_tree;
4704 	int err;
4705 
4706 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4707 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4708 		goto out;
4709 
4710 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4711 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4712 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4713 					 fib->proto);
4714 	if (IS_ERR(lpm_tree))
4715 		return PTR_ERR(lpm_tree);
4716 
4717 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4718 	if (err)
4719 		goto err_lpm_tree_replace;
4720 
4721 out:
4722 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4723 	return 0;
4724 
4725 err_lpm_tree_replace:
4726 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4727 	return err;
4728 }
4729 
4730 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4731 					 struct mlxsw_sp_fib_node *fib_node)
4732 {
4733 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4734 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4735 	struct mlxsw_sp_fib *fib = fib_node->fib;
4736 	int err;
4737 
4738 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4739 		return;
4740 	/* Try to construct a new LPM tree from the current prefix usage
4741 	 * minus the now unused prefix length. If we fail, keep the old tree.
4742 	 */
4743 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4744 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4745 				    fib_node->key.prefix_len);
4746 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4747 					 fib->proto);
4748 	if (IS_ERR(lpm_tree))
4749 		return;
4750 
4751 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4752 	if (err)
4753 		goto err_lpm_tree_replace;
4754 
4755 	return;
4756 
4757 err_lpm_tree_replace:
4758 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4759 }
4760 
4761 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4762 				  struct mlxsw_sp_fib_node *fib_node,
4763 				  struct mlxsw_sp_fib *fib)
4764 {
4765 	int err;
4766 
4767 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4768 	if (err)
4769 		return err;
4770 	fib_node->fib = fib;
4771 
4772 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4773 	if (err)
4774 		goto err_fib_lpm_tree_link;
4775 
4776 	return 0;
4777 
4778 err_fib_lpm_tree_link:
4779 	fib_node->fib = NULL;
4780 	mlxsw_sp_fib_node_remove(fib, fib_node);
4781 	return err;
4782 }
4783 
4784 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4785 				   struct mlxsw_sp_fib_node *fib_node)
4786 {
4787 	struct mlxsw_sp_fib *fib = fib_node->fib;
4788 
4789 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4790 	fib_node->fib = NULL;
4791 	mlxsw_sp_fib_node_remove(fib, fib_node);
4792 }
4793 
4794 static struct mlxsw_sp_fib_node *
4795 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4796 		      size_t addr_len, unsigned char prefix_len,
4797 		      enum mlxsw_sp_l3proto proto)
4798 {
4799 	struct mlxsw_sp_fib_node *fib_node;
4800 	struct mlxsw_sp_fib *fib;
4801 	struct mlxsw_sp_vr *vr;
4802 	int err;
4803 
4804 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4805 	if (IS_ERR(vr))
4806 		return ERR_CAST(vr);
4807 	fib = mlxsw_sp_vr_fib(vr, proto);
4808 
4809 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4810 	if (fib_node)
4811 		return fib_node;
4812 
4813 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4814 	if (!fib_node) {
4815 		err = -ENOMEM;
4816 		goto err_fib_node_create;
4817 	}
4818 
4819 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4820 	if (err)
4821 		goto err_fib_node_init;
4822 
4823 	return fib_node;
4824 
4825 err_fib_node_init:
4826 	mlxsw_sp_fib_node_destroy(fib_node);
4827 err_fib_node_create:
4828 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4829 	return ERR_PTR(err);
4830 }
4831 
4832 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4833 				  struct mlxsw_sp_fib_node *fib_node)
4834 {
4835 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4836 
4837 	if (fib_node->fib_entry)
4838 		return;
4839 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4840 	mlxsw_sp_fib_node_destroy(fib_node);
4841 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4842 }
4843 
4844 static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4845 					struct mlxsw_sp_fib_entry *fib_entry)
4846 {
4847 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4848 	int err;
4849 
4850 	fib_node->fib_entry = fib_entry;
4851 
4852 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4853 	if (err)
4854 		goto err_fib_entry_update;
4855 
4856 	return 0;
4857 
4858 err_fib_entry_update:
4859 	fib_node->fib_entry = NULL;
4860 	return err;
4861 }
4862 
4863 static void
4864 mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4865 			       struct mlxsw_sp_fib_entry *fib_entry)
4866 {
4867 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4868 
4869 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4870 	fib_node->fib_entry = NULL;
4871 }
4872 
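/* Do not allow a route from the main table to replace a route from the
 * local table in hardware, since the kernel consults the local table
 * before the main one.
 */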
4873 static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
4874 {
4875 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4876 	struct mlxsw_sp_fib4_entry *fib4_replaced;
4877 
4878 	if (!fib_node->fib_entry)
4879 		return true;
4880 
4881 	fib4_replaced = container_of(fib_node->fib_entry,
4882 				     struct mlxsw_sp_fib4_entry, common);
4883 	if (fib4_entry->tb_id == RT_TABLE_MAIN &&
4884 	    fib4_replaced->tb_id == RT_TABLE_LOCAL)
4885 		return false;
4886 
4887 	return true;
4888 }
4889 
4890 static int
4891 mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
4892 			     const struct fib_entry_notifier_info *fen_info)
4893 {
4894 	struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
4895 	struct mlxsw_sp_fib_entry *replaced;
4896 	struct mlxsw_sp_fib_node *fib_node;
4897 	int err;
4898 
4899 	if (mlxsw_sp->router->aborted)
4900 		return 0;
4901 
4902 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4903 					 &fen_info->dst, sizeof(fen_info->dst),
4904 					 fen_info->dst_len,
4905 					 MLXSW_SP_L3_PROTO_IPV4);
4906 	if (IS_ERR(fib_node)) {
4907 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4908 		return PTR_ERR(fib_node);
4909 	}
4910 
4911 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4912 	if (IS_ERR(fib4_entry)) {
4913 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4914 		err = PTR_ERR(fib4_entry);
4915 		goto err_fib4_entry_create;
4916 	}
4917 
4918 	if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
4919 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4920 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4921 		return 0;
4922 	}
4923 
4924 	replaced = fib_node->fib_entry;
4925 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
4926 	if (err) {
4927 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4928 		goto err_fib_node_entry_link;
4929 	}
4930 
4931 	/* Nothing to replace */
4932 	if (!replaced)
4933 		return 0;
4934 
4935 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
4936 	fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
4937 				     common);
4938 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
4939 
4940 	return 0;
4941 
4942 err_fib_node_entry_link:
4943 	fib_node->fib_entry = replaced;
4944 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4945 err_fib4_entry_create:
4946 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4947 	return err;
4948 }
4949 
4950 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4951 				     struct fib_entry_notifier_info *fen_info)
4952 {
4953 	struct mlxsw_sp_fib4_entry *fib4_entry;
4954 	struct mlxsw_sp_fib_node *fib_node;
4955 
4956 	if (mlxsw_sp->router->aborted)
4957 		return;
4958 
4959 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4960 	if (!fib4_entry)
4961 		return;
4962 	fib_node = fib4_entry->common.fib_node;
4963 
4964 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
4965 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4966 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4967 }
4968 
4969 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4970 {
4971 	/* Packets with a link-local destination IP arriving at the router
4972 	 * are trapped to the CPU, so there is no need to program specific
4973 	 * routes for them.
4974 	 */
4975 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4976 		return true;
4977 
4978 	/* Multicast routes aren't supported, so ignore them. Neighbour
4979 	 * Discovery packets are specifically trapped.
4980 	 */
4981 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4982 		return true;
4983 
4984 	/* Cloned routes are irrelevant in the forwarding path. */
4985 	if (rt->fib6_flags & RTF_CACHE)
4986 		return true;
4987 
4988 	return false;
4989 }
4990 
4991 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4992 {
4993 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4994 
4995 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4996 	if (!mlxsw_sp_rt6)
4997 		return ERR_PTR(-ENOMEM);
4998 
4999 	/* In case of route replace, the replaced route is deleted without
5000 	 * notification. Take a reference to prevent accessing freed
5001 	 * memory.
5002 	 */
5003 	mlxsw_sp_rt6->rt = rt;
5004 	fib6_info_hold(rt);
5005 
5006 	return mlxsw_sp_rt6;
5007 }
5008 
5009 #if IS_ENABLED(CONFIG_IPV6)
5010 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
5011 {
5012 	fib6_info_release(rt);
5013 }
5014 #else
5015 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
5016 {
5017 }
5018 #endif
5019 
5020 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
5021 {
5022 	struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
5023 
5024 	fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
5025 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
5026 	kfree(mlxsw_sp_rt6);
5027 }
5028 
5029 static struct fib6_info *
5030 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
5031 {
5032 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
5033 				list)->rt;
5034 }
5035 
5036 static struct mlxsw_sp_rt6 *
5037 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
5038 			    const struct fib6_info *rt)
5039 {
5040 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5041 
5042 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
5043 		if (mlxsw_sp_rt6->rt == rt)
5044 			return mlxsw_sp_rt6;
5045 	}
5046 
5047 	return NULL;
5048 }
5049 
5050 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
5051 					const struct fib6_info *rt,
5052 					enum mlxsw_sp_ipip_type *ret)
5053 {
5054 	return rt->fib6_nh->fib_nh_dev &&
5055 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
5056 }
5057 
5058 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
5059 				       struct mlxsw_sp_nexthop_group *nh_grp,
5060 				       struct mlxsw_sp_nexthop *nh,
5061 				       const struct fib6_info *rt)
5062 {
5063 	const struct mlxsw_sp_ipip_ops *ipip_ops;
5064 	struct mlxsw_sp_ipip_entry *ipip_entry;
5065 	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
5066 	struct mlxsw_sp_rif *rif;
5067 	int err;
5068 
5069 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
5070 	if (ipip_entry) {
5071 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5072 		if (ipip_ops->can_offload(mlxsw_sp, dev,
5073 					  MLXSW_SP_L3_PROTO_IPV6)) {
5074 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
5075 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
5076 			return 0;
5077 		}
5078 	}
5079 
5080 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
5081 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5082 	if (!rif)
5083 		return 0;
5084 	mlxsw_sp_nexthop_rif_init(nh, rif);
5085 
5086 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
5087 	if (err)
5088 		goto err_nexthop_neigh_init;
5089 
5090 	return 0;
5091 
5092 err_nexthop_neigh_init:
5093 	mlxsw_sp_nexthop_rif_fini(nh);
5094 	return err;
5095 }
5096 
5097 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
5098 					struct mlxsw_sp_nexthop *nh)
5099 {
5100 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
5101 }
5102 
5103 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
5104 				  struct mlxsw_sp_nexthop_group *nh_grp,
5105 				  struct mlxsw_sp_nexthop *nh,
5106 				  const struct fib6_info *rt)
5107 {
5108 	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
5109 
5110 	nh->nh_grp = nh_grp;
5111 	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
5112 	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
5113 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5114 
5115 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5116 
5117 	if (!dev)
5118 		return 0;
5119 	nh->ifindex = dev->ifindex;
5120 
5121 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5122 }
5123 
5124 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5125 				   struct mlxsw_sp_nexthop *nh)
5126 {
5127 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5128 	list_del(&nh->router_list_node);
5129 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5130 }
5131 
5132 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5133 				    const struct fib6_info *rt)
5134 {
5135 	return rt->fib6_nh->fib_nh_gw_family ||
5136 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5137 }
5138 
5139 static struct mlxsw_sp_nexthop_group *
5140 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5141 			       struct mlxsw_sp_fib6_entry *fib6_entry)
5142 {
5143 	struct mlxsw_sp_nexthop_group *nh_grp;
5144 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5145 	struct mlxsw_sp_nexthop *nh;
5146 	int i = 0;
5147 	int err;
5148 
5149 	nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
5150 			 GFP_KERNEL);
5151 	if (!nh_grp)
5152 		return ERR_PTR(-ENOMEM);
5153 	INIT_LIST_HEAD(&nh_grp->fib_list);
5154 #if IS_ENABLED(CONFIG_IPV6)
5155 	nh_grp->neigh_tbl = &nd_tbl;
5156 #endif
5157 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5158 					struct mlxsw_sp_rt6, list);
5159 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5160 	nh_grp->count = fib6_entry->nrt6;
5161 	for (i = 0; i < nh_grp->count; i++) {
5162 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
5163 
5164 		nh = &nh_grp->nexthops[i];
5165 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5166 		if (err)
5167 			goto err_nexthop6_init;
5168 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5169 	}
5170 
5171 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5172 	if (err)
5173 		goto err_nexthop_group_insert;
5174 
5175 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5176 	return nh_grp;
5177 
5178 err_nexthop_group_insert:
5179 err_nexthop6_init:
5180 	for (i--; i >= 0; i--) {
5181 		nh = &nh_grp->nexthops[i];
5182 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5183 	}
5184 	kfree(nh_grp);
5185 	return ERR_PTR(err);
5186 }
5187 
5188 static void
5189 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5190 				struct mlxsw_sp_nexthop_group *nh_grp)
5191 {
5192 	struct mlxsw_sp_nexthop *nh;
5193 	int i = nh_grp->count;
5194 
5195 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5196 	for (i--; i >= 0; i--) {
5197 		nh = &nh_grp->nexthops[i];
5198 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5199 	}
5200 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5201 	WARN_ON(nh_grp->adj_index_valid);
5202 	kfree(nh_grp);
5203 }
5204 
5205 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5206 				       struct mlxsw_sp_fib6_entry *fib6_entry)
5207 {
5208 	struct mlxsw_sp_nexthop_group *nh_grp;
5209 
5210 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5211 	if (!nh_grp) {
5212 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5213 		if (IS_ERR(nh_grp))
5214 			return PTR_ERR(nh_grp);
5215 	}
5216 
5217 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5218 		      &nh_grp->fib_list);
5219 	fib6_entry->common.nh_group = nh_grp;
5220 
5221 	/* The route and the nexthop are described by the same struct, so we
5222 	 * need to update the nexthop offload indication for the new route.
5223 	 */
5224 	__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
5225 
5226 	return 0;
5227 }
5228 
5229 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5230 					struct mlxsw_sp_fib_entry *fib_entry)
5231 {
5232 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5233 
5234 	list_del(&fib_entry->nexthop_group_node);
5235 	if (!list_empty(&nh_grp->fib_list))
5236 		return;
5237 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5238 }
5239 
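/* Move the entry to a nexthop group that matches its current set of rt6
 * entries. The old group is destroyed only after the entry was
 * successfully updated in the device, making the replacement
 * make-before-break.
 */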
5240 static int
5241 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5242 			       struct mlxsw_sp_fib6_entry *fib6_entry)
5243 {
5244 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5245 	int err;
5246 
5247 	fib6_entry->common.nh_group = NULL;
5248 	list_del(&fib6_entry->common.nexthop_group_node);
5249 
5250 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5251 	if (err)
5252 		goto err_nexthop6_group_get;
5253 
5254 	/* In case this entry is offloaded, the adjacency index
5255 	 * currently associated with it in the device's table is that
5256 	 * of the old group. Start using the new one instead.
5257 	 */
5258 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
5259 	if (err)
5260 		goto err_fib_entry_update;
5261 
5262 	if (list_empty(&old_nh_grp->fib_list))
5263 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5264 
5265 	return 0;
5266 
5267 err_fib_entry_update:
5268 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5269 err_nexthop6_group_get:
5270 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5271 		      &old_nh_grp->fib_list);
5272 	fib6_entry->common.nh_group = old_nh_grp;
5273 	return err;
5274 }
5275 
5276 static int
5277 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5278 				struct mlxsw_sp_fib6_entry *fib6_entry,
5279 				struct fib6_info **rt_arr, unsigned int nrt6)
5280 {
5281 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5282 	int err, i;
5283 
5284 	for (i = 0; i < nrt6; i++) {
5285 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
5286 		if (IS_ERR(mlxsw_sp_rt6)) {
5287 			err = PTR_ERR(mlxsw_sp_rt6);
5288 			goto err_rt6_create;
5289 		}
5290 
5291 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5292 		fib6_entry->nrt6++;
5293 	}
5294 
5295 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5296 	if (err)
5297 		goto err_nexthop6_group_update;
5298 
5299 	return 0;
5300 
5301 err_nexthop6_group_update:
5302 	i = nrt6;
5303 err_rt6_create:
5304 	for (i--; i >= 0; i--) {
5305 		fib6_entry->nrt6--;
5306 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
5307 					       struct mlxsw_sp_rt6, list);
5308 		list_del(&mlxsw_sp_rt6->list);
5309 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5310 	}
5311 	return err;
5312 }
5313 
5314 static void
5315 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5316 				struct mlxsw_sp_fib6_entry *fib6_entry,
5317 				struct fib6_info **rt_arr, unsigned int nrt6)
5318 {
5319 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5320 	int i;
5321 
5322 	for (i = 0; i < nrt6; i++) {
5323 		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
5324 							   rt_arr[i]);
5325 		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
5326 			continue;
5327 
5328 		fib6_entry->nrt6--;
5329 		list_del(&mlxsw_sp_rt6->list);
5330 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5331 	}
5332 
5333 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5334 }
5335 
5336 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5337 					 struct mlxsw_sp_fib_entry *fib_entry,
5338 					 const struct fib6_info *rt)
5339 {
5340 	/* Packets hitting RTF_REJECT routes need to be discarded by the
5341 	 * stack. We can rely on their destination device not having a
5342 	 * RIF (it's the loopback device) and can thus use action type
5343 	 * local, which will cause them to be trapped with a lower
5344 	 * priority than packets that need to be locally received.
5345 	 */
5346 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5347 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5348 	else if (rt->fib6_type == RTN_BLACKHOLE)
5349 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5350 	else if (rt->fib6_flags & RTF_REJECT)
5351 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
5352 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5353 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5354 	else
5355 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5356 }
5357 
5358 static void
5359 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5360 {
5361 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5362 
5363 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5364 				 list) {
5365 		fib6_entry->nrt6--;
5366 		list_del(&mlxsw_sp_rt6->list);
5367 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5368 	}
5369 }
5370 
5371 static struct mlxsw_sp_fib6_entry *
5372 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5373 			   struct mlxsw_sp_fib_node *fib_node,
5374 			   struct fib6_info **rt_arr, unsigned int nrt6)
5375 {
5376 	struct mlxsw_sp_fib6_entry *fib6_entry;
5377 	struct mlxsw_sp_fib_entry *fib_entry;
5378 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5379 	int err, i;
5380 
5381 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5382 	if (!fib6_entry)
5383 		return ERR_PTR(-ENOMEM);
5384 	fib_entry = &fib6_entry->common;
5385 
5386 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5387 
5388 	for (i = 0; i < nrt6; i++) {
5389 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
5390 		if (IS_ERR(mlxsw_sp_rt6)) {
5391 			err = PTR_ERR(mlxsw_sp_rt6);
5392 			goto err_rt6_create;
5393 		}
5394 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5395 		fib6_entry->nrt6++;
5396 	}
5397 
5398 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
5399 
5400 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5401 	if (err)
5402 		goto err_nexthop6_group_get;
5403 
5404 	fib_entry->fib_node = fib_node;
5405 
5406 	return fib6_entry;
5407 
5408 err_nexthop6_group_get:
5409 	i = nrt6;
5410 err_rt6_create:
5411 	for (i--; i >= 0; i--) {
5412 		fib6_entry->nrt6--;
5413 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
5414 					       struct mlxsw_sp_rt6, list);
5415 		list_del(&mlxsw_sp_rt6->list);
5416 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5417 	}
5418 	kfree(fib6_entry);
5419 	return ERR_PTR(err);
5420 }
5421 
5422 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5423 					struct mlxsw_sp_fib6_entry *fib6_entry)
5424 {
5425 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5426 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5427 	WARN_ON(fib6_entry->nrt6);
5428 	kfree(fib6_entry);
5429 }
5430 
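/* Look up the FIB entry that the given route is part of. A match
 * requires the same table, the same metric and that the route itself
 * is linked to the entry, since a FIB node only holds a single entry.
 */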
5431 static struct mlxsw_sp_fib6_entry *
5432 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5433 			   const struct fib6_info *rt)
5434 {
5435 	struct mlxsw_sp_fib6_entry *fib6_entry;
5436 	struct mlxsw_sp_fib_node *fib_node;
5437 	struct mlxsw_sp_fib *fib;
5438 	struct fib6_info *cmp_rt;
5439 	struct mlxsw_sp_vr *vr;
5440 
5441 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5442 	if (!vr)
5443 		return NULL;
5444 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5445 
5446 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5447 					    sizeof(rt->fib6_dst.addr),
5448 					    rt->fib6_dst.plen);
5449 	if (!fib_node)
5450 		return NULL;
5451 
5452 	fib6_entry = container_of(fib_node->fib_entry,
5453 				  struct mlxsw_sp_fib6_entry, common);
5454 	cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5455 	if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
5456 	    rt->fib6_metric == cmp_rt->fib6_metric &&
5457 	    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5458 		return fib6_entry;
5459 
5460 	return NULL;
5461 }
5462 
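/* A route from the main table must not replace a route from the local
 * table for the same prefix, as both tables are offloaded to the same
 * virtual router and the local route is the one the kernel resolves
 * first.
 */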
5463 static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
5464 {
5465 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5466 	struct mlxsw_sp_fib6_entry *fib6_replaced;
5467 	struct fib6_info *rt, *rt_replaced;
5468 
5469 	if (!fib_node->fib_entry)
5470 		return true;
5471 
5472 	fib6_replaced = container_of(fib_node->fib_entry,
5473 				     struct mlxsw_sp_fib6_entry,
5474 				     common);
5475 	rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5476 	rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
5477 	if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
5478 	    rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
5479 		return false;
5480 
5481 	return true;
5482 }
5483 
5484 static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
5485 					struct fib6_info **rt_arr,
5486 					unsigned int nrt6)
5487 {
5488 	struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
5489 	struct mlxsw_sp_fib_entry *replaced;
5490 	struct mlxsw_sp_fib_node *fib_node;
5491 	struct fib6_info *rt = rt_arr[0];
5492 	int err;
5493 
5494 	if (mlxsw_sp->router->aborted)
5495 		return 0;
5496 
5497 	if (rt->fib6_src.plen)
5498 		return -EINVAL;
5499 
5500 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5501 		return 0;
5502 
5503 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5504 					 &rt->fib6_dst.addr,
5505 					 sizeof(rt->fib6_dst.addr),
5506 					 rt->fib6_dst.plen,
5507 					 MLXSW_SP_L3_PROTO_IPV6);
5508 	if (IS_ERR(fib_node))
5509 		return PTR_ERR(fib_node);
5510 
5511 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
5512 						nrt6);
5513 	if (IS_ERR(fib6_entry)) {
5514 		err = PTR_ERR(fib6_entry);
5515 		goto err_fib6_entry_create;
5516 	}
5517 
5518 	if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
5519 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5520 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5521 		return 0;
5522 	}
5523 
5524 	replaced = fib_node->fib_entry;
5525 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
5526 	if (err)
5527 		goto err_fib_node_entry_link;
5528 
5529 	/* Nothing to replace */
5530 	if (!replaced)
5531 		return 0;
5532 
5533 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
5534 	fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
5535 				     common);
5536 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
5537 
5538 	return 0;
5539 
5540 err_fib_node_entry_link:
5541 	fib_node->fib_entry = replaced;
5542 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5543 err_fib6_entry_create:
5544 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5545 	return err;
5546 }
5547 
5548 static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
5549 				       struct fib6_info **rt_arr,
5550 				       unsigned int nrt6)
5551 {
5552 	struct mlxsw_sp_fib6_entry *fib6_entry;
5553 	struct mlxsw_sp_fib_node *fib_node;
5554 	struct fib6_info *rt = rt_arr[0];
5555 	int err;
5556 
5557 	if (mlxsw_sp->router->aborted)
5558 		return 0;
5559 
5560 	if (rt->fib6_src.plen)
5561 		return -EINVAL;
5562 
5563 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5564 		return 0;
5565 
5566 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5567 					 &rt->fib6_dst.addr,
5568 					 sizeof(rt->fib6_dst.addr),
5569 					 rt->fib6_dst.plen,
5570 					 MLXSW_SP_L3_PROTO_IPV6);
5571 	if (IS_ERR(fib_node))
5572 		return PTR_ERR(fib_node);
5573 
5574 	if (WARN_ON_ONCE(!fib_node->fib_entry)) {
5575 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5576 		return -EINVAL;
5577 	}
5578 
5579 	fib6_entry = container_of(fib_node->fib_entry,
5580 				  struct mlxsw_sp_fib6_entry, common);
5581 	err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
5582 					      nrt6);
5583 	if (err)
5584 		goto err_fib6_entry_nexthop_add;
5585 
5586 	return 0;
5587 
5588 err_fib6_entry_nexthop_add:
5589 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5590 	return err;
5591 }
5592 
5593 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5594 				     struct fib6_info **rt_arr,
5595 				     unsigned int nrt6)
5596 {
5597 	struct mlxsw_sp_fib6_entry *fib6_entry;
5598 	struct mlxsw_sp_fib_node *fib_node;
5599 	struct fib6_info *rt = rt_arr[0];
5600 
5601 	if (mlxsw_sp->router->aborted)
5602 		return;
5603 
5604 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5605 		return;
5606 
5607 	/* Multipath routes are first added to the FIB trie and only then
5608 	 * notified. If we vetoed the addition, we will get a delete
5609 	 * notification for a route we do not have. Therefore, do not warn if
5610 	 * the route was not found.
5611 	 */
5612 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5613 	if (!fib6_entry)
5614 		return;
5615 
5616 	/* If not all the nexthops are deleted, then only reduce the nexthop
5617 	 * group.
5618 	 */
5619 	if (nrt6 != fib6_entry->nrt6) {
5620 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
5621 						nrt6);
5622 		return;
5623 	}
5624 
5625 	fib_node = fib6_entry->common.fib_node;
5626 
5627 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
5628 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5629 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5630 }
5631 
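/* Bind all the virtual routers to an empty LPM tree that contains only
 * a default route whose action is to trap packets to the CPU, so that
 * routing is handed back to the kernel after the abort.
 */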
5632 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5633 					    enum mlxsw_reg_ralxx_protocol proto,
5634 					    u8 tree_id)
5635 {
5636 	char ralta_pl[MLXSW_REG_RALTA_LEN];
5637 	char ralst_pl[MLXSW_REG_RALST_LEN];
5638 	int i, err;
5639 
5640 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5641 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5642 	if (err)
5643 		return err;
5644 
5645 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5646 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5647 	if (err)
5648 		return err;
5649 
5650 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5651 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5652 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5653 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5654 
5655 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5656 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5657 				      raltb_pl);
5658 		if (err)
5659 			return err;
5660 
5661 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5662 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5663 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5664 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5665 				      ralue_pl);
5666 		if (err)
5667 			return err;
5668 	}
5669 
5670 	return 0;
5671 }
5672 
5673 static struct mlxsw_sp_mr_table *
5674 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5675 {
5676 	if (family == RTNL_FAMILY_IPMR)
5677 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5678 	else
5679 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5680 }
5681 
5682 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5683 				     struct mfc_entry_notifier_info *men_info,
5684 				     bool replace)
5685 {
5686 	struct mlxsw_sp_mr_table *mrt;
5687 	struct mlxsw_sp_vr *vr;
5688 
5689 	if (mlxsw_sp->router->aborted)
5690 		return 0;
5691 
5692 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5693 	if (IS_ERR(vr))
5694 		return PTR_ERR(vr);
5695 
5696 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5697 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5698 }
5699 
5700 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5701 				      struct mfc_entry_notifier_info *men_info)
5702 {
5703 	struct mlxsw_sp_mr_table *mrt;
5704 	struct mlxsw_sp_vr *vr;
5705 
5706 	if (mlxsw_sp->router->aborted)
5707 		return;
5708 
5709 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5710 	if (WARN_ON(!vr))
5711 		return;
5712 
5713 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5714 	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5715 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5716 }
5717 
5718 static int
5719 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5720 			      struct vif_entry_notifier_info *ven_info)
5721 {
5722 	struct mlxsw_sp_mr_table *mrt;
5723 	struct mlxsw_sp_rif *rif;
5724 	struct mlxsw_sp_vr *vr;
5725 
5726 	if (mlxsw_sp->router->aborted)
5727 		return 0;
5728 
5729 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5730 	if (IS_ERR(vr))
5731 		return PTR_ERR(vr);
5732 
5733 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5734 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5735 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5736 				   ven_info->vif_index,
5737 				   ven_info->vif_flags, rif);
5738 }
5739 
5740 static void
5741 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5742 			      struct vif_entry_notifier_info *ven_info)
5743 {
5744 	struct mlxsw_sp_mr_table *mrt;
5745 	struct mlxsw_sp_vr *vr;
5746 
5747 	if (mlxsw_sp->router->aborted)
5748 		return;
5749 
5750 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5751 	if (WARN_ON(!vr))
5752 		return;
5753 
5754 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5755 	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5756 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5757 }
5758 
5759 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5760 {
5761 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5762 	int err;
5763 
5764 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5765 					       MLXSW_SP_LPM_TREE_MIN);
5766 	if (err)
5767 		return err;
5768 
5769 	/* The multicast router code does not need an abort trap as, by default,
5770 	 * packets that do not match any route are trapped to the CPU.
5771 	 */
5772 
5773 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5774 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5775 						MLXSW_SP_LPM_TREE_MIN + 1);
5776 }
5777 
5778 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5779 				     struct mlxsw_sp_fib_node *fib_node)
5780 {
5781 	struct mlxsw_sp_fib4_entry *fib4_entry;
5782 
5783 	fib4_entry = container_of(fib_node->fib_entry,
5784 				  struct mlxsw_sp_fib4_entry, common);
5785 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
5786 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5787 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5788 }
5789 
5790 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5791 				     struct mlxsw_sp_fib_node *fib_node)
5792 {
5793 	struct mlxsw_sp_fib6_entry *fib6_entry;
5794 
5795 	fib6_entry = container_of(fib_node->fib_entry,
5796 				  struct mlxsw_sp_fib6_entry, common);
5797 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
5798 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5799 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5800 }
5801 
5802 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5803 				    struct mlxsw_sp_fib_node *fib_node)
5804 {
5805 	switch (fib_node->fib->proto) {
5806 	case MLXSW_SP_L3_PROTO_IPV4:
5807 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5808 		break;
5809 	case MLXSW_SP_L3_PROTO_IPV6:
5810 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5811 		break;
5812 	}
5813 }
5814 
5815 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5816 				  struct mlxsw_sp_vr *vr,
5817 				  enum mlxsw_sp_l3proto proto)
5818 {
5819 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5820 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5821 
5822 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5823 		bool do_break = &tmp->list == &fib->node_list;
5824 
5825 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5826 		if (do_break)
5827 			break;
5828 	}
5829 }
5830 
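/* Flush all the offloaded multicast and unicast routes from all the
 * active virtual routers, releasing the resources they hold.
 */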
5831 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5832 {
5833 	int i, j;
5834 
5835 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5836 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5837 
5838 		if (!mlxsw_sp_vr_is_used(vr))
5839 			continue;
5840 
5841 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5842 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5843 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5844 
5845 		/* If the virtual router was only used for IPv4, then it's no
5846 		 * longer used.
5847 		 */
5848 		if (!mlxsw_sp_vr_is_used(vr))
5849 			continue;
5850 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5851 	}
5852 
5853 	/* After flushing all the routes, it is not possible that anyone is
5854 	 * still using the adjacency index that discards packets, so free it in
5855 	 * case it was allocated.
5856 	 */
5857 	if (!mlxsw_sp->router->adj_discard_index_valid)
5858 		return;
5859 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
5860 			   mlxsw_sp->router->adj_discard_index);
5861 	mlxsw_sp->router->adj_discard_index_valid = false;
5862 }
5863 
5864 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5865 {
5866 	int err;
5867 
5868 	if (mlxsw_sp->router->aborted)
5869 		return;
5870 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5871 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5872 	mlxsw_sp->router->aborted = true;
5873 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5874 	if (err)
5875 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5876 }
5877 
5878 struct mlxsw_sp_fib6_event_work {
5879 	struct fib6_info **rt_arr;
5880 	unsigned int nrt6;
5881 };
5882 
5883 struct mlxsw_sp_fib_event_work {
5884 	struct work_struct work;
5885 	union {
5886 		struct mlxsw_sp_fib6_event_work fib6_work;
5887 		struct fib_entry_notifier_info fen_info;
5888 		struct fib_rule_notifier_info fr_info;
5889 		struct fib_nh_notifier_info fnh_info;
5890 		struct mfc_entry_notifier_info men_info;
5891 		struct vif_entry_notifier_info ven_info;
5892 	};
5893 	struct mlxsw_sp *mlxsw_sp;
5894 	unsigned long event;
5895 };
5896 
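/* Copy the route and its siblings into an array and take a reference
 * on each of them, so that they do not go away while the work item
 * that processes the event is pending.
 */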
5897 static int
5898 mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
5899 			       struct fib6_entry_notifier_info *fen6_info)
5900 {
5901 	struct fib6_info *rt = fen6_info->rt;
5902 	struct fib6_info **rt_arr;
5903 	struct fib6_info *iter;
5904 	unsigned int nrt6;
5905 	int i = 0;
5906 
5907 	nrt6 = fen6_info->nsiblings + 1;
5908 
5909 	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
5910 	if (!rt_arr)
5911 		return -ENOMEM;
5912 
5913 	fib6_work->rt_arr = rt_arr;
5914 	fib6_work->nrt6 = nrt6;
5915 
5916 	rt_arr[0] = rt;
5917 	fib6_info_hold(rt);
5918 
5919 	if (!fen6_info->nsiblings)
5920 		return 0;
5921 
5922 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
5923 		if (i == fen6_info->nsiblings)
5924 			break;
5925 
5926 		rt_arr[i + 1] = iter;
5927 		fib6_info_hold(iter);
5928 		i++;
5929 	}
5930 	WARN_ON_ONCE(i != fen6_info->nsiblings);
5931 
5932 	return 0;
5933 }
5934 
5935 static void
5936 mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
5937 {
5938 	int i;
5939 
5940 	for (i = 0; i < fib6_work->nrt6; i++)
5941 		mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
5942 	kfree(fib6_work->rt_arr);
5943 }
5944 
5945 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5946 {
5947 	struct mlxsw_sp_fib_event_work *fib_work =
5948 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5949 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5950 	int err;
5951 
5952 	/* Protect internal structures from changes */
5953 	rtnl_lock();
5954 	mlxsw_sp_span_respin(mlxsw_sp);
5955 
5956 	switch (fib_work->event) {
5957 	case FIB_EVENT_ENTRY_REPLACE:
5958 		err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
5959 						   &fib_work->fen_info);
5960 		if (err)
5961 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5962 		fib_info_put(fib_work->fen_info.fi);
5963 		break;
5964 	case FIB_EVENT_ENTRY_DEL:
5965 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5966 		fib_info_put(fib_work->fen_info.fi);
5967 		break;
5968 	case FIB_EVENT_NH_ADD: /* fall through */
5969 	case FIB_EVENT_NH_DEL:
5970 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5971 					fib_work->fnh_info.fib_nh);
5972 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5973 		break;
5974 	}
5975 	rtnl_unlock();
5976 	kfree(fib_work);
5977 }
5978 
5979 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5980 {
5981 	struct mlxsw_sp_fib_event_work *fib_work =
5982 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5983 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5984 	int err;
5985 
5986 	rtnl_lock();
5987 	mlxsw_sp_span_respin(mlxsw_sp);
5988 
5989 	switch (fib_work->event) {
5990 	case FIB_EVENT_ENTRY_REPLACE:
5991 		err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
5992 						   fib_work->fib6_work.rt_arr,
5993 						   fib_work->fib6_work.nrt6);
5994 		if (err)
5995 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5996 		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
5997 		break;
5998 	case FIB_EVENT_ENTRY_APPEND:
5999 		err = mlxsw_sp_router_fib6_append(mlxsw_sp,
6000 						  fib_work->fib6_work.rt_arr,
6001 						  fib_work->fib6_work.nrt6);
6002 		if (err)
6003 			mlxsw_sp_router_fib_abort(mlxsw_sp);
6004 		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
6005 		break;
6006 	case FIB_EVENT_ENTRY_DEL:
6007 		mlxsw_sp_router_fib6_del(mlxsw_sp,
6008 					 fib_work->fib6_work.rt_arr,
6009 					 fib_work->fib6_work.nrt6);
6010 		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
6011 		break;
6012 	}
6013 	rtnl_unlock();
6014 	kfree(fib_work);
6015 }
6016 
6017 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
6018 {
6019 	struct mlxsw_sp_fib_event_work *fib_work =
6020 		container_of(work, struct mlxsw_sp_fib_event_work, work);
6021 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
6022 	bool replace;
6023 	int err;
6024 
6025 	rtnl_lock();
6026 	switch (fib_work->event) {
6027 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6028 	case FIB_EVENT_ENTRY_ADD:
6029 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
6030 
6031 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
6032 						replace);
6033 		if (err)
6034 			mlxsw_sp_router_fib_abort(mlxsw_sp);
6035 		mr_cache_put(fib_work->men_info.mfc);
6036 		break;
6037 	case FIB_EVENT_ENTRY_DEL:
6038 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
6039 		mr_cache_put(fib_work->men_info.mfc);
6040 		break;
6041 	case FIB_EVENT_VIF_ADD:
6042 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
6043 						    &fib_work->ven_info);
6044 		if (err)
6045 			mlxsw_sp_router_fib_abort(mlxsw_sp);
6046 		dev_put(fib_work->ven_info.dev);
6047 		break;
6048 	case FIB_EVENT_VIF_DEL:
6049 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
6050 					      &fib_work->ven_info);
6051 		dev_put(fib_work->ven_info.dev);
6052 		break;
6053 	}
6054 	rtnl_unlock();
6055 	kfree(fib_work);
6056 }
6057 
6058 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
6059 				       struct fib_notifier_info *info)
6060 {
6061 	struct fib_entry_notifier_info *fen_info;
6062 	struct fib_nh_notifier_info *fnh_info;
6063 
6064 	switch (fib_work->event) {
6065 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6066 	case FIB_EVENT_ENTRY_DEL:
6067 		fen_info = container_of(info, struct fib_entry_notifier_info,
6068 					info);
6069 		fib_work->fen_info = *fen_info;
6070 		/* Take a reference on fib_info to prevent it from being
6071 		 * freed while work is queued. Release it afterwards.
6072 		 */
6073 		fib_info_hold(fib_work->fen_info.fi);
6074 		break;
6075 	case FIB_EVENT_NH_ADD: /* fall through */
6076 	case FIB_EVENT_NH_DEL:
6077 		fnh_info = container_of(info, struct fib_nh_notifier_info,
6078 					info);
6079 		fib_work->fnh_info = *fnh_info;
6080 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
6081 		break;
6082 	}
6083 }
6084 
6085 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
6086 				      struct fib_notifier_info *info)
6087 {
6088 	struct fib6_entry_notifier_info *fen6_info;
6089 	int err;
6090 
6091 	switch (fib_work->event) {
6092 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6093 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
6094 	case FIB_EVENT_ENTRY_DEL:
6095 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
6096 					 info);
6097 		err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
6098 						     fen6_info);
6099 		if (err)
6100 			return err;
6101 		break;
6102 	}
6103 
6104 	return 0;
6105 }
6106 
6107 static void
6108 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
6109 			    struct fib_notifier_info *info)
6110 {
6111 	switch (fib_work->event) {
6112 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6113 	case FIB_EVENT_ENTRY_ADD: /* fall through */
6114 	case FIB_EVENT_ENTRY_DEL:
6115 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
6116 		mr_cache_hold(fib_work->men_info.mfc);
6117 		break;
6118 	case FIB_EVENT_VIF_ADD: /* fall through */
6119 	case FIB_EVENT_VIF_DEL:
6120 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
6121 		dev_hold(fib_work->ven_info.dev);
6122 		break;
6123 	}
6124 }
6125 
6126 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
6127 					  struct fib_notifier_info *info,
6128 					  struct mlxsw_sp *mlxsw_sp)
6129 {
6130 	struct netlink_ext_ack *extack = info->extack;
6131 	struct fib_rule_notifier_info *fr_info;
6132 	struct fib_rule *rule;
6133 	int err = 0;
6134 
6135 	/* nothing to do at the moment */
6136 	if (event == FIB_EVENT_RULE_DEL)
6137 		return 0;
6138 
6139 	if (mlxsw_sp->router->aborted)
6140 		return 0;
6141 
6142 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
6143 	rule = fr_info->rule;
6144 
6145 	/* Rule only affects locally generated traffic */
6146 	if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
6147 		return 0;
6148 
6149 	switch (info->family) {
6150 	case AF_INET:
6151 		if (!fib4_rule_default(rule) && !rule->l3mdev)
6152 			err = -EOPNOTSUPP;
6153 		break;
6154 	case AF_INET6:
6155 		if (!fib6_rule_default(rule) && !rule->l3mdev)
6156 			err = -EOPNOTSUPP;
6157 		break;
6158 	case RTNL_FAMILY_IPMR:
6159 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
6160 			err = -EOPNOTSUPP;
6161 		break;
6162 	case RTNL_FAMILY_IP6MR:
6163 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6164 			err = -EOPNOTSUPP;
6165 		break;
6166 	}
6167 
6168 	if (err < 0)
6169 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6170 
6171 	return err;
6172 }
6173 
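/* FIB notifications are delivered in an atomic context, so events that
 * cannot be vetoed synchronously are processed in a work item. Any
 * state the work item needs is copied (and referenced) beforehand.
 */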
6174 /* Called with rcu_read_lock() */
6175 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6176 				     unsigned long event, void *ptr)
6177 {
6178 	struct mlxsw_sp_fib_event_work *fib_work;
6179 	struct fib_notifier_info *info = ptr;
6180 	struct mlxsw_sp_router *router;
6181 	int err;
6182 
6183 	if (info->family != AF_INET && info->family != AF_INET6 &&
6184 	    info->family != RTNL_FAMILY_IPMR &&
6185 	    info->family != RTNL_FAMILY_IP6MR)
6186 		return NOTIFY_DONE;
6187 
6188 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6189 
6190 	switch (event) {
6191 	case FIB_EVENT_RULE_ADD: /* fall through */
6192 	case FIB_EVENT_RULE_DEL:
6193 		err = mlxsw_sp_router_fib_rule_event(event, info,
6194 						     router->mlxsw_sp);
6195 		return notifier_from_errno(err);
6196 	case FIB_EVENT_ENTRY_ADD: /* fall through */
6197 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6198 	case FIB_EVENT_ENTRY_APPEND:
6199 		if (router->aborted) {
6200 			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6201 			return notifier_from_errno(-EINVAL);
6202 		}
6203 		if (info->family == AF_INET) {
6204 			struct fib_entry_notifier_info *fen_info = ptr;
6205 
6206 			if (fen_info->fi->fib_nh_is_v6) {
6207 				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
6208 				return notifier_from_errno(-EINVAL);
6209 			}
6210 			if (fen_info->fi->nh) {
6211 				NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
6212 				return notifier_from_errno(-EINVAL);
6213 			}
6214 		} else if (info->family == AF_INET6) {
6215 			struct fib6_entry_notifier_info *fen6_info;
6216 
6217 			fen6_info = container_of(info,
6218 						 struct fib6_entry_notifier_info,
6219 						 info);
6220 			if (fen6_info->rt->nh) {
6221 				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported");
6222 				return notifier_from_errno(-EINVAL);
6223 			}
6224 		}
6225 		break;
6226 	}
6227 
6228 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6229 	if (WARN_ON(!fib_work))
6230 		return NOTIFY_BAD;
6231 
6232 	fib_work->mlxsw_sp = router->mlxsw_sp;
6233 	fib_work->event = event;
6234 
6235 	switch (info->family) {
6236 	case AF_INET:
6237 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6238 		mlxsw_sp_router_fib4_event(fib_work, info);
6239 		break;
6240 	case AF_INET6:
6241 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6242 		err = mlxsw_sp_router_fib6_event(fib_work, info);
6243 		if (err)
6244 			goto err_fib_event;
6245 		break;
6246 	case RTNL_FAMILY_IP6MR:
6247 	case RTNL_FAMILY_IPMR:
6248 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6249 		mlxsw_sp_router_fibmr_event(fib_work, info);
6250 		break;
6251 	}
6252 
6253 	mlxsw_core_schedule_work(&fib_work->work);
6254 
6255 	return NOTIFY_DONE;
6256 
6257 err_fib_event:
6258 	kfree(fib_work);
6259 	return NOTIFY_BAD;
6260 }
6261 
6262 static struct mlxsw_sp_rif *
6263 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6264 			 const struct net_device *dev)
6265 {
6266 	int i;
6267 
6268 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6269 		if (mlxsw_sp->router->rifs[i] &&
6270 		    mlxsw_sp->router->rifs[i]->dev == dev)
6271 			return mlxsw_sp->router->rifs[i];
6272 
6273 	return NULL;
6274 }
6275 
6276 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
6277 			 const struct net_device *dev)
6278 {
6279 	return !!mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6280 }
6281 
6282 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
6283 {
6284 	struct mlxsw_sp_rif *rif;
6285 	u16 vid = 0;
6286 
6287 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6288 	if (!rif)
6289 		goto out;
6290 
6291 	/* We only return the VID for VLAN RIFs. Otherwise we return an
6292 	 * invalid value (0).
6293 	 */
6294 	if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
6295 		goto out;
6296 
6297 	vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6298 
6299 out:
6300 	return vid;
6301 }
6302 
6303 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6304 {
6305 	char ritr_pl[MLXSW_REG_RITR_LEN];
6306 	int err;
6307 
6308 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6309 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6310 	if (err)
6311 		return err;
6312 
6313 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
6314 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6315 }
6316 
6317 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6318 					  struct mlxsw_sp_rif *rif)
6319 {
6320 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6321 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6322 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6323 }
6324 
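/* Determine whether an event on the netdev should result in the
 * configuration (NETDEV_UP) or removal (NETDEV_DOWN) of its RIF. On
 * removal, make sure no IPv4 or IPv6 addresses are still present on
 * the netdev.
 */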
6325 static bool
6326 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6327 			   unsigned long event)
6328 {
6329 	struct inet6_dev *inet6_dev;
6330 	bool addr_list_empty = true;
6331 	struct in_device *idev;
6332 
6333 	switch (event) {
6334 	case NETDEV_UP:
6335 		return rif == NULL;
6336 	case NETDEV_DOWN:
6337 		rcu_read_lock();
6338 		idev = __in_dev_get_rcu(dev);
6339 		if (idev && idev->ifa_list)
6340 			addr_list_empty = false;
6341 
6342 		inet6_dev = __in6_dev_get(dev);
6343 		if (addr_list_empty && inet6_dev &&
6344 		    !list_empty(&inet6_dev->addr_list))
6345 			addr_list_empty = false;
6346 		rcu_read_unlock();
6347 
6348 		/* macvlans do not have a RIF, but rather piggyback on the
6349 		 * RIF of their lower device.
6350 		 */
6351 		if (netif_is_macvlan(dev) && addr_list_empty)
6352 			return true;
6353 
6354 		if (rif && addr_list_empty &&
6355 		    !netif_is_l3_slave(rif->dev))
6356 			return true;
6357 		/* It is possible we already removed the RIF ourselves
6358 		 * if it was assigned to a netdev that is now a bridge
6359 		 * or LAG slave.
6360 		 */
6361 		return false;
6362 	}
6363 
6364 	return false;
6365 }
6366 
6367 static enum mlxsw_sp_rif_type
6368 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6369 		      const struct net_device *dev)
6370 {
6371 	enum mlxsw_sp_fid_type type;
6372 
6373 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6374 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
6375 
6376 	/* Otherwise RIF type is derived from the type of the underlying FID. */
6377 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6378 		type = MLXSW_SP_FID_TYPE_8021Q;
6379 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6380 		type = MLXSW_SP_FID_TYPE_8021Q;
6381 	else if (netif_is_bridge_master(dev))
6382 		type = MLXSW_SP_FID_TYPE_8021D;
6383 	else
6384 		type = MLXSW_SP_FID_TYPE_RFID;
6385 
6386 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6387 }
6388 
6389 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6390 {
6391 	int i;
6392 
6393 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6394 		if (!mlxsw_sp->router->rifs[i]) {
6395 			*p_rif_index = i;
6396 			return 0;
6397 		}
6398 	}
6399 
6400 	return -ENOBUFS;
6401 }
6402 
6403 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6404 					       u16 vr_id,
6405 					       struct net_device *l3_dev)
6406 {
6407 	struct mlxsw_sp_rif *rif;
6408 
6409 	rif = kzalloc(rif_size, GFP_KERNEL);
6410 	if (!rif)
6411 		return NULL;
6412 
6413 	INIT_LIST_HEAD(&rif->nexthop_list);
6414 	INIT_LIST_HEAD(&rif->neigh_list);
6415 	if (l3_dev) {
6416 		ether_addr_copy(rif->addr, l3_dev->dev_addr);
6417 		rif->mtu = l3_dev->mtu;
6418 		rif->dev = l3_dev;
6419 	}
6420 	rif->vr_id = vr_id;
6421 	rif->rif_index = rif_index;
6422 
6423 	return rif;
6424 }
6425 
6426 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6427 					   u16 rif_index)
6428 {
6429 	return mlxsw_sp->router->rifs[rif_index];
6430 }
6431 
6432 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6433 {
6434 	return rif->rif_index;
6435 }
6436 
6437 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6438 {
6439 	return lb_rif->common.rif_index;
6440 }
6441 
6442 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6443 {
6444 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
6445 	struct mlxsw_sp_vr *ul_vr;
6446 
6447 	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
6448 	if (WARN_ON(IS_ERR(ul_vr)))
6449 		return 0;
6450 
6451 	return ul_vr->id;
6452 }
6453 
6454 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6455 {
6456 	return lb_rif->ul_rif_id;
6457 }
6458 
6459 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6460 {
6461 	return rif->dev->ifindex;
6462 }
6463 
6464 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6465 {
6466 	return rif->dev;
6467 }
6468 
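/* Create a RIF for the netdev described by the parameters: bind it to
 * a virtual router according to the netdev's L3 domain, allocate a RIF
 * index and a FID, configure the RIF in the device and register it
 * with the multicast routing tables.
 */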
6469 static struct mlxsw_sp_rif *
6470 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6471 		    const struct mlxsw_sp_rif_params *params,
6472 		    struct netlink_ext_ack *extack)
6473 {
6474 	u32 tb_id = l3mdev_fib_table(params->dev);
6475 	const struct mlxsw_sp_rif_ops *ops;
6476 	struct mlxsw_sp_fid *fid = NULL;
6477 	enum mlxsw_sp_rif_type type;
6478 	struct mlxsw_sp_rif *rif;
6479 	struct mlxsw_sp_vr *vr;
6480 	u16 rif_index;
6481 	int i, err;
6482 
6483 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6484 	ops = mlxsw_sp->rif_ops_arr[type];
6485 
6486 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6487 	if (IS_ERR(vr))
6488 		return ERR_CAST(vr);
6489 	vr->rif_count++;
6490 
6491 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6492 	if (err) {
6493 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6494 		goto err_rif_index_alloc;
6495 	}
6496 
6497 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6498 	if (!rif) {
6499 		err = -ENOMEM;
6500 		goto err_rif_alloc;
6501 	}
6502 	dev_hold(rif->dev);
6503 	mlxsw_sp->router->rifs[rif_index] = rif;
6504 	rif->mlxsw_sp = mlxsw_sp;
6505 	rif->ops = ops;
6506 
6507 	if (ops->fid_get) {
6508 		fid = ops->fid_get(rif, extack);
6509 		if (IS_ERR(fid)) {
6510 			err = PTR_ERR(fid);
6511 			goto err_fid_get;
6512 		}
6513 		rif->fid = fid;
6514 	}
6515 
6516 	if (ops->setup)
6517 		ops->setup(rif, params);
6518 
6519 	err = ops->configure(rif);
6520 	if (err)
6521 		goto err_configure;
6522 
6523 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6524 		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6525 		if (err)
6526 			goto err_mr_rif_add;
6527 	}
6528 
6529 	mlxsw_sp_rif_counters_alloc(rif);
6530 
6531 	return rif;
6532 
6533 err_mr_rif_add:
6534 	for (i--; i >= 0; i--)
6535 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6536 	ops->deconfigure(rif);
6537 err_configure:
6538 	if (fid)
6539 		mlxsw_sp_fid_put(fid);
6540 err_fid_get:
6541 	mlxsw_sp->router->rifs[rif_index] = NULL;
6542 	dev_put(rif->dev);
6543 	kfree(rif);
6544 err_rif_alloc:
6545 err_rif_index_alloc:
6546 	vr->rif_count--;
6547 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6548 	return ERR_PTR(err);
6549 }
6550 
6551 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6552 {
6553 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6554 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6555 	struct mlxsw_sp_fid *fid = rif->fid;
6556 	struct mlxsw_sp_vr *vr;
6557 	int i;
6558 
6559 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6560 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6561 
6562 	mlxsw_sp_rif_counters_free(rif);
6563 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6564 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6565 	ops->deconfigure(rif);
6566 	if (fid)
6567 		/* Loopback RIFs are not associated with a FID. */
6568 		mlxsw_sp_fid_put(fid);
6569 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6570 	dev_put(rif->dev);
6571 	kfree(rif);
6572 	vr->rif_count--;
6573 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6574 }
6575 
6576 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
6577 				 struct net_device *dev)
6578 {
6579 	struct mlxsw_sp_rif *rif;
6580 
6581 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6582 	if (!rif)
6583 		return;
6584 	mlxsw_sp_rif_destroy(rif);
6585 }
6586 
6587 static void
6588 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6589 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6590 {
6591 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6592 
6593 	params->vid = mlxsw_sp_port_vlan->vid;
6594 	params->lag = mlxsw_sp_port->lagged;
6595 	if (params->lag)
6596 		params->lag_id = mlxsw_sp_port->lag_id;
6597 	else
6598 		params->system_port = mlxsw_sp_port->local_port;
6599 }
6600 
6601 static struct mlxsw_sp_rif_subport *
6602 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6603 {
6604 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6605 }
6606 
6607 static struct mlxsw_sp_rif *
6608 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
6609 			 const struct mlxsw_sp_rif_params *params,
6610 			 struct netlink_ext_ack *extack)
6611 {
6612 	struct mlxsw_sp_rif_subport *rif_subport;
6613 	struct mlxsw_sp_rif *rif;
6614 
6615 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
6616 	if (!rif)
6617 		return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
6618 
6619 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6620 	refcount_inc(&rif_subport->ref_count);
6621 	return rif;
6622 }
6623 
6624 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
6625 {
6626 	struct mlxsw_sp_rif_subport *rif_subport;
6627 
6628 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6629 	if (!refcount_dec_and_test(&rif_subport->ref_count))
6630 		return;
6631 
6632 	mlxsw_sp_rif_destroy(rif);
6633 }
6634 
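/* Join a {port, VID} to a router interface on the L3 netdev: get (or
 * create) the subport RIF, map the {port, VID} to the RIF's FID and
 * put the VID in a forwarding state with learning disabled, as the
 * traffic is now routed rather than bridged.
 */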
6635 static int
6636 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6637 			       struct net_device *l3_dev,
6638 			       struct netlink_ext_ack *extack)
6639 {
6640 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6641 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6642 	struct mlxsw_sp_rif_params params = {
6643 		.dev = l3_dev,
6644 	};
6645 	u16 vid = mlxsw_sp_port_vlan->vid;
6646 	struct mlxsw_sp_rif *rif;
6647 	struct mlxsw_sp_fid *fid;
6648 	int err;
6649 
6650 	mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6651 	rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
6652 	if (IS_ERR(rif))
6653 		return PTR_ERR(rif);
6654 
6655 	/* FID was already created, just take a reference */
6656 	fid = rif->ops->fid_get(rif, extack);
6657 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6658 	if (err)
6659 		goto err_fid_port_vid_map;
6660 
6661 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6662 	if (err)
6663 		goto err_port_vid_learning_set;
6664 
6665 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6666 					BR_STATE_FORWARDING);
6667 	if (err)
6668 		goto err_port_vid_stp_set;
6669 
6670 	mlxsw_sp_port_vlan->fid = fid;
6671 
6672 	return 0;
6673 
6674 err_port_vid_stp_set:
6675 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6676 err_port_vid_learning_set:
6677 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6678 err_fid_port_vid_map:
6679 	mlxsw_sp_fid_put(fid);
6680 	mlxsw_sp_rif_subport_put(rif);
6681 	return err;
6682 }
6683 
6684 static void
6685 __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6686 {
6687 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6688 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6689 	struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
6690 	u16 vid = mlxsw_sp_port_vlan->vid;
6691 
6692 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6693 		return;
6694 
6695 	mlxsw_sp_port_vlan->fid = NULL;
6696 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6697 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6698 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6699 	mlxsw_sp_fid_put(fid);
6700 	mlxsw_sp_rif_subport_put(rif);
6701 }
6702 
6703 void
6704 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6705 {
6706 	__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6707 }
6708 
6709 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6710 					     struct net_device *port_dev,
6711 					     unsigned long event, u16 vid,
6712 					     struct netlink_ext_ack *extack)
6713 {
6714 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6715 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6716 
6717 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6718 	if (WARN_ON(!mlxsw_sp_port_vlan))
6719 		return -EINVAL;
6720 
6721 	switch (event) {
6722 	case NETDEV_UP:
6723 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6724 						      l3_dev, extack);
6725 	case NETDEV_DOWN:
6726 		__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6727 		break;
6728 	}
6729 
6730 	return 0;
6731 }
6732 
6733 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6734 					unsigned long event,
6735 					struct netlink_ext_ack *extack)
6736 {
6737 	if (netif_is_bridge_port(port_dev) ||
6738 	    netif_is_lag_port(port_dev) ||
6739 	    netif_is_ovs_port(port_dev))
6740 		return 0;
6741 
6742 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
6743 						 MLXSW_SP_DEFAULT_VID, extack);
6744 }
6745 
6746 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6747 					 struct net_device *lag_dev,
6748 					 unsigned long event, u16 vid,
6749 					 struct netlink_ext_ack *extack)
6750 {
6751 	struct net_device *port_dev;
6752 	struct list_head *iter;
6753 	int err;
6754 
6755 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6756 		if (mlxsw_sp_port_dev_check(port_dev)) {
6757 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6758 								port_dev,
6759 								event, vid,
6760 								extack);
6761 			if (err)
6762 				return err;
6763 		}
6764 	}
6765 
6766 	return 0;
6767 }
6768 
6769 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6770 				       unsigned long event,
6771 				       struct netlink_ext_ack *extack)
6772 {
6773 	if (netif_is_bridge_port(lag_dev))
6774 		return 0;
6775 
6776 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
6777 					     MLXSW_SP_DEFAULT_VID, extack);
6778 }
6779 
6780 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
6781 					  struct net_device *l3_dev,
6782 					  unsigned long event,
6783 					  struct netlink_ext_ack *extack)
6784 {
6785 	struct mlxsw_sp_rif_params params = {
6786 		.dev = l3_dev,
6787 	};
6788 	struct mlxsw_sp_rif *rif;
6789 
6790 	switch (event) {
6791 	case NETDEV_UP:
6792 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6793 		if (IS_ERR(rif))
6794 			return PTR_ERR(rif);
6795 		break;
6796 	case NETDEV_DOWN:
6797 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6798 		mlxsw_sp_rif_destroy(rif);
6799 		break;
6800 	}
6801 
6802 	return 0;
6803 }
6804 
6805 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
6806 					struct net_device *vlan_dev,
6807 					unsigned long event,
6808 					struct netlink_ext_ack *extack)
6809 {
6810 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6811 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6812 
6813 	if (netif_is_bridge_port(vlan_dev))
6814 		return 0;
6815 
6816 	if (mlxsw_sp_port_dev_check(real_dev))
6817 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6818 							 event, vid, extack);
6819 	else if (netif_is_lag_master(real_dev))
6820 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6821 						     vid, extack);
6822 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6823 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
6824 						      extack);
6825 
6826 	return 0;
6827 }
6828 
6829 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6830 {
6831 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6832 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6833 
6834 	return ether_addr_equal_masked(mac, vrrp4, mask);
6835 }
6836 
6837 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6838 {
6839 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6840 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6841 
6842 	return ether_addr_equal_masked(mac, vrrp6, mask);
6843 }
6844 
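/* VRRP virtual MAC addresses are of the form 00-00-5E-00-01-{VRID}
 * (IPv4) or 00-00-5E-00-02-{VRID} (IPv6). Program the VRID from the
 * last byte of the MAC into the RIF, so that the device also routes
 * packets sent to the virtual MAC.
 */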
6845 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6846 				const u8 *mac, bool adding)
6847 {
6848 	char ritr_pl[MLXSW_REG_RITR_LEN];
6849 	u8 vrrp_id = adding ? mac[5] : 0;
6850 	int err;
6851 
6852 	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6853 	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6854 		return 0;
6855 
6856 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6857 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6858 	if (err)
6859 		return err;
6860 
6861 	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6862 		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6863 	else
6864 		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6865 
6866 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6867 }
6868 
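/* A macvlan netdev does not get a RIF of its own. Instead, its MAC is
 * programmed as an FDB entry that points at the router on the RIF of
 * its lower device and, for VRRP MACs, as the RIF's VRRP ID.
 */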
6869 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6870 				    const struct net_device *macvlan_dev,
6871 				    struct netlink_ext_ack *extack)
6872 {
6873 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6874 	struct mlxsw_sp_rif *rif;
6875 	int err;
6876 
6877 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6878 	if (!rif) {
6879 		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6880 		return -EOPNOTSUPP;
6881 	}
6882 
6883 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6884 				  mlxsw_sp_fid_index(rif->fid), true);
6885 	if (err)
6886 		return err;
6887 
6888 	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6889 				   macvlan_dev->dev_addr, true);
6890 	if (err)
6891 		goto err_rif_vrrp_add;
6892 
6893 	/* Make sure the bridge driver does not have this MAC pointing at
6894 	 * some other port.
6895 	 */
6896 	if (rif->ops->fdb_del)
6897 		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6898 
6899 	return 0;
6900 
6901 err_rif_vrrp_add:
6902 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6903 			    mlxsw_sp_fid_index(rif->fid), false);
6904 	return err;
6905 }
6906 
6907 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6908 				       const struct net_device *macvlan_dev)
6909 {
6910 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6911 	struct mlxsw_sp_rif *rif;
6912 
6913 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6914 	/* If we do not have a RIF, then we already took care of
6915 	 * removing the macvlan's MAC during RIF deletion.
6916 	 */
6917 	if (!rif)
6918 		return;
6919 	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6920 			     false);
6921 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6922 			    mlxsw_sp_fid_index(rif->fid), false);
6923 }
6924 
6925 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6926 			      const struct net_device *macvlan_dev)
6927 {
6928 	__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6929 }
6930 
6931 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
6932 					   struct net_device *macvlan_dev,
6933 					   unsigned long event,
6934 					   struct netlink_ext_ack *extack)
6935 {
6936 	switch (event) {
6937 	case NETDEV_UP:
6938 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6939 	case NETDEV_DOWN:
6940 		__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6941 		break;
6942 	}
6943 
6944 	return 0;
6945 }
6946 
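/* The device requires the MAC addresses of all the router interfaces
 * to share the same prefix, as only the last bits can differ between
 * RIFs. Verify the new address against those of the existing RIFs.
 */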
6947 static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
6948 					       struct net_device *dev,
6949 					       const unsigned char *dev_addr,
6950 					       struct netlink_ext_ack *extack)
6951 {
6952 	struct mlxsw_sp_rif *rif;
6953 	int i;
6954 
6955 	/* A RIF is not created for macvlan netdevs. Their MAC is used to
6956 	 * populate the FDB.
6957 	 */
6958 	if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
6959 		return 0;
6960 
6961 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6962 		rif = mlxsw_sp->router->rifs[i];
6963 		if (rif && rif->ops &&
6964 		    rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB)
6965 			continue;
6966 		if (rif && rif->dev && rif->dev != dev &&
6967 		    !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
6968 					     mlxsw_sp->mac_mask)) {
6969 			NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
6970 			return -EINVAL;
6971 		}
6972 	}
6973 
6974 	return 0;
6975 }
6976 
6977 static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
6978 				     struct net_device *dev,
6979 				     unsigned long event,
6980 				     struct netlink_ext_ack *extack)
6981 {
6982 	if (mlxsw_sp_port_dev_check(dev))
6983 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6984 	else if (netif_is_lag_master(dev))
6985 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6986 	else if (netif_is_bridge_master(dev))
6987 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
6988 						      extack);
6989 	else if (is_vlan_dev(dev))
6990 		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
6991 						    extack);
6992 	else if (netif_is_macvlan(dev))
6993 		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
6994 						       extack);
6995 	else
6996 		return 0;
6997 }
6998 
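/* Process address events other than NETDEV_UP, which is handled by
 * mlxsw_sp_inetaddr_valid_event() where the operation can still be
 * vetoed before the address is installed.
 */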
6999 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
7000 				   unsigned long event, void *ptr)
7001 {
7002 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
7003 	struct net_device *dev = ifa->ifa_dev->dev;
7004 	struct mlxsw_sp_router *router;
7005 	struct mlxsw_sp_rif *rif;
7006 	int err = 0;
7007 
7008 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
7009 	if (event == NETDEV_UP)
7010 		goto out;
7011 
7012 	router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
7013 	rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
7014 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7015 		goto out;
7016 
7017 	err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
7018 out:
7019 	return notifier_from_errno(err);
7020 }
7021 
7022 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
7023 				  unsigned long event, void *ptr)
7024 {
7025 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
7026 	struct net_device *dev = ivi->ivi_dev->dev;
7027 	struct mlxsw_sp *mlxsw_sp;
7028 	struct mlxsw_sp_rif *rif;
7029 	int err = 0;
7030 
7031 	mlxsw_sp = mlxsw_sp_lower_get(dev);
7032 	if (!mlxsw_sp)
7033 		goto out;
7034 
7035 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7036 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7037 		goto out;
7038 
7039 	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
7040 						  ivi->extack);
7041 	if (err)
7042 		goto out;
7043 
7044 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
7045 out:
7046 	return notifier_from_errno(err);
7047 }
7048 
7049 struct mlxsw_sp_inet6addr_event_work {
7050 	struct work_struct work;
7051 	struct mlxsw_sp *mlxsw_sp;
7052 	struct net_device *dev;
7053 	unsigned long event;
7054 };
7055 
7056 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
7057 {
7058 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
7059 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
7060 	struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
7061 	struct net_device *dev = inet6addr_work->dev;
7062 	unsigned long event = inet6addr_work->event;
7063 	struct mlxsw_sp_rif *rif;
7064 
7065 	rtnl_lock();
7066 
7067 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7068 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7069 		goto out;
7070 
7071 	__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
7072 out:
7073 	rtnl_unlock();
7074 	dev_put(dev);
7075 	kfree(inet6addr_work);
7076 }
7077 
7078 /* Called with rcu_read_lock() */
7079 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
7080 				    unsigned long event, void *ptr)
7081 {
7082 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
7083 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
7084 	struct net_device *dev = if6->idev->dev;
7085 	struct mlxsw_sp_router *router;
7086 
7087 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
7088 	if (event == NETDEV_UP)
7089 		return NOTIFY_DONE;
7090 
7091 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
7092 	if (!inet6addr_work)
7093 		return NOTIFY_BAD;
7094 
7095 	router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
7096 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
7097 	inet6addr_work->mlxsw_sp = router->mlxsw_sp;
7098 	inet6addr_work->dev = dev;
7099 	inet6addr_work->event = event;
7100 	dev_hold(dev);
7101 	mlxsw_core_schedule_work(&inet6addr_work->work);
7102 
7103 	return NOTIFY_DONE;
7104 }
7105 
7106 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
7107 				   unsigned long event, void *ptr)
7108 {
7109 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
7110 	struct net_device *dev = i6vi->i6vi_dev->dev;
7111 	struct mlxsw_sp *mlxsw_sp;
7112 	struct mlxsw_sp_rif *rif;
7113 	int err = 0;
7114 
7115 	mlxsw_sp = mlxsw_sp_lower_get(dev);
7116 	if (!mlxsw_sp)
7117 		goto out;
7118 
7119 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7120 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7121 		goto out;
7122 
7123 	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
7124 						  i6vi->extack);
7125 	if (err)
7126 		goto out;
7127 
7128 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
7129 out:
7130 	return notifier_from_errno(err);
7131 }
7132 
7133 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
7134 			     const char *mac, int mtu)
7135 {
7136 	char ritr_pl[MLXSW_REG_RITR_LEN];
7137 	int err;
7138 
7139 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
7140 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7141 	if (err)
7142 		return err;
7143 
7144 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
7145 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
7146 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
7147 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7148 }
7149 
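/* React to a MAC address or MTU change on a router port: retire the FDB
 * entry of the old MAC, edit the RIF itself, install an FDB entry for
 * the new MAC and, if the MTU changed, propagate it to the multicast
 * router. Each step is rolled back on failure.
 */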
7150 static int
7151 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
7152 				  struct mlxsw_sp_rif *rif)
7153 {
7154 	struct net_device *dev = rif->dev;
7155 	u16 fid_index;
7156 	int err;
7157 
7158 	fid_index = mlxsw_sp_fid_index(rif->fid);
7159 
7160 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
7161 	if (err)
7162 		return err;
7163 
7164 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
7165 				dev->mtu);
7166 	if (err)
7167 		goto err_rif_edit;
7168 
7169 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
7170 	if (err)
7171 		goto err_rif_fdb_op;
7172 
7173 	if (rif->mtu != dev->mtu) {
7174 		struct mlxsw_sp_vr *vr;
7175 		int i;
7176 
7177 		/* The RIF is relevant only to its own mr_table instance since,
7178 		 * unlike in unicast routing, a RIF cannot be shared between
7179 		 * several multicast routing tables.
7180 		 */
7181 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
7182 		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7183 			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
7184 						   rif, dev->mtu);
7185 	}
7186 
7187 	ether_addr_copy(rif->addr, dev->dev_addr);
7188 	rif->mtu = dev->mtu;
7189 
7190 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
7191 
7192 	return 0;
7193 
7194 err_rif_fdb_op:
7195 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
7196 err_rif_edit:
7197 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
7198 	return err;
7199 }
7200 
7201 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
7202 			    struct netdev_notifier_pre_changeaddr_info *info)
7203 {
7204 	struct netlink_ext_ack *extack;
7205 
7206 	extack = netdev_notifier_info_to_extack(&info->info);
7207 	return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
7208 						   info->dev_addr, extack);
7209 }
7210 
7211 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
7212 					 unsigned long event, void *ptr)
7213 {
7214 	struct mlxsw_sp *mlxsw_sp;
7215 	struct mlxsw_sp_rif *rif;
7216 
7217 	mlxsw_sp = mlxsw_sp_lower_get(dev);
7218 	if (!mlxsw_sp)
7219 		return 0;
7220 
7221 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7222 	if (!rif)
7223 		return 0;
7224 
7225 	switch (event) {
7226 	case NETDEV_CHANGEMTU: /* fall through */
7227 	case NETDEV_CHANGEADDR:
7228 		return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
7229 	case NETDEV_PRE_CHANGEADDR:
7230 		return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
7231 	}
7232 
7233 	return 0;
7234 }
7235 
7236 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
7237 				  struct net_device *l3_dev,
7238 				  struct netlink_ext_ack *extack)
7239 {
7240 	struct mlxsw_sp_rif *rif;
7241 
7242 	/* If netdev is already associated with a RIF, then we need to
7243 	 * destroy it and create a new one with the new virtual router ID.
7244 	 */
7245 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7246 	if (rif)
7247 		__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
7248 					  extack);
7249 
7250 	return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
7251 }
7252 
7253 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
7254 				    struct net_device *l3_dev)
7255 {
7256 	struct mlxsw_sp_rif *rif;
7257 
7258 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7259 	if (!rif)
7260 		return;
7261 	__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
7262 }
7263 
7264 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
7265 				 struct netdev_notifier_changeupper_info *info)
7266 {
7267 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
7268 	int err = 0;
7269 
7270 	/* We do not create a RIF for a macvlan, but only use it to
7271 	 * direct more MAC addresses to the router.
7272 	 */
7273 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
7274 		return 0;
7275 
7276 	switch (event) {
7277 	case NETDEV_PRECHANGEUPPER:
7278 		return 0;
7279 	case NETDEV_CHANGEUPPER:
7280 		if (info->linking) {
7281 			struct netlink_ext_ack *extack;
7282 
7283 			extack = netdev_notifier_info_to_extack(&info->info);
7284 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
7285 		} else {
7286 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
7287 		}
7288 		break;
7289 	}
7290 
7291 	return err;
7292 }
7293 
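/* Callback for netdev_walk_all_upper_dev_rcu(): remove the FDB entry
 * that directs the macvlan's MAC address to the router.
 */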
7294 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
7295 {
7296 	struct mlxsw_sp_rif *rif = data;
7297 
7298 	if (!netif_is_macvlan(dev))
7299 		return 0;
7300 
7301 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
7302 				   mlxsw_sp_fid_index(rif->fid), false);
7303 }
7304 
7305 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
7306 {
7307 	if (!netif_is_macvlan_port(rif->dev))
7308 		return 0;
7309 
7310 	netdev_warn(rif->dev, "Router interface is being deleted; upper macvlans will stop working\n");
7311 	return netdev_walk_all_upper_dev_rcu(rif->dev,
7312 					     __mlxsw_sp_rif_macvlan_flush, rif);
7313 }
7314 
7315 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7316 				       const struct mlxsw_sp_rif_params *params)
7317 {
7318 	struct mlxsw_sp_rif_subport *rif_subport;
7319 
7320 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7321 	refcount_set(&rif_subport->ref_count, 1);
7322 	rif_subport->vid = params->vid;
7323 	rif_subport->lag = params->lag;
7324 	if (params->lag)
7325 		rif_subport->lag_id = params->lag_id;
7326 	else
7327 		rif_subport->system_port = params->system_port;
7328 }
7329 
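/* Enable or disable the sub-port RIF in hardware via the RITR register,
 * keyed by either the LAG ID or the system port, plus the VID.
 */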
7330 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7331 {
7332 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7333 	struct mlxsw_sp_rif_subport *rif_subport;
7334 	char ritr_pl[MLXSW_REG_RITR_LEN];
7335 
7336 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7337 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7338 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
7339 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7340 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7341 				  rif_subport->lag ? rif_subport->lag_id :
7342 						     rif_subport->system_port,
7343 				  rif_subport->vid);
7344 
7345 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7346 }
7347 
7348 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7349 {
7350 	int err;
7351 
7352 	err = mlxsw_sp_rif_subport_op(rif, true);
7353 	if (err)
7354 		return err;
7355 
7356 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7357 				  mlxsw_sp_fid_index(rif->fid), true);
7358 	if (err)
7359 		goto err_rif_fdb_op;
7360 
7361 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7362 	return 0;
7363 
7364 err_rif_fdb_op:
7365 	mlxsw_sp_rif_subport_op(rif, false);
7366 	return err;
7367 }
7368 
7369 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7370 {
7371 	struct mlxsw_sp_fid *fid = rif->fid;
7372 
7373 	mlxsw_sp_fid_rif_set(fid, NULL);
7374 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7375 			    mlxsw_sp_fid_index(fid), false);
7376 	mlxsw_sp_rif_macvlan_flush(rif);
7377 	mlxsw_sp_rif_subport_op(rif, false);
7378 }
7379 
7380 static struct mlxsw_sp_fid *
7381 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7382 			     struct netlink_ext_ack *extack)
7383 {
7384 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7385 }
7386 
7387 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7388 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
7389 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
7390 	.setup			= mlxsw_sp_rif_subport_setup,
7391 	.configure		= mlxsw_sp_rif_subport_configure,
7392 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
7393 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
7394 };
7395 
7396 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7397 				    enum mlxsw_reg_ritr_if_type type,
7398 				    u16 vid_fid, bool enable)
7399 {
7400 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7401 	char ritr_pl[MLXSW_REG_RITR_LEN];
7402 
7403 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7404 			    rif->dev->mtu);
7405 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7406 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7407 
7408 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7409 }
7410 
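/* In flooding configuration, the router is represented by a local port
 * number one past the highest physical port.
 */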
7411 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7412 {
7413 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7414 }
7415 
7416 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7417 {
7418 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7419 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7420 	int err;
7421 
7422 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7423 	if (err)
7424 		return err;
7425 
7426 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7427 				     mlxsw_sp_router_port(mlxsw_sp), true);
7428 	if (err)
7429 		goto err_fid_mc_flood_set;
7430 
7431 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7432 				     mlxsw_sp_router_port(mlxsw_sp), true);
7433 	if (err)
7434 		goto err_fid_bc_flood_set;
7435 
7436 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7437 				  mlxsw_sp_fid_index(rif->fid), true);
7438 	if (err)
7439 		goto err_rif_fdb_op;
7440 
7441 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7442 	return 0;
7443 
7444 err_rif_fdb_op:
7445 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7446 			       mlxsw_sp_router_port(mlxsw_sp), false);
7447 err_fid_bc_flood_set:
7448 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7449 			       mlxsw_sp_router_port(mlxsw_sp), false);
7450 err_fid_mc_flood_set:
7451 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7452 	return err;
7453 }
7454 
7455 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7456 {
7457 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7458 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7459 	struct mlxsw_sp_fid *fid = rif->fid;
7460 
7461 	mlxsw_sp_fid_rif_set(fid, NULL);
7462 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7463 			    mlxsw_sp_fid_index(fid), false);
7464 	mlxsw_sp_rif_macvlan_flush(rif);
7465 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7466 			       mlxsw_sp_router_port(mlxsw_sp), false);
7467 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7468 			       mlxsw_sp_router_port(mlxsw_sp), false);
7469 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7470 }
7471 
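/* Resolve the 802.1Q FID backing a VLAN RIF: for a VLAN device the VID
 * and the underlying bridge are taken from the device itself; for a
 * bridge device the PVID is used instead.
 */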
7472 static struct mlxsw_sp_fid *
7473 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7474 			  struct netlink_ext_ack *extack)
7475 {
7476 	struct net_device *br_dev = rif->dev;
7477 	u16 vid;
7478 	int err;
7479 
7480 	if (is_vlan_dev(rif->dev)) {
7481 		vid = vlan_dev_vlan_id(rif->dev);
7482 		br_dev = vlan_dev_real_dev(rif->dev);
7483 		if (WARN_ON(!netif_is_bridge_master(br_dev)))
7484 			return ERR_PTR(-EINVAL);
7485 	} else {
7486 		err = br_vlan_get_pvid(rif->dev, &vid);
7487 		if (err < 0 || !vid) {
7488 			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7489 			return ERR_PTR(-EINVAL);
7490 		}
7491 	}
7492 
7493 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
7494 }
7495 
7496 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7497 {
7498 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7499 	struct switchdev_notifier_fdb_info info;
7500 	struct net_device *br_dev;
7501 	struct net_device *dev;
7502 
7503 	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7504 	dev = br_fdb_find_port(br_dev, mac, vid);
7505 	if (!dev)
7506 		return;
7507 
7508 	info.addr = mac;
7509 	info.vid = vid;
7510 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7511 				 NULL);
7512 }
7513 
7514 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7515 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7516 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7517 	.configure		= mlxsw_sp_rif_vlan_configure,
7518 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
7519 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7520 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7521 };
7522 
7523 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7524 {
7525 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7526 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7527 	int err;
7528 
7529 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7530 				       true);
7531 	if (err)
7532 		return err;
7533 
7534 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7535 				     mlxsw_sp_router_port(mlxsw_sp), true);
7536 	if (err)
7537 		goto err_fid_mc_flood_set;
7538 
7539 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7540 				     mlxsw_sp_router_port(mlxsw_sp), true);
7541 	if (err)
7542 		goto err_fid_bc_flood_set;
7543 
7544 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7545 				  mlxsw_sp_fid_index(rif->fid), true);
7546 	if (err)
7547 		goto err_rif_fdb_op;
7548 
7549 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7550 	return 0;
7551 
7552 err_rif_fdb_op:
7553 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7554 			       mlxsw_sp_router_port(mlxsw_sp), false);
7555 err_fid_bc_flood_set:
7556 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7557 			       mlxsw_sp_router_port(mlxsw_sp), false);
7558 err_fid_mc_flood_set:
7559 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7560 	return err;
7561 }
7562 
7563 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7564 {
7565 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7566 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7567 	struct mlxsw_sp_fid *fid = rif->fid;
7568 
7569 	mlxsw_sp_fid_rif_set(fid, NULL);
7570 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7571 			    mlxsw_sp_fid_index(fid), false);
7572 	mlxsw_sp_rif_macvlan_flush(rif);
7573 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7574 			       mlxsw_sp_router_port(mlxsw_sp), false);
7575 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7576 			       mlxsw_sp_router_port(mlxsw_sp), false);
7577 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7578 }
7579 
7580 static struct mlxsw_sp_fid *
7581 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7582 			 struct netlink_ext_ack *extack)
7583 {
7584 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
7585 }
7586 
7587 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7588 {
7589 	struct switchdev_notifier_fdb_info info;
7590 	struct net_device *dev;
7591 
7592 	dev = br_fdb_find_port(rif->dev, mac, 0);
7593 	if (!dev)
7594 		return;
7595 
7596 	info.addr = mac;
7597 	info.vid = 0;
7598 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7599 				 NULL);
7600 }
7601 
7602 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7603 	.type			= MLXSW_SP_RIF_TYPE_FID,
7604 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7605 	.configure		= mlxsw_sp_rif_fid_configure,
7606 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7607 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
7608 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
7609 };
7610 
7611 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
7612 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7613 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7614 	.configure		= mlxsw_sp_rif_fid_configure,
7615 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7616 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7617 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7618 };
7619 
7620 static struct mlxsw_sp_rif_ipip_lb *
7621 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7622 {
7623 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7624 }
7625 
7626 static void
7627 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7628 			   const struct mlxsw_sp_rif_params *params)
7629 {
7630 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7631 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
7632 
7633 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7634 				 common);
7635 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7636 	rif_lb->lb_config = params_lb->lb_config;
7637 }
7638 
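/* On Spectrum-1 the IP-in-IP loopback RIF is bound directly to an
 * underlay virtual router (ul_vr_id). Compare with the Spectrum-2
 * variant further below, which binds it to an underlay RIF instead.
 */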
7639 static int
7640 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7641 {
7642 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7643 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7644 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7645 	struct mlxsw_sp_vr *ul_vr;
7646 	int err;
7647 
7648 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7649 	if (IS_ERR(ul_vr))
7650 		return PTR_ERR(ul_vr);
7651 
7652 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
7653 	if (err)
7654 		goto err_loopback_op;
7655 
7656 	lb_rif->ul_vr_id = ul_vr->id;
7657 	lb_rif->ul_rif_id = 0;
7658 	++ul_vr->rif_count;
7659 	return 0;
7660 
7661 err_loopback_op:
7662 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7663 	return err;
7664 }
7665 
7666 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7667 {
7668 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7669 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7670 	struct mlxsw_sp_vr *ul_vr;
7671 
7672 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7673 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
7674 
7675 	--ul_vr->rif_count;
7676 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7677 }
7678 
7679 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
7680 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7681 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7682 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7683 	.configure		= mlxsw_sp1_rif_ipip_lb_configure,
7684 	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
7685 };
7686 
7687 const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
7688 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7689 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
7690 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7691 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp1_rif_ipip_lb_ops,
7692 };
7693 
7694 static int
7695 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
7696 {
7697 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7698 	char ritr_pl[MLXSW_REG_RITR_LEN];
7699 
7700 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
7701 			    ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
7702 	mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
7703 					     MLXSW_REG_RITR_LOOPBACK_GENERIC);
7704 
7705 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7706 }
7707 
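/* Create an underlay RIF. Unlike regular RIFs, it is not backed by a
 * netdevice, hence the NULL dev argument to mlxsw_sp_rif_alloc().
 */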
7708 static struct mlxsw_sp_rif *
7709 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
7710 		       struct netlink_ext_ack *extack)
7711 {
7712 	struct mlxsw_sp_rif *ul_rif;
7713 	u16 rif_index;
7714 	int err;
7715 
7716 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7717 	if (err) {
7718 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7719 		return ERR_PTR(err);
7720 	}
7721 
7722 	ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
7723 	if (!ul_rif)
7724 		return ERR_PTR(-ENOMEM);
7725 
7726 	mlxsw_sp->router->rifs[rif_index] = ul_rif;
7727 	ul_rif->mlxsw_sp = mlxsw_sp;
7728 	err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
7729 	if (err)
7730 		goto ul_rif_op_err;
7731 
7732 	return ul_rif;
7733 
7734 ul_rif_op_err:
7735 	mlxsw_sp->router->rifs[rif_index] = NULL;
7736 	kfree(ul_rif);
7737 	return ERR_PTR(err);
7738 }
7739 
7740 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
7741 {
7742 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7743 
7744 	mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
7745 	mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
7746 	kfree(ul_rif);
7747 }
7748 
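/* Return the underlay RIF of the given table, creating it on first use.
 * Later calls only take another reference, which mlxsw_sp_ul_rif_put()
 * drops again.
 */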
7749 static struct mlxsw_sp_rif *
7750 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
7751 		    struct netlink_ext_ack *extack)
7752 {
7753 	struct mlxsw_sp_vr *vr;
7754 	int err;
7755 
7756 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
7757 	if (IS_ERR(vr))
7758 		return ERR_CAST(vr);
7759 
7760 	if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
7761 		return vr->ul_rif;
7762 
7763 	vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
7764 	if (IS_ERR(vr->ul_rif)) {
7765 		err = PTR_ERR(vr->ul_rif);
7766 		goto err_ul_rif_create;
7767 	}
7768 
7769 	vr->rif_count++;
7770 	refcount_set(&vr->ul_rif_refcnt, 1);
7771 
7772 	return vr->ul_rif;
7773 
7774 err_ul_rif_create:
7775 	mlxsw_sp_vr_put(mlxsw_sp, vr);
7776 	return ERR_PTR(err);
7777 }
7778 
7779 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
7780 {
7781 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7782 	struct mlxsw_sp_vr *vr;
7783 
7784 	vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
7785 
7786 	if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
7787 		return;
7788 
7789 	vr->rif_count--;
7790 	mlxsw_sp_ul_rif_destroy(ul_rif);
7791 	mlxsw_sp_vr_put(mlxsw_sp, vr);
7792 }
7793 
7794 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
7795 			       u16 *ul_rif_index)
7796 {
7797 	struct mlxsw_sp_rif *ul_rif;
7798 
7799 	ASSERT_RTNL();
7800 
7801 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7802 	if (IS_ERR(ul_rif))
7803 		return PTR_ERR(ul_rif);
7804 	*ul_rif_index = ul_rif->rif_index;
7805 
7806 	return 0;
7807 }
7808 
7809 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
7810 {
7811 	struct mlxsw_sp_rif *ul_rif;
7812 
7813 	ASSERT_RTNL();
7814 
7815 	ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
7816 	if (WARN_ON(!ul_rif))
7817 		return;
7818 
7819 	mlxsw_sp_ul_rif_put(ul_rif);
7820 }
7821 
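/* On Spectrum-2 the loopback RIF is bound to a shared underlay RIF
 * (ul_rif_id) rather than to an underlay virtual router.
 */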
7822 static int
7823 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7824 {
7825 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7826 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7827 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7828 	struct mlxsw_sp_rif *ul_rif;
7829 	int err;
7830 
7831 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7832 	if (IS_ERR(ul_rif))
7833 		return PTR_ERR(ul_rif);
7834 
7835 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
7836 	if (err)
7837 		goto err_loopback_op;
7838 
7839 	lb_rif->ul_vr_id = 0;
7840 	lb_rif->ul_rif_id = ul_rif->rif_index;
7841 
7842 	return 0;
7843 
7844 err_loopback_op:
7845 	mlxsw_sp_ul_rif_put(ul_rif);
7846 	return err;
7847 }
7848 
7849 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7850 {
7851 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7852 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7853 	struct mlxsw_sp_rif *ul_rif;
7854 
7855 	ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
7856 	mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
7857 	mlxsw_sp_ul_rif_put(ul_rif);
7858 }
7859 
7860 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
7861 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7862 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7863 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7864 	.configure		= mlxsw_sp2_rif_ipip_lb_configure,
7865 	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
7866 };
7867 
7868 const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
7869 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7870 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
7871 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7872 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp2_rif_ipip_lb_ops,
7873 };
7874 
7875 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7876 {
7877 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7878 
7879 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
7880 					 sizeof(struct mlxsw_sp_rif *),
7881 					 GFP_KERNEL);
7882 	if (!mlxsw_sp->router->rifs)
7883 		return -ENOMEM;
7884 
7885 	return 0;
7886 }
7887 
7888 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7889 {
7890 	int i;
7891 
7892 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7893 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7894 
7895 	kfree(mlxsw_sp->router->rifs);
7896 }
7897 
7898 static int
7899 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7900 {
7901 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7902 
7903 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7904 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7905 }
7906 
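/* Initialize IP-in-IP support: the tunnel list, the ECN mappings for
 * encapsulation and decapsulation, and the general IP-in-IP tunneling
 * configuration (TIGCR).
 */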
7907 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7908 {
7909 	int err;
7910 
7911 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7912 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7913 
7914 	err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
7915 	if (err)
7916 		return err;
7917 	err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
7918 	if (err)
7919 		return err;
7920 
7921 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7922 }
7923 
7924 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7925 {
7926 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7927 }
7928 
7929 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7930 {
7931 	struct mlxsw_sp_router *router;
7932 
7933 	/* Flush pending FIB notifications and then flush the device's
7934 	 * table before requesting another dump. The FIB notification
7935 	 * block is unregistered, so no need to take RTNL.
7936 	 */
7937 	mlxsw_core_flush_owq();
7938 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7939 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7940 }
7941 
7942 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7943 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7944 {
7945 	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7946 }
7947 
7948 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7949 {
7950 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7951 }
7952 
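/* Mirror the kernel's IPv4 multipath hash policy: hash on the address
 * pair only, unless the policy asks for L4, in which case the protocol
 * and the TCP/UDP ports are mixed in as well.
 */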
7953 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
7954 {
7955 	struct net *net = mlxsw_sp_net(mlxsw_sp);
7956 	bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy;
7957 
7958 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7959 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7960 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7961 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7962 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7963 	if (only_l3)
7964 		return;
7965 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7966 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7967 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7968 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7969 }
7970 
7971 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
7972 {
7973 	bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp));
7974 
7975 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7976 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7977 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7978 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7979 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7980 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7981 	if (only_l3) {
7982 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7983 					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7984 	} else {
7985 		mlxsw_sp_mp_hash_header_set(recr2_pl,
7986 					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7987 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7988 					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
7989 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7990 					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
7991 	}
7992 }
7993 
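/* Seed the ECMP hash from the base MAC, presumably so that identical
 * switches in a fabric do not all make the same multipath choice for a
 * given flow (hash polarization).
 */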
7994 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7995 {
7996 	char recr2_pl[MLXSW_REG_RECR2_LEN];
7997 	u32 seed;
7998 
7999 	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
8000 	mlxsw_reg_recr2_pack(recr2_pl, seed);
8001 	mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl);
8002 	mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl);
8003 
8004 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
8005 }
8006 #else
8007 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
8008 {
8009 	return 0;
8010 }
8011 #endif
8012 
8013 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
8014 {
8015 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
8016 	unsigned int i;
8017 
8018 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
8019 
8020 	/* HW determines switch priority from the DSCP bits, while the kernel
8021 	 * still derives it from the full ToS byte. Since the bit layouts
8022 	 * differ, translate each DSCP value to the priority the kernel would
8023 	 * compute for the matching ToS, skipping the 2 least-significant ECN bits.
8024 	 */
8025 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
8026 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
8027 
8028 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
8029 }
8030 
8031 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
8032 {
8033 	struct net *net = mlxsw_sp_net(mlxsw_sp);
8034 	bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
8035 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
8036 	u64 max_rifs;
8038 
8039 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
8040 		return -EIO;
8041 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
8042 
8043 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
8044 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
8045 	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
8046 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
8050 }
8051 
8052 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8053 {
8054 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
8055 
8056 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
8057 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
8058 }
8059 
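/* Top-level router bring-up. The FIB notifier is registered last, since
 * registering it replays the existing FIB entries, which must find all
 * of the data structures above already initialized.
 */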
8060 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
8061 			 struct netlink_ext_ack *extack)
8062 {
8063 	struct mlxsw_sp_router *router;
8064 	int err;
8065 
8066 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
8067 	if (!router)
8068 		return -ENOMEM;
8069 	mlxsw_sp->router = router;
8070 	router->mlxsw_sp = mlxsw_sp;
8071 
8072 	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
8073 	err = register_inetaddr_notifier(&router->inetaddr_nb);
8074 	if (err)
8075 		goto err_register_inetaddr_notifier;
8076 
8077 	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
8078 	err = register_inet6addr_notifier(&router->inet6addr_nb);
8079 	if (err)
8080 		goto err_register_inet6addr_notifier;
8081 
8082 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
8083 	err = __mlxsw_sp_router_init(mlxsw_sp);
8084 	if (err)
8085 		goto err_router_init;
8086 
8087 	err = mlxsw_sp_rifs_init(mlxsw_sp);
8088 	if (err)
8089 		goto err_rifs_init;
8090 
8091 	err = mlxsw_sp_ipips_init(mlxsw_sp);
8092 	if (err)
8093 		goto err_ipips_init;
8094 
8095 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
8096 			      &mlxsw_sp_nexthop_ht_params);
8097 	if (err)
8098 		goto err_nexthop_ht_init;
8099 
8100 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
8101 			      &mlxsw_sp_nexthop_group_ht_params);
8102 	if (err)
8103 		goto err_nexthop_group_ht_init;
8104 
8105 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
8106 	err = mlxsw_sp_lpm_init(mlxsw_sp);
8107 	if (err)
8108 		goto err_lpm_init;
8109 
8110 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
8111 	if (err)
8112 		goto err_mr_init;
8113 
8114 	err = mlxsw_sp_vrs_init(mlxsw_sp);
8115 	if (err)
8116 		goto err_vrs_init;
8117 
8118 	err = mlxsw_sp_neigh_init(mlxsw_sp);
8119 	if (err)
8120 		goto err_neigh_init;
8121 
8122 	mlxsw_sp->router->netevent_nb.notifier_call =
8123 		mlxsw_sp_router_netevent_event;
8124 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8125 	if (err)
8126 		goto err_register_netevent_notifier;
8127 
8128 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
8129 	if (err)
8130 		goto err_mp_hash_init;
8131 
8132 	err = mlxsw_sp_dscp_init(mlxsw_sp);
8133 	if (err)
8134 		goto err_dscp_init;
8135 
8136 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
8137 	err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
8138 				    &mlxsw_sp->router->fib_nb,
8139 				    mlxsw_sp_router_fib_dump_flush, extack);
8140 	if (err)
8141 		goto err_register_fib_notifier;
8142 
8143 	return 0;
8144 
8145 err_register_fib_notifier:
8146 err_dscp_init:
8147 err_mp_hash_init:
8148 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8149 err_register_netevent_notifier:
8150 	mlxsw_sp_neigh_fini(mlxsw_sp);
8151 err_neigh_init:
8152 	mlxsw_sp_vrs_fini(mlxsw_sp);
8153 err_vrs_init:
8154 	mlxsw_sp_mr_fini(mlxsw_sp);
8155 err_mr_init:
8156 	mlxsw_sp_lpm_fini(mlxsw_sp);
8157 err_lpm_init:
8158 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8159 err_nexthop_group_ht_init:
8160 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8161 err_nexthop_ht_init:
8162 	mlxsw_sp_ipips_fini(mlxsw_sp);
8163 err_ipips_init:
8164 	mlxsw_sp_rifs_fini(mlxsw_sp);
8165 err_rifs_init:
8166 	__mlxsw_sp_router_fini(mlxsw_sp);
8167 err_router_init:
8168 	unregister_inet6addr_notifier(&router->inet6addr_nb);
8169 err_register_inet6addr_notifier:
8170 	unregister_inetaddr_notifier(&router->inetaddr_nb);
8171 err_register_inetaddr_notifier:
8172 	kfree(mlxsw_sp->router);
8173 	return err;
8174 }
8175 
8176 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8177 {
8178 	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
8179 				&mlxsw_sp->router->fib_nb);
8180 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8181 	mlxsw_sp_neigh_fini(mlxsw_sp);
8182 	mlxsw_sp_vrs_fini(mlxsw_sp);
8183 	mlxsw_sp_mr_fini(mlxsw_sp);
8184 	mlxsw_sp_lpm_fini(mlxsw_sp);
8185 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8186 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8187 	mlxsw_sp_ipips_fini(mlxsw_sp);
8188 	mlxsw_sp_rifs_fini(mlxsw_sp);
8189 	__mlxsw_sp_router_fini(mlxsw_sp);
8190 	unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
8191 	unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
8192 	kfree(mlxsw_sp->router);
8193 }
8194