xref: /linux/include/net/nexthop.h (revision 1b98f357dadd6ea613a435fbaef1a5dd7b35fd21)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Generic nexthop implementation
4  *
5  * Copyright (c) 2017-19 Cumulus Networks
6  * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
7  */
8 
9 #ifndef __LINUX_NEXTHOP_H
10 #define __LINUX_NEXTHOP_H
11 
12 #include <linux/netdevice.h>
13 #include <linux/notifier.h>
14 #include <linux/route.h>
15 #include <linux/types.h>
16 #include <net/ip_fib.h>
17 #include <net/ip6_fib.h>
18 #include <net/netlink.h>
19 
/* NOTE(review): going by the name, the set of RTNH_F_* flags accepted from
 * user space; only RTNH_F_ONLINK is listed here. Confirm against the users.
 */
#define NEXTHOP_VALID_USER_FLAGS	RTNH_F_ONLINK

/* Defined further down; declared early so nh_info can point at it. */
struct nexthop;
23 
24 struct nh_config {
25 	u32		nh_id;
26 
27 	u8		nh_family;
28 	u8		nh_protocol;
29 	u8		nh_blackhole;
30 	u8		nh_fdb;
31 	u32		nh_flags;
32 
33 	int		nh_ifindex;
34 	struct net_device *dev;
35 
36 	union {
37 		__be32		ipv4;
38 		struct in6_addr	ipv6;
39 	} gw;
40 
41 	struct nlattr	*nh_grp;
42 	u16		nh_grp_type;
43 	u16		nh_grp_res_num_buckets;
44 	unsigned long	nh_grp_res_idle_timer;
45 	unsigned long	nh_grp_res_unbalanced_timer;
46 	bool		nh_grp_res_has_num_buckets;
47 	bool		nh_grp_res_has_idle_timer;
48 	bool		nh_grp_res_has_unbalanced_timer;
49 
50 	bool		nh_hw_stats;
51 
52 	struct nlattr	*nh_encap;
53 	u16		nh_encap_type;
54 
55 	u32		nlflags;
56 	struct nl_info	nlinfo;
57 };
58 
59 struct nh_info {
60 	struct hlist_node	dev_hash;    /* entry on netns devhash */
61 	struct nexthop		*nh_parent;
62 
63 	u8			family;
64 	bool			reject_nh;
65 	bool			fdb_nh;
66 
67 	union {
68 		struct fib_nh_common	fib_nhc;
69 		struct fib_nh		fib_nh;
70 		struct fib6_nh		fib6_nh;
71 	};
72 };
73 
74 struct nh_res_bucket {
75 	struct nh_grp_entry __rcu *nh_entry;
76 	atomic_long_t		used_time;
77 	unsigned long		migrated_time;
78 	bool			occupied;
79 	u8			nh_flags;
80 };
81 
82 struct nh_res_table {
83 	struct net		*net;
84 	u32			nhg_id;
85 	struct delayed_work	upkeep_dw;
86 
87 	/* List of NHGEs that have too few buckets ("uw" for underweight).
88 	 * Reclaimed buckets will be given to entries in this list.
89 	 */
90 	struct list_head	uw_nh_entries;
91 	unsigned long		unbalanced_since;
92 
93 	u32			idle_timer;
94 	u32			unbalanced_timer;
95 
96 	u16			num_nh_buckets;
97 	struct nh_res_bucket	nh_buckets[] __counted_by(num_nh_buckets);
98 };
99 
100 struct nh_grp_entry_stats {
101 	u64_stats_t packets;
102 	struct u64_stats_sync syncp;
103 };
104 
105 struct nh_grp_entry {
106 	struct nexthop	*nh;
107 	struct nh_grp_entry_stats __percpu	*stats;
108 	u16		weight;
109 
110 	union {
111 		struct {
112 			atomic_t	upper_bound;
113 		} hthr;
114 		struct {
115 			/* Member on uw_nh_entries. */
116 			struct list_head	uw_nh_entry;
117 
118 			u16			count_buckets;
119 			u16			wants_buckets;
120 		} res;
121 	};
122 
123 	struct list_head nh_list;
124 	struct nexthop	*nh_parent;  /* nexthop of group with this entry */
125 	u64		packets_hw;
126 };
127 
128 struct nh_group {
129 	struct nh_group		*spare; /* spare group for removals */
130 	u16			num_nh;
131 	bool			is_multipath;
132 	bool			hash_threshold;
133 	bool			resilient;
134 	bool			fdb_nh;
135 	bool			has_v4;
136 	bool			hw_stats;
137 
138 	struct nh_res_table __rcu *res_table;
139 	struct nh_grp_entry	nh_entries[] __counted_by(num_nh);
140 };
141 
142 struct nexthop {
143 	struct rb_node		rb_node;    /* entry on netns rbtree */
144 	struct list_head	fi_list;    /* v4 entries using nh */
145 	struct list_head	f6i_list;   /* v6 entries using nh */
146 	struct list_head        fdb_list;   /* fdb entries using this nh */
147 	struct list_head	grp_list;   /* nh group entries using this nh */
148 	struct net		*net;
149 
150 	u32			id;
151 
152 	u8			protocol;   /* app managing this nh */
153 	u8			nh_flags;
154 	bool			is_group;
155 	bool			dead;
156 	spinlock_t		lock;       /* protect dead and f6i_list */
157 
158 	refcount_t		refcnt;
159 	struct rcu_head		rcu;
160 
161 	union {
162 		struct nh_info	__rcu *nh_info;
163 		struct nh_group __rcu *nh_grp;
164 	};
165 };
166 
/* Nexthop event codes.
 * NOTE(review): presumably the event values delivered to notifiers added
 * with register_nexthop_notifier() - confirm against the notifier callers.
 */
enum nexthop_event_type {
	NEXTHOP_EVENT_DEL,
	NEXTHOP_EVENT_REPLACE,
	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
	NEXTHOP_EVENT_BUCKET_REPLACE,
	NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
};
174 
/* Discriminator stored in nh_notifier_info::type.
 * NOTE(review): each value presumably selects the same-named member of the
 * pointer union in struct nh_notifier_info - confirm.
 */
enum nh_notifier_info_type {
	NH_NOTIFIER_INFO_TYPE_SINGLE,
	NH_NOTIFIER_INFO_TYPE_GRP,
	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
	NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
};
182 
183 struct nh_notifier_single_info {
184 	struct net_device *dev;
185 	u8 gw_family;
186 	union {
187 		__be32 ipv4;
188 		struct in6_addr ipv6;
189 	};
190 	u32 id;
191 	u8 is_reject:1,
192 	   is_fdb:1,
193 	   has_encap:1;
194 };
195 
196 struct nh_notifier_grp_entry_info {
197 	u16 weight;
198 	struct nh_notifier_single_info nh;
199 };
200 
201 struct nh_notifier_grp_info {
202 	u16 num_nh;
203 	bool is_fdb;
204 	bool hw_stats;
205 	struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
206 };
207 
208 struct nh_notifier_res_bucket_info {
209 	u16 bucket_index;
210 	unsigned int idle_timer_ms;
211 	bool force;
212 	struct nh_notifier_single_info old_nh;
213 	struct nh_notifier_single_info new_nh;
214 };
215 
216 struct nh_notifier_res_table_info {
217 	u16 num_nh_buckets;
218 	bool hw_stats;
219 	struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
220 };
221 
222 struct nh_notifier_grp_hw_stats_entry_info {
223 	u32 id;
224 	u64 packets;
225 };
226 
227 struct nh_notifier_grp_hw_stats_info {
228 	u16 num_nh;
229 	bool hw_stats_used;
230 	struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
231 };
232 
233 struct nh_notifier_info {
234 	struct net *net;
235 	struct netlink_ext_ack *extack;
236 	u32 id;
237 	enum nh_notifier_info_type type;
238 	union {
239 		struct nh_notifier_single_info *nh;
240 		struct nh_notifier_grp_info *nh_grp;
241 		struct nh_notifier_res_table_info *nh_res_table;
242 		struct nh_notifier_res_bucket_info *nh_res_bucket;
243 		struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
244 	};
245 };
246 
247 int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
248 			      struct netlink_ext_ack *extack);
249 int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
250 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
251 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
252 void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
253 				 bool offload, bool trap);
254 void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
255 				     unsigned long *activity);
256 void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
257 				  unsigned int nh_idx,
258 				  u64 delta_packets);
259 
260 /* caller is holding rcu or rtnl; no reference taken to nexthop */
261 struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
262 void nexthop_free_rcu(struct rcu_head *head);
263 
264 static inline bool nexthop_get(struct nexthop *nh)
265 {
266 	return refcount_inc_not_zero(&nh->refcnt);
267 }
268 
269 static inline void nexthop_put(struct nexthop *nh)
270 {
271 	if (refcount_dec_and_test(&nh->refcnt))
272 		call_rcu_hurry(&nh->rcu, nexthop_free_rcu);
273 }
274 
275 static inline bool nexthop_cmp(const struct nexthop *nh1,
276 			       const struct nexthop *nh2)
277 {
278 	return nh1 == nh2;
279 }
280 
281 static inline bool nexthop_is_fdb(const struct nexthop *nh)
282 {
283 	if (nh->is_group) {
284 		const struct nh_group *nh_grp;
285 
286 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
287 		return nh_grp->fdb_nh;
288 	} else {
289 		const struct nh_info *nhi;
290 
291 		nhi = rcu_dereference_rtnl(nh->nh_info);
292 		return nhi->fdb_nh;
293 	}
294 }
295 
296 static inline bool nexthop_has_v4(const struct nexthop *nh)
297 {
298 	if (nh->is_group) {
299 		struct nh_group *nh_grp;
300 
301 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
302 		return nh_grp->has_v4;
303 	}
304 	return false;
305 }
306 
307 static inline bool nexthop_is_multipath(const struct nexthop *nh)
308 {
309 	if (nh->is_group) {
310 		struct nh_group *nh_grp;
311 
312 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
313 		return nh_grp->is_multipath;
314 	}
315 	return false;
316 }
317 
/* Defined out of line. Within this header only nexthop_path_fdb_result()
 * checks the result for NULL; the other callers dereference it directly.
 */
struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
319 
320 static inline unsigned int nexthop_num_path(const struct nexthop *nh)
321 {
322 	unsigned int rc = 1;
323 
324 	if (nh->is_group) {
325 		struct nh_group *nh_grp;
326 
327 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
328 		if (nh_grp->is_multipath)
329 			rc = nh_grp->num_nh;
330 	}
331 
332 	return rc;
333 }
334 
335 static inline
336 struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
337 {
338 	/* for_nexthops macros in fib_semantics.c grabs a pointer to
339 	 * the nexthop before checking nhsel
340 	 */
341 	if (nhsel >= nhg->num_nh)
342 		return NULL;
343 
344 	return nhg->nh_entries[nhsel].nh;
345 }
346 
347 static inline
348 int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
349 			    u8 rt_family)
350 {
351 	struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
352 	int i;
353 
354 	for (i = 0; i < nhg->num_nh; i++) {
355 		struct nexthop *nhe = nhg->nh_entries[i].nh;
356 		struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
357 		struct fib_nh_common *nhc = &nhi->fib_nhc;
358 		int weight = nhg->nh_entries[i].weight;
359 
360 		if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
361 			return -EMSGSIZE;
362 	}
363 
364 	return 0;
365 }
366 
367 /* called with rcu lock */
368 static inline bool nexthop_is_blackhole(const struct nexthop *nh)
369 {
370 	const struct nh_info *nhi;
371 
372 	if (nh->is_group) {
373 		struct nh_group *nh_grp;
374 
375 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
376 		if (nh_grp->num_nh > 1)
377 			return false;
378 
379 		nh = nh_grp->nh_entries[0].nh;
380 	}
381 
382 	nhi = rcu_dereference_rtnl(nh->nh_info);
383 	return nhi->reject_nh;
384 }
385 
386 static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
387 {
388 	struct nh_info *nhi;
389 	struct nexthop *nh;
390 
391 	nh = nexthop_select_path(res->fi->nh, hash);
392 	nhi = rcu_dereference(nh->nh_info);
393 	res->nhc = &nhi->fib_nhc;
394 }
395 
396 /* called with rcu read lock or rtnl held */
397 static inline
398 struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
399 {
400 	struct nh_info *nhi;
401 
402 	BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
403 	BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
404 
405 	if (nh->is_group) {
406 		struct nh_group *nh_grp;
407 
408 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
409 		if (nh_grp->is_multipath) {
410 			nh = nexthop_mpath_select(nh_grp, nhsel);
411 			if (!nh)
412 				return NULL;
413 		}
414 	}
415 
416 	nhi = rcu_dereference_rtnl(nh->nh_info);
417 	return &nhi->fib_nhc;
418 }
419 
420 /* called from fib_table_lookup with rcu_lock */
421 static inline
422 struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
423 					     int fib_flags,
424 					     const struct flowi4 *flp,
425 					     int *nhsel)
426 {
427 	struct nh_info *nhi;
428 
429 	if (nh->is_group) {
430 		struct nh_group *nhg = rcu_dereference(nh->nh_grp);
431 		int i;
432 
433 		for (i = 0; i < nhg->num_nh; i++) {
434 			struct nexthop *nhe = nhg->nh_entries[i].nh;
435 
436 			nhi = rcu_dereference(nhe->nh_info);
437 			if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
438 				*nhsel = i;
439 				return &nhi->fib_nhc;
440 			}
441 		}
442 	} else {
443 		nhi = rcu_dereference(nh->nh_info);
444 		if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
445 			*nhsel = 0;
446 			return &nhi->fib_nhc;
447 		}
448 	}
449 
450 	return NULL;
451 }
452 
453 static inline bool nexthop_uses_dev(const struct nexthop *nh,
454 				    const struct net_device *dev)
455 {
456 	struct nh_info *nhi;
457 
458 	if (nh->is_group) {
459 		struct nh_group *nhg = rcu_dereference(nh->nh_grp);
460 		int i;
461 
462 		for (i = 0; i < nhg->num_nh; i++) {
463 			struct nexthop *nhe = nhg->nh_entries[i].nh;
464 
465 			nhi = rcu_dereference(nhe->nh_info);
466 			if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
467 				return true;
468 		}
469 	} else {
470 		nhi = rcu_dereference(nh->nh_info);
471 		if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
472 			return true;
473 	}
474 
475 	return false;
476 }
477 
478 static inline unsigned int fib_info_num_path(const struct fib_info *fi)
479 {
480 	if (unlikely(fi->nh))
481 		return nexthop_num_path(fi->nh);
482 
483 	return fi->fib_nhs;
484 }
485 
486 int fib_check_nexthop(struct nexthop *nh, u8 scope,
487 		      struct netlink_ext_ack *extack);
488 
489 static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
490 {
491 	if (unlikely(fi->nh))
492 		return nexthop_fib_nhc(fi->nh, nhsel);
493 
494 	return &fi->fib_nh[nhsel].nh_common;
495 }
496 
497 /* only used when fib_nh is built into fib_info */
498 static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
499 {
500 	WARN_ON(fi->nh);
501 
502 	return &fi->fib_nh[nhsel];
503 }
504 
505 /*
506  * IPv6 variants
507  */
/* Declared here only; the body lives outside this header. */
int fib6_check_nexthop(struct nexthop *nh,
		       struct fib6_config *cfg,
		       struct netlink_ext_ack *extack);
510 
511 /* Caller should either hold rcu_read_lock(), or RTNL. */
512 static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
513 {
514 	struct nh_info *nhi;
515 
516 	if (nh->is_group) {
517 		struct nh_group *nh_grp;
518 
519 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
520 		nh = nexthop_mpath_select(nh_grp, 0);
521 		if (!nh)
522 			return NULL;
523 	}
524 
525 	nhi = rcu_dereference_rtnl(nh->nh_info);
526 	if (nhi->family == AF_INET6)
527 		return &nhi->fib6_nh;
528 
529 	return NULL;
530 }
531 
532 static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
533 {
534 	struct fib6_nh *fib6_nh;
535 
536 	fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
537 	return fib6_nh->fib_nh_dev;
538 }
539 
540 static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
541 {
542 	struct nexthop *nh = res->f6i->nh;
543 	struct nh_info *nhi;
544 
545 	nh = nexthop_select_path(nh, hash);
546 
547 	nhi = rcu_dereference_rtnl(nh->nh_info);
548 	if (nhi->reject_nh) {
549 		res->fib6_type = RTN_BLACKHOLE;
550 		res->fib6_flags |= RTF_REJECT;
551 		res->nh = nexthop_fib6_nh(nh);
552 	} else {
553 		res->nh = &nhi->fib6_nh;
554 	}
555 }
556 
/* Declared here only; takes a callback receiving a fib6_nh and the opaque
 * @arg.
 */
int nexthop_for_each_fib6_nh(struct nexthop *nh,
			     int (*cb)(struct fib6_nh *nh, void *arg),
			     void *arg);
560 
561 static inline int nexthop_get_family(struct nexthop *nh)
562 {
563 	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
564 
565 	return nhi->family;
566 }
567 
568 static inline
569 struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
570 {
571 	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
572 
573 	return &nhi->fib_nhc;
574 }
575 
576 static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
577 							    int hash)
578 {
579 	struct nh_info *nhi;
580 	struct nexthop *nhp;
581 
582 	nhp = nexthop_select_path(nh, hash);
583 	if (unlikely(!nhp))
584 		return NULL;
585 	nhi = rcu_dereference(nhp->nh_info);
586 	return &nhi->fib_nhc;
587 }
588 #endif
589