xref: /linux/include/net/nexthop.h (revision 385ef48f468696d6d172eb367656a3466fa0408d)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Generic nexthop implementation
4  *
5  * Copyright (c) 2017-19 Cumulus Networks
6  * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
7  */
8 
9 #ifndef __LINUX_NEXTHOP_H
10 #define __LINUX_NEXTHOP_H
11 
12 #include <linux/netdevice.h>
13 #include <linux/notifier.h>
14 #include <linux/route.h>
15 #include <linux/types.h>
16 #include <net/ip_fib.h>
17 #include <net/ip6_fib.h>
18 #include <net/netlink.h>
19 
/* NOTE(review): presumably the mask of RTNH_F_* flags that user space is
 * allowed to set on a nexthop - confirm against the netlink parser.
 */
20 #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK
21 
22 struct nexthop;	/* forward declaration; defined further down in this header */
23 
/*
 * Parameters describing one nexthop object.
 *
 * NOTE(review): the raw nlattr pointers together with nlflags/nlinfo suggest
 * this is filled in from a netlink request - confirm against the parser.
 */
24 struct nh_config {
25 	u32		nh_id;
26 
27 	u8		nh_family;
28 	u8		nh_protocol;
29 	u8		nh_blackhole;
30 	u8		nh_fdb;
31 	u32		nh_flags;
32 
33 	int		nh_ifindex;
34 	struct net_device *dev;
35 
36 	union {	/* gateway address, stored as either an IPv4 or an IPv6 value */
37 		__be32		ipv4;
38 		struct in6_addr	ipv6;
39 	} gw;
40 
41 	struct nlattr	*nh_grp;	/* kept as a pointer to the netlink attribute */
42 	u16		nh_grp_type;
43 	u16		nh_grp_res_num_buckets;
44 	unsigned long	nh_grp_res_idle_timer;
45 	unsigned long	nh_grp_res_unbalanced_timer;
	/* NOTE(review): the three nh_grp_res_has_* flags presumably record
	 * whether the matching nh_grp_res_* value above was supplied - confirm.
	 */
46 	bool		nh_grp_res_has_num_buckets;
47 	bool		nh_grp_res_has_idle_timer;
48 	bool		nh_grp_res_has_unbalanced_timer;
49 
50 	struct nlattr	*nh_encap;	/* kept as a pointer to the netlink attribute */
51 	u16		nh_encap_type;
52 
53 	u32		nlflags;
54 	struct nl_info	nlinfo;
55 };
56 
/*
 * Data of a non-group nexthop; the helpers in this header reach it through
 * nexthop.nh_info when nexthop.is_group is false.
 */
57 struct nh_info {
58 	struct hlist_node	dev_hash;    /* entry on netns devhash */
59 	struct nexthop		*nh_parent;
60 
61 	u8			family;	/* nexthop_fib6_nh() hands out fib6_nh only when this is AF_INET6 */
62 	bool			reject_nh;	/* value returned by nexthop_is_blackhole() */
63 	bool			fdb_nh;	/* value returned by nexthop_is_fdb() for non-group nexthops */
64 
	/* Overlapping views of the same storage. nexthop_fib_nhc() asserts at
	 * build time that nh_common is at offset 0 of both fib_nh and fib6_nh.
	 */
65 	union {
66 		struct fib_nh_common	fib_nhc;
67 		struct fib_nh		fib_nh;
68 		struct fib6_nh		fib6_nh;
69 	};
70 };
71 
/*
 * Element type of nh_res_table.nh_buckets[].
 * NOTE(review): used_time / migrated_time are presumably jiffies
 * timestamps - confirm against the code that updates them.
 */
72 struct nh_res_bucket {
73 	struct nh_grp_entry __rcu *nh_entry;	/* RCU-annotated pointer to a group entry */
74 	atomic_long_t		used_time;
75 	unsigned long		migrated_time;
76 	bool			occupied;
77 	u8			nh_flags;
78 };
79 
/*
 * Bucket table referenced from nh_group.res_table.
 * NOTE(review): presumably only allocated for resilient groups - confirm.
 */
80 struct nh_res_table {
81 	struct net		*net;
82 	u32			nhg_id;
83 	struct delayed_work	upkeep_dw;
84 
85 	/* List of NHGEs that have too few buckets ("uw" for underweight).
86 	 * Reclaimed buckets will be given to entries in this list.
87 	 */
88 	struct list_head	uw_nh_entries;
89 	unsigned long		unbalanced_since;
90 
91 	u32			idle_timer;
92 	u32			unbalanced_timer;
93 
94 	u16			num_nh_buckets;	/* element count of nh_buckets[] */
95 	struct nh_res_bucket	nh_buckets[] __counted_by(num_nh_buckets);	/* flexible array */
96 };
97 
/*
 * Element type of nh_group.nh_entries[]: one member nexthop of a group.
 */
98 struct nh_grp_entry {
99 	struct nexthop	*nh;	/* the member nexthop */
100 	u8		weight;	/* passed to fib_add_nexthop() by nexthop_mpath_fill_node() */
101 
	/* NOTE(review): hthr / res presumably hold state for hash-threshold
	 * and resilient groups respectively (cf. nh_group.hash_threshold and
	 * nh_group.resilient) - confirm.
	 */
102 	union {
103 		struct {
104 			atomic_t	upper_bound;
105 		} hthr;
106 		struct {
107 			/* Member on uw_nh_entries. */
108 			struct list_head	uw_nh_entry;
109 
110 			u16			count_buckets;
111 			u16			wants_buckets;
112 		} res;
113 	};
114 
115 	struct list_head nh_list;
116 	struct nexthop	*nh_parent;  /* nexthop of group with this entry */
117 };
118 
/*
 * Data of a group nexthop; the helpers in this header reach it through
 * nexthop.nh_grp when nexthop.is_group is true.
 */
119 struct nh_group {
120 	struct nh_group		*spare; /* spare group for removals */
121 	u16			num_nh;	/* element count of nh_entries[] */
122 	bool			is_multipath;	/* when set, nexthop_num_path() reports num_nh paths */
123 	bool			hash_threshold;
124 	bool			resilient;
125 	bool			fdb_nh;	/* value returned by nexthop_is_fdb() for groups */
126 	bool			has_v4;	/* value returned by nexthop_has_v4() */
127 
128 	struct nh_res_table __rcu *res_table;
129 	struct nh_grp_entry	nh_entries[] __counted_by(num_nh);	/* flexible array */
130 };
131 
/*
 * A nexthop object: either a single nexthop or a group of nexthops,
 * as told by is_group.
 */
132 struct nexthop {
133 	struct rb_node		rb_node;    /* entry on netns rbtree */
134 	struct list_head	fi_list;    /* v4 entries using nh */
135 	struct list_head	f6i_list;   /* v6 entries using nh */
136 	struct list_head        fdb_list;   /* fdb entries using this nh */
137 	struct list_head	grp_list;   /* nh group entries using this nh */
138 	struct net		*net;
139 
140 	u32			id;
141 
142 	u8			protocol;   /* app managing this nh */
143 	u8			nh_flags;
144 	bool			is_group;	/* selects the valid member of the union below */
145 
146 	refcount_t		refcnt;	/* taken by nexthop_get(), dropped by nexthop_put() */
147 	struct rcu_head		rcu;	/* used by nexthop_put() to defer nexthop_free_rcu() */
148 
	/* nh_grp is read when is_group is true, nh_info otherwise
	 * (see e.g. nexthop_is_fdb()).
	 */
149 	union {
150 		struct nh_info	__rcu *nh_info;
151 		struct nh_group __rcu *nh_grp;
152 	};
153 };
154 
/* NOTE(review): presumably the event codes delivered to nexthop notifier
 * callbacks registered with register_nexthop_notifier() - confirm.
 */
155 enum nexthop_event_type {
156 	NEXTHOP_EVENT_DEL,
157 	NEXTHOP_EVENT_REPLACE,
158 	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
159 	NEXTHOP_EVENT_BUCKET_REPLACE,
160 };
161 
/* Type of nh_notifier_info.type.
 * NOTE(review): each value presumably names the valid member of the union
 * in struct nh_notifier_info (SINGLE -> nh, GRP -> nh_grp, RES_TABLE ->
 * nh_res_table, RES_BUCKET -> nh_res_bucket); the pairing is inferred from
 * the matching names - confirm.
 */
162 enum nh_notifier_info_type {
163 	NH_NOTIFIER_INFO_TYPE_SINGLE,
164 	NH_NOTIFIER_INFO_TYPE_GRP,
165 	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
166 	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
167 };
168 
/* Description of one single (non-group) nexthop as carried in notifier info. */
169 struct nh_notifier_single_info {
170 	struct net_device *dev;
171 	u8 gw_family;	/* NOTE(review): presumably tells which union member below is valid - confirm */
172 	union {	/* gateway address: IPv4 or IPv6 */
173 		__be32 ipv4;
174 		struct in6_addr ipv6;
175 	};
176 	u8 is_reject:1,	/* three one-bit flags packed into a single u8 */
177 	   is_fdb:1,
178 	   has_encap:1;
179 };
180 
/* Element type of nh_notifier_grp_info.nh_entries[]: one group member. */
181 struct nh_notifier_grp_entry_info {
182 	u8 weight;
183 	u32 id;
184 	struct nh_notifier_single_info nh;	/* embedded by value, not a pointer */
185 };
186 
/* Description of a nexthop group as carried in notifier info. */
187 struct nh_notifier_grp_info {
188 	u16 num_nh;	/* element count of nh_entries[] */
189 	bool is_fdb;
190 	struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);	/* flexible array */
191 };
192 
/* Description of a single bucket, carrying both an old and a new nexthop.
 * NOTE(review): presumably describes replacing old_nh with new_nh in bucket
 * bucket_index - confirm against the code that fills this in.
 */
193 struct nh_notifier_res_bucket_info {
194 	u16 bucket_index;
195 	unsigned int idle_timer_ms;	/* the _ms suffix suggests milliseconds - TODO confirm */
196 	bool force;
197 	struct nh_notifier_single_info old_nh;
198 	struct nh_notifier_single_info new_nh;
199 };
200 
/* Description of a whole bucket table: one single-nexthop record per bucket. */
201 struct nh_notifier_res_table_info {
202 	u16 num_nh_buckets;	/* element count of nhs[] */
203 	struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);	/* flexible array */
204 };
205 
/* Top-level notifier payload: common fields plus a pointer to a
 * type-specific description.
 */
206 struct nh_notifier_info {
207 	struct net *net;
208 	struct netlink_ext_ack *extack;
209 	u32 id;
210 	enum nh_notifier_info_type type;	/* NOTE(review): presumably selects the valid union member - confirm */
211 	union {
212 		struct nh_notifier_single_info *nh;
213 		struct nh_notifier_grp_info *nh_grp;
214 		struct nh_notifier_res_table_info *nh_res_table;
215 		struct nh_notifier_res_bucket_info *nh_res_bucket;
216 	};
217 };
218 
/*
 * Declarations only; the definitions live outside this header.
 * NOTE(review): the summaries below are inferred from names and
 * signatures - confirm against the definitions.
 *
 * register/unregister: attach or detach a notifier_block for a netns.
 * nexthop_set_hw_flags / nexthop_bucket_set_hw_flags: report offload/trap
 * state for a nexthop id, or for one bucket of it.
 * nexthop_res_grp_activity_update: report per-bucket activity for a group.
 */
219 int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
220 			      struct netlink_ext_ack *extack);
221 int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
222 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
223 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
224 void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
225 				 bool offload, bool trap);
226 void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
227 				     unsigned long *activity);
228 
229 /* caller is holding rcu or rtnl; no reference taken to nexthop */
230 struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
231 void nexthop_free_rcu(struct rcu_head *head);	/* RCU callback queued by nexthop_put() */
232 
233 static inline bool nexthop_get(struct nexthop *nh)
234 {
235 	return refcount_inc_not_zero(&nh->refcnt);
236 }
237 
238 static inline void nexthop_put(struct nexthop *nh)
239 {
240 	if (refcount_dec_and_test(&nh->refcnt))
241 		call_rcu(&nh->rcu, nexthop_free_rcu);
242 }
243 
/* Two nexthops compare equal only when they are the very same object;
 * the comparison is on pointer identity, not on contents.
 */
static inline bool nexthop_cmp(const struct nexthop *nh1,
			       const struct nexthop *nh2)
{
	if (nh1 != nh2)
		return false;

	return true;
}
249 
250 static inline bool nexthop_is_fdb(const struct nexthop *nh)
251 {
252 	if (nh->is_group) {
253 		const struct nh_group *nh_grp;
254 
255 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
256 		return nh_grp->fdb_nh;
257 	} else {
258 		const struct nh_info *nhi;
259 
260 		nhi = rcu_dereference_rtnl(nh->nh_info);
261 		return nhi->fdb_nh;
262 	}
263 }
264 
265 static inline bool nexthop_has_v4(const struct nexthop *nh)
266 {
267 	if (nh->is_group) {
268 		struct nh_group *nh_grp;
269 
270 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
271 		return nh_grp->has_v4;
272 	}
273 	return false;
274 }
275 
276 static inline bool nexthop_is_multipath(const struct nexthop *nh)
277 {
278 	if (nh->is_group) {
279 		struct nh_group *nh_grp;
280 
281 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
282 		return nh_grp->is_multipath;
283 	}
284 	return false;
285 }
286 
/* Declaration only; defined outside this header. The callers in this file
 * pass a hash value and then read nh_info of the returned nexthop.
 * nexthop_path_fdb_result() checks the result for NULL; the other two
 * callers here do not.
 */
287 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
288 
289 static inline unsigned int nexthop_num_path(const struct nexthop *nh)
290 {
291 	unsigned int rc = 1;
292 
293 	if (nh->is_group) {
294 		struct nh_group *nh_grp;
295 
296 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
297 		if (nh_grp->is_multipath)
298 			rc = nh_grp->num_nh;
299 	}
300 
301 	return rc;
302 }
303 
304 static inline
305 struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
306 {
307 	/* for_nexthops macros in fib_semantics.c grabs a pointer to
308 	 * the nexthop before checking nhsel
309 	 */
310 	if (nhsel >= nhg->num_nh)
311 		return NULL;
312 
313 	return nhg->nh_entries[nhsel].nh;
314 }
315 
316 static inline
317 int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
318 			    u8 rt_family)
319 {
320 	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
321 	int i;
322 
323 	for (i = 0; i < nhg->num_nh; i++) {
324 		struct nexthop *nhe = nhg->nh_entries[i].nh;
325 		struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
326 		struct fib_nh_common *nhc = &nhi->fib_nhc;
327 		int weight = nhg->nh_entries[i].weight;
328 
329 		if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
330 			return -EMSGSIZE;
331 	}
332 
333 	return 0;
334 }
335 
336 /* called with rcu lock */
337 static inline bool nexthop_is_blackhole(const struct nexthop *nh)
338 {
339 	const struct nh_info *nhi;
340 
341 	if (nh->is_group) {
342 		struct nh_group *nh_grp;
343 
344 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
345 		if (nh_grp->num_nh > 1)
346 			return false;
347 
348 		nh = nh_grp->nh_entries[0].nh;
349 	}
350 
351 	nhi = rcu_dereference_rtnl(nh->nh_info);
352 	return nhi->reject_nh;
353 }
354 
355 static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
356 {
357 	struct nh_info *nhi;
358 	struct nexthop *nh;
359 
360 	nh = nexthop_select_path(res->fi->nh, hash);
361 	nhi = rcu_dereference(nh->nh_info);
362 	res->nhc = &nhi->fib_nhc;
363 }
364 
365 /* called with rcu read lock or rtnl held */
366 static inline
367 struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
368 {
369 	struct nh_info *nhi;
370 
371 	BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
372 	BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
373 
374 	if (nh->is_group) {
375 		struct nh_group *nh_grp;
376 
377 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
378 		if (nh_grp->is_multipath) {
379 			nh = nexthop_mpath_select(nh_grp, nhsel);
380 			if (!nh)
381 				return NULL;
382 		}
383 	}
384 
385 	nhi = rcu_dereference_rtnl(nh->nh_info);
386 	return &nhi->fib_nhc;
387 }
388 
389 /* called from fib_table_lookup with rcu_lock */
390 static inline
391 struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
392 					     int fib_flags,
393 					     const struct flowi4 *flp,
394 					     int *nhsel)
395 {
396 	struct nh_info *nhi;
397 
398 	if (nh->is_group) {
399 		struct nh_group *nhg = rcu_dereference(nh->nh_grp);
400 		int i;
401 
402 		for (i = 0; i < nhg->num_nh; i++) {
403 			struct nexthop *nhe = nhg->nh_entries[i].nh;
404 
405 			nhi = rcu_dereference(nhe->nh_info);
406 			if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
407 				*nhsel = i;
408 				return &nhi->fib_nhc;
409 			}
410 		}
411 	} else {
412 		nhi = rcu_dereference(nh->nh_info);
413 		if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
414 			*nhsel = 0;
415 			return &nhi->fib_nhc;
416 		}
417 	}
418 
419 	return NULL;
420 }
421 
422 static inline bool nexthop_uses_dev(const struct nexthop *nh,
423 				    const struct net_device *dev)
424 {
425 	struct nh_info *nhi;
426 
427 	if (nh->is_group) {
428 		struct nh_group *nhg = rcu_dereference(nh->nh_grp);
429 		int i;
430 
431 		for (i = 0; i < nhg->num_nh; i++) {
432 			struct nexthop *nhe = nhg->nh_entries[i].nh;
433 
434 			nhi = rcu_dereference(nhe->nh_info);
435 			if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
436 				return true;
437 		}
438 	} else {
439 		nhi = rcu_dereference(nh->nh_info);
440 		if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
441 			return true;
442 	}
443 
444 	return false;
445 }
446 
447 static inline unsigned int fib_info_num_path(const struct fib_info *fi)
448 {
449 	if (unlikely(fi->nh))
450 		return nexthop_num_path(fi->nh);
451 
452 	return fi->fib_nhs;
453 }
454 
/* Declaration only; defined outside this header.
 * NOTE(review): presumably checks whether @nh may be used by an IPv4 route
 * of the given @scope and reports problems through @extack - confirm.
 */
455 int fib_check_nexthop(struct nexthop *nh, u8 scope,
456 		      struct netlink_ext_ack *extack);
457 
458 static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
459 {
460 	if (unlikely(fi->nh))
461 		return nexthop_fib_nhc(fi->nh, nhsel);
462 
463 	return &fi->fib_nh[nhsel].nh_common;
464 }
465 
466 /* only used when fib_nh is built into fib_info */
467 static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
468 {
469 	WARN_ON(fi->nh);
470 
471 	return &fi->fib_nh[nhsel];
472 }
473 
474 /*
475  * IPv6 variants
476  */
/* Declaration only; defined outside this header.
 * NOTE(review): presumably the IPv6 counterpart of fib_check_nexthop(),
 * validating @nh against route config @cfg and reporting through
 * @extack - confirm.
 */
477 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
478 		       struct netlink_ext_ack *extack);
479 
480 /* Caller should either hold rcu_read_lock(), or RTNL. */
481 static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
482 {
483 	struct nh_info *nhi;
484 
485 	if (nh->is_group) {
486 		struct nh_group *nh_grp;
487 
488 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
489 		nh = nexthop_mpath_select(nh_grp, 0);
490 		if (!nh)
491 			return NULL;
492 	}
493 
494 	nhi = rcu_dereference_rtnl(nh->nh_info);
495 	if (nhi->family == AF_INET6)
496 		return &nhi->fib6_nh;
497 
498 	return NULL;
499 }
500 
501 static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
502 {
503 	struct fib6_nh *fib6_nh;
504 
505 	fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
506 	return fib6_nh->fib_nh_dev;
507 }
508 
509 static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
510 {
511 	struct nexthop *nh = res->f6i->nh;
512 	struct nh_info *nhi;
513 
514 	nh = nexthop_select_path(nh, hash);
515 
516 	nhi = rcu_dereference_rtnl(nh->nh_info);
517 	if (nhi->reject_nh) {
518 		res->fib6_type = RTN_BLACKHOLE;
519 		res->fib6_flags |= RTF_REJECT;
520 		res->nh = nexthop_fib6_nh(nh);
521 	} else {
522 		res->nh = &nhi->fib6_nh;
523 	}
524 }
525 
/* Declaration only; defined outside this header. @cb receives a fib6_nh
 * pointer together with the opaque @arg.
 * NOTE(review): presumably calls @cb for every fib6_nh behind @nh and
 * passes its return value on - confirm against the definition.
 */
526 int nexthop_for_each_fib6_nh(struct nexthop *nh,
527 			     int (*cb)(struct fib6_nh *nh, void *arg),
528 			     void *arg);
529 
530 static inline int nexthop_get_family(struct nexthop *nh)
531 {
532 	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
533 
534 	return nhi->family;
535 }
536 
537 static inline
538 struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
539 {
540 	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
541 
542 	return &nhi->fib_nhc;
543 }
544 
545 static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
546 							    int hash)
547 {
548 	struct nh_info *nhi;
549 	struct nexthop *nhp;
550 
551 	nhp = nexthop_select_path(nh, hash);
552 	if (unlikely(!nhp))
553 		return NULL;
554 	nhi = rcu_dereference(nhp->nh_info);
555 	return &nhi->fib_nhc;
556 }
557 #endif
558