1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_route.h"
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/lock.h>
35 #include <sys/rwlock.h>
36 #include <sys/malloc.h>
37 #include <sys/mbuf.h>
38 #include <sys/socket.h>
39 #include <sys/kernel.h>
40
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/route.h>
44 #include <net/route/route_var.h>
45 #include <net/route/nhop_utils.h>
46 #include <net/route/nhop.h>
47 #include <net/route/nhop_var.h>
48 #include <net/vnet.h>
49
50 #define DEBUG_MOD_NAME nhop
51 #define DEBUG_MAX_LEVEL LOG_DEBUG
52 #include <net/route/route_debug.h>
53 _DECLARE_DEBUG(LOG_INFO);
54
55 /*
56 * This file contains data structures management logic for the nexthop ("nhop")
57 * route subsystem.
58 *
59 * Nexthops in the original sense are the objects containing all the necessary
60 * information to forward the packet to the selected destination.
61 * In particular, nexthop is defined by a combination of
62 * ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and
63 * NHF_DEFAULT
64 *
65 * All nexthops are stored in the resizable hash table.
66 * Additionally, each nexthop gets assigned its unique index (nexthop index)
67 * so userland programs can interact with the nexthops easier. Index allocation
68 * is backed by the bitmask array.
69 */
70
71 MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data");
72
73 /* Hash management functions */
74
75 int
nhops_init_rib(struct rib_head * rh)76 nhops_init_rib(struct rib_head *rh)
77 {
78 struct nh_control *ctl;
79 size_t alloc_size;
80 uint32_t num_buckets, num_items;
81 void *ptr;
82
83 ctl = malloc(sizeof(struct nh_control), M_NHOP, M_WAITOK | M_ZERO);
84
85 /*
86 * Allocate nexthop hash. Start with 16 items by default (128 bytes).
87 * This will be enough for most of the cases.
88 */
89 num_buckets = 16;
90 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
91 ptr = malloc(alloc_size, M_NHOP, M_WAITOK | M_ZERO);
92 CHT_SLIST_INIT(&ctl->nh_head, ptr, num_buckets);
93
94 /*
95 * Allocate nexthop index bitmask.
96 */
97 num_items = 128 * 8; /* 128 bytes */
98 ptr = malloc(bitmask_get_size(num_items), M_NHOP, M_WAITOK | M_ZERO);
99 bitmask_init(&ctl->nh_idx_head, ptr, num_items);
100
101 NHOPS_LOCK_INIT(ctl);
102
103 rh->nh_control = ctl;
104 ctl->ctl_rh = rh;
105
106 FIB_CTL_LOG(LOG_DEBUG2, ctl, "nhops init: ctl %p rh %p", ctl, rh);
107
108 return (0);
109 }
110
111 static void
destroy_ctl(struct nh_control * ctl)112 destroy_ctl(struct nh_control *ctl)
113 {
114
115 NHOPS_LOCK_DESTROY(ctl);
116 free(ctl->nh_head.ptr, M_NHOP);
117 free(ctl->nh_idx_head.idx, M_NHOP);
118 #ifdef ROUTE_MPATH
119 nhgrp_ctl_free(ctl);
120 #endif
121 free(ctl, M_NHOP);
122 }
123
124 /*
125 * Epoch callback indicating ctl is safe to destroy
126 */
127 static void
destroy_ctl_epoch(epoch_context_t ctx)128 destroy_ctl_epoch(epoch_context_t ctx)
129 {
130 struct nh_control *ctl;
131
132 ctl = __containerof(ctx, struct nh_control, ctl_epoch_ctx);
133
134 destroy_ctl(ctl);
135 }
136
137 void
nhops_destroy_rib(struct rib_head * rh)138 nhops_destroy_rib(struct rib_head *rh)
139 {
140 struct nh_control *ctl;
141 struct nhop_priv *nh_priv;
142
143 ctl = rh->nh_control;
144
145 /*
146 * All routes should have been deleted in rt_table_destroy().
147 * However, TCP stack or other consumers may store referenced
148 * nexthop pointers. When these references go to zero,
149 * nhop_free() will try to unlink these records from the
150 * datastructures, most likely leading to panic.
151 *
152 * Avoid that by explicitly marking all of the remaining
153 * nexthops as unlinked by removing a reference from a special
154 * counter. Please see nhop_free() comments for more
155 * details.
156 */
157
158 NHOPS_WLOCK(ctl);
159 CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) {
160 FIB_RH_LOG(LOG_DEBUG3, rh, "marking nhop %u unlinked", nh_priv->nh_idx);
161 refcount_release(&nh_priv->nh_linked);
162 } CHT_SLIST_FOREACH_END;
163 #ifdef ROUTE_MPATH
164 nhgrp_ctl_unlink_all(ctl);
165 #endif
166 NHOPS_WUNLOCK(ctl);
167
168 /*
169 * Postpone destruction till the end of current epoch
170 * so nhop_free() can safely use nh_control pointer.
171 */
172 NET_EPOCH_CALL(destroy_ctl_epoch, &ctl->ctl_epoch_ctx);
173 }
174
/*
 * Nexthop hash calculation:
 *
 * Nexthops distribution:
 * 2 "mandatory" nexthops per interface ("interface route", "loopback").
 * For direct peering: 1 nexthop for the peering router per ifp/af.
 * For IX-like peering: tens to hundreds of neighbor nexthops per ifp/af.
 * IGP control plane & broadcast segment: tens of nexthops per ifp/af.
 *
 * Each fib/af combination has its own hash table.
 * With that in mind, hash nexthops by the combination of the interface
 * and GW IP address.
 *
 * To optimize hash calculation, ignore the lower bits of the ifnet pointer,
 * as they give very little entropy.
 * Similarly, use the lower 4 bytes of the IPv6 address to distinguish
 * between the neighbors.
 */
/*
 * Key that hash_priv() fills in and feeds to djb_hash() as raw bytes.
 * Field order and widths determine the hashed byte sequence.
 */
struct _hash_data {
	uint16_t	ifentropy;	/* 16 bits taken from the ifnet pointer */
	uint8_t		family;		/* address family of the gateway sockaddr */
	uint8_t		nh_type;	/* low 8 bits of the nexthop type */
	uint32_t	gw_addr;	/* IPv4 gateway, or last 32 bits of an IPv6 one */
};
199
/*
 * Folds the first @len bytes at @h into an unsigned hash value.
 *
 * The accumulator starts at 0; for every byte it is multiplied by 33 and
 * then XORed with that byte.  A @len of zero or less yields 0.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	const unsigned char *cursor;
	unsigned int acc;
	int remaining;

	acc = 0;
	cursor = h;
	for (remaining = len; remaining > 0; remaining--)
		acc = (acc * 33) ^ *cursor++;

	return (acc);
}
211
212 static uint32_t
hash_priv(const struct nhop_priv * priv)213 hash_priv(const struct nhop_priv *priv)
214 {
215 struct nhop_object *nh = priv->nh;
216 struct _hash_data key = {
217 .ifentropy = (uint16_t)((((uintptr_t)nh->nh_ifp) >> 6) & 0xFFFF),
218 .family = nh->gw_sa.sa_family,
219 .nh_type = priv->nh_type & 0xFF,
220 .gw_addr = (nh->gw_sa.sa_family == AF_INET6) ?
221 nh->gw6_sa.sin6_addr.s6_addr32[3] :
222 nh->gw4_sa.sin_addr.s_addr
223 };
224
225 return (uint32_t)(djb_hash((const unsigned char *)&key, sizeof(key)));
226 }
227
228 /*
229 * Checks if hash needs resizing and performs this resize if necessary
230 *
231 */
232 static void
consider_resize(struct nh_control * ctl,uint32_t new_nh_buckets,uint32_t new_idx_items)233 consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items)
234 {
235 void *nh_ptr, *nh_idx_ptr;
236 void *old_idx_ptr;
237 size_t alloc_size;
238
239 nh_ptr = NULL;
240 if (new_nh_buckets != 0) {
241 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets);
242 nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
243 }
244
245 nh_idx_ptr = NULL;
246 if (new_idx_items != 0) {
247 alloc_size = bitmask_get_size(new_idx_items);
248 nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
249 }
250
251 if (nh_ptr == NULL && nh_idx_ptr == NULL) {
252 /* Either resize is not required or allocations have failed. */
253 return;
254 }
255
256 FIB_CTL_LOG(LOG_DEBUG, ctl,
257 "going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]",
258 nh_ptr, new_nh_buckets, nh_idx_ptr, new_idx_items);
259
260 old_idx_ptr = NULL;
261
262 NHOPS_WLOCK(ctl);
263 if (nh_ptr != NULL) {
264 CHT_SLIST_RESIZE(&ctl->nh_head, nhops, nh_ptr, new_nh_buckets);
265 }
266 if (nh_idx_ptr != NULL) {
267 if (bitmask_copy(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items) == 0)
268 bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items, &old_idx_ptr);
269 }
270 NHOPS_WUNLOCK(ctl);
271
272 if (nh_ptr != NULL)
273 free(nh_ptr, M_NHOP);
274 if (old_idx_ptr != NULL)
275 free(old_idx_ptr, M_NHOP);
276 }
277
278 /*
279 * Links nextop @nh_priv to the nexhop hash table and allocates
280 * nexhop index.
281 * Returns allocated index or 0 on failure.
282 */
283 int
link_nhop(struct nh_control * ctl,struct nhop_priv * nh_priv)284 link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv)
285 {
286 uint16_t idx;
287 uint32_t num_buckets_new, num_items_new;
288
289 KASSERT((nh_priv->nh_idx == 0), ("nhop index is already allocated"));
290 NHOPS_WLOCK(ctl);
291
292 /*
293 * Check if we need to resize hash and index.
294 * The following 2 functions returns either new size or 0
295 * if resize is not required.
296 */
297 num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
298 num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head);
299
300 if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) {
301 NHOPS_WUNLOCK(ctl);
302 FIB_CTL_LOG(LOG_INFO, ctl, "Unable to allocate nhop index");
303 RTSTAT_INC(rts_nh_idx_alloc_failure);
304 consider_resize(ctl, num_buckets_new, num_items_new);
305 return (0);
306 }
307
308 nh_priv->nh_idx = idx;
309 nh_priv->nh_control = ctl;
310 nh_priv->nh_finalized = 1;
311
312 CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv);
313
314 NHOPS_WUNLOCK(ctl);
315
316 FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh,
317 "Linked nhop priv %p to %d, hash %u, ctl %p",
318 nh_priv, idx, hash_priv(nh_priv), ctl);
319 consider_resize(ctl, num_buckets_new, num_items_new);
320
321 return (idx);
322 }
323
324 /*
325 * Unlinks nexthop specified by @nh_priv data from the hash.
326 *
327 * Returns found nexthop or NULL.
328 */
329 struct nhop_priv *
unlink_nhop(struct nh_control * ctl,struct nhop_priv * nh_priv_del)330 unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv_del)
331 {
332 struct nhop_priv *priv_ret;
333 int idx;
334 uint32_t num_buckets_new, num_items_new;
335
336 idx = 0;
337
338 NHOPS_WLOCK(ctl);
339 CHT_SLIST_REMOVE(&ctl->nh_head, nhops, nh_priv_del, priv_ret);
340
341 if (priv_ret != NULL) {
342 idx = priv_ret->nh_idx;
343 priv_ret->nh_idx = 0;
344
345 KASSERT((idx != 0), ("bogus nhop index 0"));
346 if ((bitmask_free_idx(&ctl->nh_idx_head, idx)) != 0) {
347 FIB_CTL_LOG(LOG_DEBUG, ctl,
348 "Unable to remove index %d from fib %u af %d",
349 idx, ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family);
350 }
351 }
352
353 /* Check if hash or index needs to be resized */
354 num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
355 num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head);
356
357 NHOPS_WUNLOCK(ctl);
358
359 if (priv_ret == NULL) {
360 FIB_CTL_LOG(LOG_INFO, ctl,
361 "Unable to unlink nhop priv %p from hash, hash %u ctl %p",
362 nh_priv_del, hash_priv(nh_priv_del), ctl);
363 } else {
364 FIB_CTL_LOG(LOG_DEBUG2, ctl, "Unlinked nhop %p priv idx %d",
365 priv_ret, idx);
366 }
367
368 consider_resize(ctl, num_buckets_new, num_items_new);
369
370 return (priv_ret);
371 }
372
373 /*
374 * Searches for the nexthop by data specifcied in @nh_priv.
375 * Returns referenced nexthop or NULL.
376 */
377 struct nhop_priv *
find_nhop(struct nh_control * ctl,const struct nhop_priv * nh_priv)378 find_nhop(struct nh_control *ctl, const struct nhop_priv *nh_priv)
379 {
380 struct nhop_priv *nh_priv_ret;
381
382 NHOPS_RLOCK(ctl);
383 CHT_SLIST_FIND_BYOBJ(&ctl->nh_head, nhops, nh_priv, nh_priv_ret);
384 if (nh_priv_ret != NULL) {
385 if (refcount_acquire_if_not_zero(&nh_priv_ret->nh_refcnt) == 0){
386 /* refcount was 0 -> nhop is being deleted */
387 nh_priv_ret = NULL;
388 }
389 }
390 NHOPS_RUNLOCK(ctl);
391
392 return (nh_priv_ret);
393 }
394