xref: /freebsd/sys/tests/fib_lookup/fib_lookup.c (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/lock.h>
36 #include <sys/rmlock.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/kernel.h>
40 #include <sys/socket.h>
41 #include <sys/sysctl.h>
42 #include <net/vnet.h>
43 
44 #include <net/if.h>
45 #include <net/if_var.h>
46 
47 #include <netinet/in.h>
48 #include <netinet/in_fib.h>
49 #include <netinet/ip.h>
50 
51 #include <netinet6/in6_fib.h>
52 
53 #include <net/route.h>
54 #include <net/route/nhop.h>
55 #include <net/route/route_ctl.h>
56 #include <net/route/route_var.h>
57 #include <net/route/fib_algo.h>
58 
59 #define	CHUNK_SIZE	10000
60 
61 VNET_DEFINE_STATIC(struct in_addr *, inet_addr_list);
62 #define	V_inet_addr_list	VNET(inet_addr_list)
63 VNET_DEFINE_STATIC(int, inet_list_size);
64 #define	V_inet_list_size	VNET(inet_list_size)
65 
66 VNET_DEFINE_STATIC(struct in6_addr *, inet6_addr_list);
67 #define	V_inet6_addr_list	VNET(inet6_addr_list)
68 VNET_DEFINE_STATIC(int, inet6_list_size);
69 #define	V_inet6_list_size	VNET(inet6_list_size)
70 
71 SYSCTL_DECL(_net_route);
72 SYSCTL_NODE(_net_route, OID_AUTO, test, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
73     "Route algorithm lookups");
74 
75 static int
76 add_addr(int family, char *addr_str)
77 {
78 
79 	if (family == AF_INET) {
80 		struct in_addr *paddr_old = V_inet_addr_list;
81 		int size_old = V_inet_list_size;
82 		struct in_addr addr;
83 
84 		if (inet_pton(AF_INET, addr_str, &addr) != 1)
85 			return (EINVAL);
86 
87 		struct in_addr *paddr = mallocarray(size_old + 1,
88 		    sizeof(struct in_addr), M_TEMP, M_ZERO | M_WAITOK);
89 
90 		if (paddr_old != NULL) {
91 			memcpy(paddr, paddr_old, size_old * sizeof(struct in_addr));
92 			free(paddr_old, M_TEMP);
93 		}
94 		paddr[size_old] = addr;
95 
96 		V_inet_addr_list = paddr;
97 		V_inet_list_size = size_old + 1;
98 		inet_ntop(AF_INET, &addr, addr_str, sizeof(addr_str));
99 	} else if (family == AF_INET6) {
100 		struct in6_addr *paddr_old = V_inet6_addr_list;
101 		int size_old = V_inet6_list_size;
102 		struct in6_addr addr6;
103 
104 		if (inet_pton(AF_INET6, addr_str, &addr6) != 1)
105 			return (EINVAL);
106 
107 		struct in6_addr *paddr = mallocarray(size_old + 1,
108 		    sizeof(struct in6_addr), M_TEMP, M_ZERO | M_WAITOK);
109 
110 		if (paddr_old != NULL) {
111 			memcpy(paddr, paddr_old, size_old * sizeof(struct in6_addr));
112 			free(paddr_old, M_TEMP);
113 		}
114 		paddr[size_old] = addr6;
115 
116 		V_inet6_addr_list = paddr;
117 		V_inet6_list_size = size_old + 1;
118 		inet_ntop(AF_INET6, &addr6, addr_str, sizeof(addr_str));
119 	}
120 
121 	return (0);
122 }
123 
124 static int
125 add_addr_sysctl_handler(struct sysctl_oid *oidp, struct sysctl_req *req, int family)
126 {
127 	char addr_str[INET6_ADDRSTRLEN];
128 	int error;
129 
130 	bzero(addr_str, sizeof(addr_str));
131 
132 	error = sysctl_handle_string(oidp, addr_str, sizeof(addr_str), req);
133 	if (error != 0 || req->newptr == NULL)
134 		return (error);
135 
136 	error = add_addr(family, addr_str);
137 
138 	return (0);
139 }
140 
141 static int
142 add_inet_addr_sysctl_handler(SYSCTL_HANDLER_ARGS)
143 {
144 
145 	return (add_addr_sysctl_handler(oidp, req, AF_INET));
146 }
147 SYSCTL_PROC(_net_route_test, OID_AUTO, add_inet_addr,
148     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
149     add_inet_addr_sysctl_handler, "A", "Set");
150 
151 static int
152 add_inet6_addr_sysctl_handler(SYSCTL_HANDLER_ARGS)
153 {
154 
155 	return (add_addr_sysctl_handler(oidp, req, AF_INET6));
156 }
157 SYSCTL_PROC(_net_route_test, OID_AUTO, add_inet6_addr,
158     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
159     add_inet6_addr_sysctl_handler, "A", "Set");
160 
161 static uint64_t
162 run_test_inet_one_pass(uint32_t fibnum)
163 {
164 	/* Assume epoch */
165 	int sz = V_inet_list_size;
166 	int tries = CHUNK_SIZE / sz;
167 	const struct in_addr *a = V_inet_addr_list;
168 	uint64_t count = 0;
169 
170 	for (int pass = 0; pass < tries; pass++) {
171 		for (int i = 0; i < sz; i++) {
172 			fib4_lookup(fibnum, a[i], 0, NHR_NONE, 0);
173 			count++;
174 		}
175 	}
176 	return (count);
177 }
178 
179 static int
180 run_test_inet(SYSCTL_HANDLER_ARGS)
181 {
182 	struct epoch_tracker et;
183 
184 	int count = 0;
185 	int error = sysctl_handle_int(oidp, &count, 0, req);
186 	if (error != 0)
187 		return (error);
188 
189 	if (count == 0)
190 		return (0);
191 
192 	if (V_inet_list_size <= 0)
193 		return (ENOENT);
194 
195 	printf("run: %d packets vnet %p\n", count, curvnet);
196 	if (count < CHUNK_SIZE)
197 		count = CHUNK_SIZE;
198 
199 	struct timespec ts_pre, ts_post;
200 	int64_t pass_diff, total_diff = 0;
201 	uint64_t pass_packets, total_packets = 0;
202 	uint32_t fibnum = curthread->td_proc->p_fibnum;
203 
204 	for (int pass = 0; pass < count / CHUNK_SIZE; pass++) {
205 		NET_EPOCH_ENTER(et);
206 		nanouptime(&ts_pre);
207 		pass_packets = run_test_inet_one_pass(fibnum);
208 		nanouptime(&ts_post);
209 		NET_EPOCH_EXIT(et);
210 
211 		pass_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
212 		    (ts_post.tv_nsec - ts_pre.tv_nsec);
213 		total_diff += pass_diff;
214 		total_packets += pass_packets;
215 	}
216 
217 	printf("%zu packets in %zu nanoseconds, %zu pps\n",
218 	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
219 
220 	return (0);
221 }
222 SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet,
223     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
224     0, 0, run_test_inet, "I", "Execute fib4_lookup test");
225 
226 static uint64_t
227 run_test_inet6_one_pass(uint32_t fibnum)
228 {
229 	/* Assume epoch */
230 	int sz = V_inet6_list_size;
231 	int tries = CHUNK_SIZE / sz;
232 	const struct in6_addr *a = V_inet6_addr_list;
233 	uint64_t count = 0;
234 
235 	for (int pass = 0; pass < tries; pass++) {
236 		for (int i = 0; i < sz; i++) {
237 			fib6_lookup(fibnum, &a[i], 0, NHR_NONE, 0);
238 			count++;
239 		}
240 	}
241 	return (count);
242 }
243 
244 static int
245 run_test_inet6(SYSCTL_HANDLER_ARGS)
246 {
247 	struct epoch_tracker et;
248 
249 	int count = 0;
250 	int error = sysctl_handle_int(oidp, &count, 0, req);
251 	if (error != 0)
252 		return (error);
253 
254 	if (count == 0)
255 		return (0);
256 
257 	if (V_inet6_list_size <= 0)
258 		return (ENOENT);
259 
260 	printf("run: %d packets vnet %p\n", count, curvnet);
261 	if (count < CHUNK_SIZE)
262 		count = CHUNK_SIZE;
263 
264 	struct timespec ts_pre, ts_post;
265 	int64_t pass_diff, total_diff = 0;
266 	uint64_t pass_packets, total_packets = 0;
267 	uint32_t fibnum = curthread->td_proc->p_fibnum;
268 
269 	for (int pass = 0; pass < count / CHUNK_SIZE; pass++) {
270 		NET_EPOCH_ENTER(et);
271 		nanouptime(&ts_pre);
272 		pass_packets = run_test_inet6_one_pass(fibnum);
273 		nanouptime(&ts_post);
274 		NET_EPOCH_EXIT(et);
275 
276 		pass_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
277 		    (ts_post.tv_nsec - ts_pre.tv_nsec);
278 		total_diff += pass_diff;
279 		total_packets += pass_packets;
280 	}
281 
282 	printf("%zu packets in %zu nanoseconds, %zu pps\n",
283 	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
284 
285 	return (0);
286 }
287 SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet6,
288     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
289     0, 0, run_test_inet6, "I", "Execute fib6_lookup test");
290 
291 static bool
292 cmp_dst(uint32_t fibnum, struct in_addr a)
293 {
294 	struct nhop_object *nh_fib;
295 	struct rtentry *rt;
296 	struct route_nhop_data rnd = {};
297 
298 	nh_fib = fib4_lookup(fibnum, a, 0, NHR_NONE, 0);
299 	rt = fib4_lookup_rt(fibnum, a, 0, NHR_NONE, &rnd);
300 
301 	if (nh_fib == NULL && rt == NULL) {
302 		return (true);
303 	} else if (nh_fib == nhop_select(rnd.rnd_nhop, 0)) {
304 		return (true);
305 	}
306 
307 	struct in_addr dst;
308 	int plen;
309 	uint32_t scopeid;
310 	char key_str[INET_ADDRSTRLEN], dst_str[INET_ADDRSTRLEN];
311 
312 	inet_ntop(AF_INET, &a, key_str, sizeof(key_str));
313 	if (rnd.rnd_nhop == NULL) {
314 		printf("[RT BUG] lookup for %s: RIB: ENOENT FIB: nh=%u\n",
315 		    key_str, nhop_get_idx(nh_fib));
316 	} else {
317 		rt_get_inet_prefix_plen(rt, &dst, &plen, &scopeid);
318 		inet_ntop(AF_INET, &dst, dst_str, sizeof(dst_str));
319 		printf("[RT BUG] lookup for %s: RIB: %s/%d,nh=%u FIB: nh=%u\n",
320 		    key_str, dst_str, plen,
321 		    nhop_get_idx(nhop_select(rnd.rnd_nhop, 0)),
322 		    nh_fib ? nhop_get_idx(nh_fib) : 0);
323 	}
324 
325 	return (false);
326 }
327 
328 static bool
329 cmp_dst6(uint32_t fibnum, const struct in6_addr *a)
330 {
331 	struct nhop_object *nh_fib;
332 	struct rtentry *rt;
333 	struct route_nhop_data rnd = {};
334 
335 	nh_fib = fib6_lookup(fibnum, a, 0, NHR_NONE, 0);
336 	rt = fib6_lookup_rt(fibnum, a, 0, NHR_NONE, &rnd);
337 
338 	if (nh_fib == NULL && rt == NULL) {
339 		return (true);
340 	} else if (nh_fib == nhop_select(rnd.rnd_nhop, 0)) {
341 		return (true);
342 	}
343 
344 	struct in6_addr dst;
345 	int plen;
346 	uint32_t scopeid;
347 	char key_str[INET6_ADDRSTRLEN], dst_str[INET6_ADDRSTRLEN];
348 
349 	inet_ntop(AF_INET6, a, key_str, sizeof(key_str));
350 	if (rnd.rnd_nhop == NULL) {
351 		printf("[RT BUG] lookup for %s: RIB: ENOENT FIB: nh=%u\n",
352 		    key_str, nhop_get_idx(nh_fib));
353 	} else {
354 		rt_get_inet6_prefix_plen(rt, &dst, &plen, &scopeid);
355 		inet_ntop(AF_INET6, &dst, dst_str, sizeof(dst_str));
356 		printf("[RT BUG] lookup for %s: RIB: %s/%d,nh=%u FIB: nh=%u\n",
357 		    key_str, dst_str, plen,
358 		    nhop_get_idx(nhop_select(rnd.rnd_nhop, 0)),
359 		    nh_fib ? nhop_get_idx(nh_fib) : 0);
360 	}
361 
362 	return (false);
363 }
364 
365 /* Random lookups: correctness verification */
366 static uint64_t
367 run_test_inet_one_pass_random(uint32_t fibnum)
368 {
369 	/* Assume epoch */
370 	struct in_addr a[64];
371 	int sz = 64;
372 	uint64_t count = 0;
373 
374 	for (int pass = 0; pass < CHUNK_SIZE / sz; pass++) {
375 		arc4random_buf(a, sizeof(a));
376 		for (int i = 0; i < sz; i++) {
377 			if (!cmp_dst(fibnum, a[i]))
378 				return (0);
379 			count++;
380 		}
381 	}
382 	return (count);
383 }
384 
385 static int
386 run_test_inet_random(SYSCTL_HANDLER_ARGS)
387 {
388 	struct epoch_tracker et;
389 
390 	int count = 0;
391 	int error = sysctl_handle_int(oidp, &count, 0, req);
392 	if (error != 0)
393 		return (error);
394 
395 	if (count == 0)
396 		return (0);
397 
398 	if (count < CHUNK_SIZE)
399 		count = CHUNK_SIZE;
400 
401 	struct timespec ts_pre, ts_post;
402 	int64_t pass_diff, total_diff = 1;
403 	uint64_t pass_packets, total_packets = 0;
404 	uint32_t fibnum = curthread->td_proc->p_fibnum;
405 
406 	for (int pass = 0; pass < count / CHUNK_SIZE; pass++) {
407 		NET_EPOCH_ENTER(et);
408 		nanouptime(&ts_pre);
409 		pass_packets = run_test_inet_one_pass_random(fibnum);
410 		nanouptime(&ts_post);
411 		NET_EPOCH_EXIT(et);
412 
413 		pass_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
414 		    (ts_post.tv_nsec - ts_pre.tv_nsec);
415 		total_diff += pass_diff;
416 		total_packets += pass_packets;
417 
418 		if (pass_packets == 0)
419 			break;
420 	}
421 
422 	/* Signal error to userland */
423 	if (pass_packets == 0)
424 		return (EINVAL);
425 
426 	printf("%zu packets in %zu nanoseconds, %zu pps\n",
427 	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
428 
429 	return (0);
430 }
431 SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet_random,
432     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
433     0, 0, run_test_inet_random, "I", "Execute fib4_lookup random check tests");
434 
435 
/*
 * State shared between prepare_list() and its add_prefix() walk callback
 * while the IPv4 scan key array is being built.
 */
struct inet_array {
	uint32_t	alloc_items;	/* capacity of arr, in elements */
	uint32_t	num_items;	/* elements of arr filled so far */
	uint32_t	rnh_prefixes;	/* prefixes visited by the walk */
	int		error;		/* first error hit by the walk, or 0 */
	struct in_addr	*arr;		/* the lookup keys */
};
443 
444 /*
445  * For each prefix, add the following records to the lookup array:
446  * * prefix-1, prefix, prefix + 1, prefix_end, prefix_end + 1
447  */
448 static int
449 add_prefix(struct rtentry *rt, void *_data)
450 {
451 	struct inet_array *pa = (struct inet_array *)_data;
452 	struct in_addr addr;
453 	int plen;
454 	uint32_t scopeid, haddr;
455 
456 	pa->rnh_prefixes++;
457 
458 	if (pa->num_items + 5 >= pa->alloc_items) {
459 		if (pa->error == 0)
460 			pa->error = ENOSPC;
461 		return (0);
462 	}
463 
464 	rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
465 
466 	pa->arr[pa->num_items++] = addr;
467 	haddr = ntohl(addr.s_addr);
468 	if (haddr > 0) {
469 		pa->arr[pa->num_items++].s_addr = htonl(haddr - 1);
470 		pa->arr[pa->num_items++].s_addr = htonl(haddr + 1);
471 		/* assume mask != 0 */
472 		uint32_t mlen = (1 << (32 - plen)) - 1;
473 		pa->arr[pa->num_items++].s_addr = htonl(haddr + mlen);
474 		/* can overflow, but who cares */
475 		pa->arr[pa->num_items++].s_addr = htonl(haddr + mlen + 1);
476 	}
477 
478 	return (0);
479 }
480 
481 static bool
482 prepare_list(uint32_t fibnum, struct inet_array *pa)
483 {
484 	struct rib_head *rh;
485 
486 	rh = rt_tables_get_rnh(fibnum, AF_INET);
487 
488 	uint32_t num_prefixes = rh->rnh_prefixes;
489 	bzero(pa, sizeof(struct inet_array));
490 	pa->alloc_items = (num_prefixes + 10) * 5;
491 	pa->arr = mallocarray(pa->alloc_items, sizeof(struct in_addr),
492 	    M_TEMP, M_ZERO | M_WAITOK);
493 
494 	rib_walk(fibnum, AF_INET, false, add_prefix, pa);
495 
496 	if (pa->error != 0) {
497 		printf("prefixes: old: %u, current: %u, walked: %u, allocated: %u\n",
498 		    num_prefixes, rh->rnh_prefixes, pa->rnh_prefixes, pa->alloc_items);
499 	}
500 
501 	return (pa->error == 0);
502 }
503 
504 static int
505 run_test_inet_scan(SYSCTL_HANDLER_ARGS)
506 {
507 	struct epoch_tracker et;
508 
509 	int count = 0;
510 	int error = sysctl_handle_int(oidp, &count, 0, req);
511 	if (error != 0)
512 		return (error);
513 
514 	if (count == 0)
515 		return (0);
516 
517 	struct inet_array pa = {};
518 	uint32_t fibnum = curthread->td_proc->p_fibnum;
519 
520 	if (!prepare_list(fibnum, &pa))
521 		return (pa.error);
522 
523 	struct timespec ts_pre, ts_post;
524 	int64_t total_diff = 1;
525 	uint64_t total_packets = 0;
526 	int failure_count = 0;
527 
528 	NET_EPOCH_ENTER(et);
529 	nanouptime(&ts_pre);
530 	for (int i = 0; i < pa.num_items; i++) {
531 		if (!cmp_dst(fibnum, pa.arr[i])) {
532 			failure_count++;
533 		}
534 		total_packets++;
535 	}
536 	nanouptime(&ts_post);
537 	NET_EPOCH_EXIT(et);
538 
539 	if (pa.arr != NULL)
540 		free(pa.arr, M_TEMP);
541 
542 	/* Signal error to userland */
543 	if (failure_count > 0) {
544 		printf("[RT ERROR] total failures: %d\n", failure_count);
545 		return (EINVAL);
546 	}
547 
548 	total_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
549 	    (ts_post.tv_nsec - ts_pre.tv_nsec);
550 	printf("%zu packets in %zu nanoseconds, %zu pps\n",
551 	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
552 
553 	return (0);
554 }
555 SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet_scan,
556     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
557     0, 0, run_test_inet_scan, "I", "Execute fib4_lookup scan tests");
558 
/*
 * State shared between prepare_list6() and its add_prefix6() walk callback
 * while the IPv6 scan key array is being built.
 */
struct inet6_array {
	uint32_t	alloc_items;	/* capacity of arr, in elements */
	uint32_t	num_items;	/* elements of arr filled so far */
	uint32_t	rnh_prefixes;	/* prefixes visited by the walk */
	int		error;		/* first error hit by the walk, or 0 */
	struct in6_addr	*arr;		/* the lookup keys */
};
566 
/*
 * Adds inc to *v modulo 2^32.
 *
 * Returns true when the sum fits in 32 bits and false when it wrapped,
 * i.e. when a carry has to be propagated to the next more significant
 * word.  A sum of exactly UINT32_MAX fits and is not a carry.
 */
static bool
safe_add(uint32_t *v, uint32_t inc)
{
	uint32_t sum = *v + inc;
	/* Unsigned addition wrapped iff the result is below an operand. */
	bool fits = (sum >= *v);

	*v = sum;
	return (fits);
}
578 
/*
 * Subtracts inc from *v modulo 2^32.
 *
 * Returns true when no borrow was needed (*v was at least inc) and false
 * when the result wrapped around.
 */
static bool
safe_dec(uint32_t *v, uint32_t inc)
{
	bool no_borrow = (*v >= inc);

	/* Unsigned subtraction wraps modulo 2^32 in the borrow case. */
	*v -= inc;
	return (no_borrow);
}
590 
591 static void
592 inc_prefix6(struct in6_addr *addr, int inc)
593 {
594 	for (int i = 0; i < 4; i++) {
595 		uint32_t v = ntohl(addr->s6_addr32[3 - i]);
596 		bool ret = safe_add(&v, inc);
597 		addr->s6_addr32[3 - i] = htonl(v);
598 		if (ret)
599 			return;
600 		inc = 1;
601 	}
602 }
603 
604 static void
605 dec_prefix6(struct in6_addr *addr, int dec)
606 {
607 	for (int i = 0; i < 4; i++) {
608 		uint32_t v = ntohl(addr->s6_addr32[3 - i]);
609 		bool ret = safe_dec(&v, dec);
610 		addr->s6_addr32[3 - i] = htonl(v);
611 		if (ret)
612 			return;
613 		dec = 1;
614 	}
615 }
616 
/*
 * Writes the IPv6 netmask for a prefix length of mask bits into *addr6:
 * the first mask bits (in network order) are set, all remaining bits are
 * cleared.  Prefix lengths above 128 are treated as 128.
 *
 * The whole structure is written, so the caller does not need to
 * initialise it beforehand.
 */
static void
ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
{
	unsigned int full_bytes, tail_bits;

	if (mask > 128)
		mask = 128;

	/* Clear everything first so the host part is well defined. */
	memset(addr6, 0, sizeof(*addr6));

	full_bytes = mask / 8;
	tail_bits = mask % 8;
	memset(addr6->s6_addr, 0xff, full_bytes);
	if (tail_bits != 0) {
		/* Partial byte: set only its tail_bits most significant bits. */
		addr6->s6_addr[full_bytes] = (uint8_t)(0xff << (8 - tail_bits));
	}
}
627 
628 /*
629  * For each prefix, add the following records to the lookup array:
630  * * prefix-1, prefix, prefix + 1, prefix_end, prefix_end + 1
631  */
632 static int
633 add_prefix6(struct rtentry *rt, void *_data)
634 {
635 	struct inet6_array *pa = (struct inet6_array *)_data;
636 	struct in6_addr addr, naddr;
637 	int plen;
638 	uint32_t scopeid;
639 
640 	pa->rnh_prefixes++;
641 
642 	if (pa->num_items + 5 >= pa->alloc_items) {
643 		if (pa->error == 0)
644 			pa->error = ENOSPC;
645 		return (0);
646 	}
647 
648 	rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid);
649 
650 	pa->arr[pa->num_items++] = addr;
651 	if (!IN6_ARE_ADDR_EQUAL(&addr, &in6addr_any)) {
652 		naddr = addr;
653 		dec_prefix6(&naddr, 1);
654 		pa->arr[pa->num_items++] = naddr;
655 		naddr = addr;
656 		inc_prefix6(&naddr, 1);
657 		pa->arr[pa->num_items++] = naddr;
658 
659 		/* assume mask != 0 */
660 		struct in6_addr mask6;
661 		ipv6_writemask(&mask6, plen);
662 		naddr = addr;
663 		for (int i = 0; i < 3; i++)
664 			naddr.s6_addr32[i] = htonl(ntohl(naddr.s6_addr32[i]) | ~ntohl(mask6.s6_addr32[i]));
665 
666 		pa->arr[pa->num_items++] = naddr;
667 		inc_prefix6(&naddr, 1);
668 		pa->arr[pa->num_items++] = naddr;
669 	}
670 
671 	return (0);
672 }
673 
674 static bool
675 prepare_list6(uint32_t fibnum, struct inet6_array *pa)
676 {
677 	struct rib_head *rh;
678 
679 	rh = rt_tables_get_rnh(fibnum, AF_INET6);
680 
681 	uint32_t num_prefixes = rh->rnh_prefixes;
682 	bzero(pa, sizeof(struct inet6_array));
683 	pa->alloc_items = (num_prefixes + 10) * 5;
684 	pa->arr = mallocarray(pa->alloc_items, sizeof(struct in6_addr),
685 	    M_TEMP, M_ZERO | M_WAITOK);
686 
687 	rib_walk(fibnum, AF_INET6, false, add_prefix6, pa);
688 
689 	if (pa->error != 0) {
690 		printf("prefixes: old: %u, current: %u, walked: %u, allocated: %u\n",
691 		    num_prefixes, rh->rnh_prefixes, pa->rnh_prefixes, pa->alloc_items);
692 	}
693 
694 	return (pa->error == 0);
695 }
696 
697 static int
698 run_test_inet6_scan(SYSCTL_HANDLER_ARGS)
699 {
700 	struct epoch_tracker et;
701 
702 	int count = 0;
703 	int error = sysctl_handle_int(oidp, &count, 0, req);
704 	if (error != 0)
705 		return (error);
706 
707 	if (count == 0)
708 		return (0);
709 
710 	struct inet6_array pa = {};
711 	uint32_t fibnum = curthread->td_proc->p_fibnum;
712 
713 	if (!prepare_list6(fibnum, &pa))
714 		return (pa.error);
715 
716 	struct timespec ts_pre, ts_post;
717 	int64_t total_diff = 1;
718 	uint64_t total_packets = 0;
719 	int failure_count = 0;
720 
721 	NET_EPOCH_ENTER(et);
722 	nanouptime(&ts_pre);
723 	for (int i = 0; i < pa.num_items; i++) {
724 		if (!cmp_dst6(fibnum, &pa.arr[i])) {
725 			failure_count++;
726 		}
727 		total_packets++;
728 	}
729 	nanouptime(&ts_post);
730 	NET_EPOCH_EXIT(et);
731 
732 	if (pa.arr != NULL)
733 		free(pa.arr, M_TEMP);
734 
735 	/* Signal error to userland */
736 	if (failure_count > 0) {
737 		printf("[RT ERROR] total failures: %d\n", failure_count);
738 		return (EINVAL);
739 	}
740 
741 	total_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
742 	    (ts_post.tv_nsec - ts_pre.tv_nsec);
743 	printf("%zu packets in %zu nanoseconds, %zu pps\n",
744 	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
745 
746 	return (0);
747 }
748 SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet6_scan,
749     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
750     0, 0, run_test_inet6_scan, "I", "Execute fib6_lookup scan tests");
751 
/*
 * Mode bits for the lookups-per-second test, passed to rnd_lps() as the
 * sysctl's second argument.
 */
#define	LPS_SEQ		0x1	/* artificial dependencies between lookups */
#define	LPS_ANN		0x2	/* keys from announced address space */
#define	LPS_REP		0x4	/* repeated keys */

/* Cursor over the key array handed to the reduce_keys() walk callback. */
struct lps_walk_state {
	uint32_t	*keys;	/* key array being rewritten */
	int		pos;	/* index of the next key to rewrite */
	int		lim;	/* number of keys in the array */
};
761 
762 static int
763 reduce_keys(struct rtentry *rt, void *_data)
764 {
765         struct lps_walk_state *wa = (struct lps_walk_state *) _data;
766 	struct in_addr addr;
767 	uint32_t scopeid;
768 	int plen;
769 
770 	rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
771 	wa->keys[wa->pos] = ntohl(addr.s_addr) |
772 	    (wa->keys[wa->pos] & ~(0xffffffffU << (32 - plen)));
773 
774 	wa->pos++;
775 	return (wa->pos == wa->lim);
776 }
777 
778 static int
779 rnd_lps(SYSCTL_HANDLER_ARGS)
780 {
781 	struct epoch_tracker et;
782 	struct in_addr key;
783 	struct lps_walk_state wa;
784 	struct timespec ts_pre, ts_post;
785 	struct nhop_object *nh_fib;
786 	uint64_t total_diff, lps;
787 	uint32_t *keys, fibnum;
788 	uint32_t t, p;
789 	uintptr_t acc = 0;
790 	int i, pos, count = 0;
791 	int seq = 0, rep = 0;
792 	int error;
793 
794 	error = sysctl_handle_int(oidp, &count, 0, req);
795 	if (error != 0)
796 		return (error);
797 	if (count <= 0)
798 		return (0);
799 	fibnum = curthread->td_proc->p_fibnum;
800 
801 	keys = malloc(sizeof(*keys) * count, M_TEMP, M_NOWAIT);
802 	if (keys == NULL)
803 		return (ENOMEM);
804 	printf("Preparing %d random keys...\n", count);
805 	arc4random_buf(keys, sizeof(*keys) * count);
806 	if (arg2 & LPS_ANN) {
807 		wa.keys = keys;
808 		wa.pos = 0;
809 		wa.lim = count;
810 		printf("Reducing keys to announced address space...\n");
811 		do {
812 			rib_walk(fibnum, AF_INET, false, reduce_keys,
813 			    &wa);
814 		} while (wa.pos < wa.lim);
815 		printf("Reshuffling keys...\n");
816 		for (int i = 0; i < count; i++) {
817 			p = random() % count;
818 			t = keys[p];
819 			keys[p] = keys[i];
820 			keys[i] = t;
821 		}
822 	}
823 
824 	if (arg2 & LPS_REP) {
825 		rep = 1;
826 		printf("REP ");
827 	}
828 	if (arg2 & LPS_SEQ) {
829 		seq = 1;
830 		printf("SEQ");
831 	} else if (arg2 & LPS_ANN)
832 		printf("ANN");
833 	else
834 		printf("RND");
835 	printf(" LPS test starting...\n");
836 
837 	NET_EPOCH_ENTER(et);
838 	nanouptime(&ts_pre);
839 	for (i = 0, pos = 0; i < count; i++) {
840 		key.s_addr = keys[pos++] ^ ((acc >> 10) & 0xff);
841 		nh_fib = fib4_lookup(fibnum, key, 0, NHR_NONE, 0);
842 		if (seq) {
843 			if (nh_fib != NULL) {
844 				acc += (uintptr_t) nh_fib + 123;
845 				if (acc & 0x1000)
846 					acc += (uintptr_t) nh_fib->nh_ifp;
847 				else
848 					acc -= (uintptr_t) nh_fib->nh_ifp;
849 			} else
850 				acc ^= (acc >> 3) + (acc << 2) + i;
851 			if (acc & 0x800)
852 				pos++;
853 			if (pos >= count)
854 				pos = 0;
855 		}
856 		if (rep && ((i & 0xf) == 0xf)) {
857 			pos -= 0xf;
858 			if (pos < 0)
859 				pos += 0xf;
860 		}
861 	}
862 	nanouptime(&ts_post);
863 	NET_EPOCH_EXIT(et);
864 
865 	free(keys, M_TEMP);
866 
867 	total_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
868 	    (ts_post.tv_nsec - ts_pre.tv_nsec);
869 	lps = 1000000000ULL * count / total_diff;
870 	printf("%d lookups in %zu.%06zu milliseconds, %lu.%06lu MLPS\n",
871 	    count, total_diff / 1000000, total_diff % 1000000,
872 	    lps / 1000000, lps % 1000000);
873 
874 	return (0);
875 }
876 SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd,
877     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
878     0, 0, rnd_lps, "I",
879     "Measure lookups per second, uniformly random keys, independent lookups");
880 SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd_ann,
881     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
882     0, LPS_ANN, rnd_lps, "I",
883     "Measure lookups per second, random keys from announced address space, "
884     "independent lookups");
885 SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq,
886     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
887     0, LPS_SEQ, rnd_lps, "I",
888     "Measure lookups per second, uniformly random keys, "
889     "artificial dependencies between lookups");
890 SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq_ann,
891     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
892     0, LPS_SEQ | LPS_ANN, rnd_lps, "I",
893     "Measure lookups per second, random keys from announced address space, "
894     "artificial dependencies between lookups");
895 SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd_rep,
896     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
897     0, LPS_REP, rnd_lps, "I",
898     "Measure lookups per second, uniformly random keys, independent lookups, "
899     "repeated keys");
900 SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd_ann_rep,
901     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
902     0, LPS_ANN | LPS_REP, rnd_lps, "I",
903     "Measure lookups per second, random keys from announced address space, "
904     "independent lookups, repeated keys");
905 SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq_rep,
906     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
907     0, LPS_SEQ | LPS_REP, rnd_lps, "I",
908     "Measure lookups per second, uniformly random keys, "
909     "artificial dependencies between lookups, repeated keys");
910 SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq_ann_rep,
911     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
912     0, LPS_SEQ | LPS_ANN | LPS_REP, rnd_lps, "I",
913     "Measure lookups per second, random keys from announced address space, "
914     "artificial dependencies between lookups, repeated keys");
915 
916 static int
917 test_fib_lookup_modevent(module_t mod, int type, void *unused)
918 {
919 	int error = 0;
920 
921 	switch (type) {
922 	case MOD_LOAD:
923 		break;
924 	case MOD_UNLOAD:
925 		if (V_inet_addr_list != NULL)
926 			free(V_inet_addr_list, M_TEMP);
927 		if (V_inet6_addr_list != NULL)
928 			free(V_inet6_addr_list, M_TEMP);
929 		break;
930 	default:
931 		error = EOPNOTSUPP;
932 		break;
933 	}
934 	return (error);
935 }
936 
937 static moduledata_t testfiblookupmod = {
938         "test_fib_lookup",
939         test_fib_lookup_modevent,
940         0
941 };
942 
943 DECLARE_MODULE(testfiblookupmod, testfiblookupmod, SI_SUB_PSEUDO, SI_ORDER_ANY);
944 MODULE_VERSION(testfiblookup, 1);
945