xref: /freebsd/sys/tests/fib_lookup/fib_lookup.c (revision a03411e84728e9b267056fd31c7d1d9d1dc1b01e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/rmlock.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/kernel.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
41 #include <net/vnet.h>
42 
43 #include <net/if.h>
44 #include <net/if_var.h>
45 
46 #include <netinet/in.h>
47 #include <netinet/in_fib.h>
48 #include <netinet/ip.h>
49 
50 #include <netinet6/in6_fib.h>
51 
52 #include <net/route.h>
53 #include <net/route/nhop.h>
54 #include <net/route/route_ctl.h>
55 #include <net/route/route_var.h>
56 #include <net/route/fib_algo.h>
57 
/*
 * Target number of lookups per timed pass.  The sysctl handlers below run
 * count / CHUNK_SIZE passes, each inside its own net epoch section.
 */
#define	CHUNK_SIZE	10000

/* Per-VNET array of IPv4 lookup keys filled by add_addr(), and its length. */
VNET_DEFINE_STATIC(struct in_addr *, inet_addr_list);
#define	V_inet_addr_list	VNET(inet_addr_list)
VNET_DEFINE_STATIC(int, inet_list_size);
#define	V_inet_list_size	VNET(inet_list_size)

/* Per-VNET array of IPv6 lookup keys filled by add_addr(), and its length. */
VNET_DEFINE_STATIC(struct in6_addr *, inet6_addr_list);
#define	V_inet6_addr_list	VNET(inet6_addr_list)
VNET_DEFINE_STATIC(int, inet6_list_size);
#define	V_inet6_list_size	VNET(inet6_list_size)

/* Parent node (net.route.test) for every sysctl registered in this file. */
SYSCTL_DECL(_net_route);
SYSCTL_NODE(_net_route, OID_AUTO, test, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Route algorithm lookups");
73 
74 static int
75 add_addr(int family, char *addr_str)
76 {
77 
78 	if (family == AF_INET) {
79 		struct in_addr *paddr_old = V_inet_addr_list;
80 		int size_old = V_inet_list_size;
81 		struct in_addr addr;
82 
83 		if (inet_pton(AF_INET, addr_str, &addr) != 1)
84 			return (EINVAL);
85 
86 		struct in_addr *paddr = mallocarray(size_old + 1,
87 		    sizeof(struct in_addr), M_TEMP, M_ZERO | M_WAITOK);
88 
89 		if (paddr_old != NULL) {
90 			memcpy(paddr, paddr_old, size_old * sizeof(struct in_addr));
91 			free(paddr_old, M_TEMP);
92 		}
93 		paddr[size_old] = addr;
94 
95 		V_inet_addr_list = paddr;
96 		V_inet_list_size = size_old + 1;
97 		inet_ntop(AF_INET, &addr, addr_str, sizeof(addr_str));
98 	} else if (family == AF_INET6) {
99 		struct in6_addr *paddr_old = V_inet6_addr_list;
100 		int size_old = V_inet6_list_size;
101 		struct in6_addr addr6;
102 
103 		if (inet_pton(AF_INET6, addr_str, &addr6) != 1)
104 			return (EINVAL);
105 
106 		struct in6_addr *paddr = mallocarray(size_old + 1,
107 		    sizeof(struct in6_addr), M_TEMP, M_ZERO | M_WAITOK);
108 
109 		if (paddr_old != NULL) {
110 			memcpy(paddr, paddr_old, size_old * sizeof(struct in6_addr));
111 			free(paddr_old, M_TEMP);
112 		}
113 		paddr[size_old] = addr6;
114 
115 		V_inet6_addr_list = paddr;
116 		V_inet6_list_size = size_old + 1;
117 		inet_ntop(AF_INET6, &addr6, addr_str, sizeof(addr_str));
118 	}
119 
120 	return (0);
121 }
122 
123 static int
124 add_addr_sysctl_handler(struct sysctl_oid *oidp, struct sysctl_req *req, int family)
125 {
126 	char addr_str[INET6_ADDRSTRLEN];
127 	int error;
128 
129 	bzero(addr_str, sizeof(addr_str));
130 
131 	error = sysctl_handle_string(oidp, addr_str, sizeof(addr_str), req);
132 	if (error != 0 || req->newptr == NULL)
133 		return (error);
134 
135 	error = add_addr(family, addr_str);
136 
137 	return (0);
138 }
139 
140 static int
141 add_inet_addr_sysctl_handler(SYSCTL_HANDLER_ARGS)
142 {
143 
144 	return (add_addr_sysctl_handler(oidp, req, AF_INET));
145 }
146 SYSCTL_PROC(_net_route_test, OID_AUTO, add_inet_addr,
147     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
148     add_inet_addr_sysctl_handler, "A", "Set");
149 
150 static int
151 add_inet6_addr_sysctl_handler(SYSCTL_HANDLER_ARGS)
152 {
153 
154 	return (add_addr_sysctl_handler(oidp, req, AF_INET6));
155 }
156 SYSCTL_PROC(_net_route_test, OID_AUTO, add_inet6_addr,
157     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
158     add_inet6_addr_sysctl_handler, "A", "Set");
159 
160 static uint64_t
161 run_test_inet_one_pass(uint32_t fibnum)
162 {
163 	/* Assume epoch */
164 	int sz = V_inet_list_size;
165 	int tries = CHUNK_SIZE / sz;
166 	const struct in_addr *a = V_inet_addr_list;
167 	uint64_t count = 0;
168 
169 	for (int pass = 0; pass < tries; pass++) {
170 		for (int i = 0; i < sz; i++) {
171 			fib4_lookup(fibnum, a[i], 0, NHR_NONE, 0);
172 			count++;
173 		}
174 	}
175 	return (count);
176 }
177 
178 static int
179 run_test_inet(SYSCTL_HANDLER_ARGS)
180 {
181 	struct epoch_tracker et;
182 
183 	int count = 0;
184 	int error = sysctl_handle_int(oidp, &count, 0, req);
185 	if (error != 0)
186 		return (error);
187 
188 	if (count == 0)
189 		return (0);
190 
191 	if (V_inet_list_size <= 0)
192 		return (ENOENT);
193 
194 	printf("run: %d packets vnet %p\n", count, curvnet);
195 	if (count < CHUNK_SIZE)
196 		count = CHUNK_SIZE;
197 
198 	struct timespec ts_pre, ts_post;
199 	int64_t pass_diff, total_diff = 0;
200 	uint64_t pass_packets, total_packets = 0;
201 	uint32_t fibnum = curthread->td_proc->p_fibnum;
202 
203 	for (int pass = 0; pass < count / CHUNK_SIZE; pass++) {
204 		NET_EPOCH_ENTER(et);
205 		nanouptime(&ts_pre);
206 		pass_packets = run_test_inet_one_pass(fibnum);
207 		nanouptime(&ts_post);
208 		NET_EPOCH_EXIT(et);
209 
210 		pass_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
211 		    (ts_post.tv_nsec - ts_pre.tv_nsec);
212 		total_diff += pass_diff;
213 		total_packets += pass_packets;
214 	}
215 
216 	printf("%zu packets in %zu nanoseconds, %zu pps\n",
217 	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
218 
219 	return (0);
220 }
221 SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet,
222     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
223     0, 0, run_test_inet, "I", "Execute fib4_lookup test");
224 
225 static uint64_t
226 run_test_inet6_one_pass(uint32_t fibnum)
227 {
228 	/* Assume epoch */
229 	int sz = V_inet6_list_size;
230 	int tries = CHUNK_SIZE / sz;
231 	const struct in6_addr *a = V_inet6_addr_list;
232 	uint64_t count = 0;
233 
234 	for (int pass = 0; pass < tries; pass++) {
235 		for (int i = 0; i < sz; i++) {
236 			fib6_lookup(fibnum, &a[i], 0, NHR_NONE, 0);
237 			count++;
238 		}
239 	}
240 	return (count);
241 }
242 
243 static int
244 run_test_inet6(SYSCTL_HANDLER_ARGS)
245 {
246 	struct epoch_tracker et;
247 
248 	int count = 0;
249 	int error = sysctl_handle_int(oidp, &count, 0, req);
250 	if (error != 0)
251 		return (error);
252 
253 	if (count == 0)
254 		return (0);
255 
256 	if (V_inet6_list_size <= 0)
257 		return (ENOENT);
258 
259 	printf("run: %d packets vnet %p\n", count, curvnet);
260 	if (count < CHUNK_SIZE)
261 		count = CHUNK_SIZE;
262 
263 	struct timespec ts_pre, ts_post;
264 	int64_t pass_diff, total_diff = 0;
265 	uint64_t pass_packets, total_packets = 0;
266 	uint32_t fibnum = curthread->td_proc->p_fibnum;
267 
268 	for (int pass = 0; pass < count / CHUNK_SIZE; pass++) {
269 		NET_EPOCH_ENTER(et);
270 		nanouptime(&ts_pre);
271 		pass_packets = run_test_inet6_one_pass(fibnum);
272 		nanouptime(&ts_post);
273 		NET_EPOCH_EXIT(et);
274 
275 		pass_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
276 		    (ts_post.tv_nsec - ts_pre.tv_nsec);
277 		total_diff += pass_diff;
278 		total_packets += pass_packets;
279 	}
280 
281 	printf("%zu packets in %zu nanoseconds, %zu pps\n",
282 	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
283 
284 	return (0);
285 }
286 SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet6,
287     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
288     0, 0, run_test_inet6, "I", "Execute fib6_lookup test");
289 
290 static bool
291 cmp_dst(uint32_t fibnum, struct in_addr a)
292 {
293 	struct nhop_object *nh_fib;
294 	struct rtentry *rt;
295 	struct route_nhop_data rnd = {};
296 
297 	nh_fib = fib4_lookup(fibnum, a, 0, NHR_NONE, 0);
298 	rt = fib4_lookup_rt(fibnum, a, 0, NHR_NONE, &rnd);
299 
300 	if (nh_fib == NULL && rt == NULL) {
301 		return (true);
302 	} else if (nh_fib == nhop_select(rnd.rnd_nhop, 0)) {
303 		return (true);
304 	}
305 
306 	struct in_addr dst;
307 	int plen;
308 	uint32_t scopeid;
309 	char key_str[INET_ADDRSTRLEN], dst_str[INET_ADDRSTRLEN];
310 
311 	inet_ntop(AF_INET, &a, key_str, sizeof(key_str));
312 	if (rnd.rnd_nhop == NULL) {
313 		printf("[RT BUG] lookup for %s: RIB: ENOENT FIB: nh=%u\n",
314 		    key_str, nhop_get_idx(nh_fib));
315 	} else {
316 		rt_get_inet_prefix_plen(rt, &dst, &plen, &scopeid);
317 		inet_ntop(AF_INET, &dst, dst_str, sizeof(dst_str));
318 		printf("[RT BUG] lookup for %s: RIB: %s/%d,nh=%u FIB: nh=%u\n",
319 		    key_str, dst_str, plen,
320 		    nhop_get_idx(nhop_select(rnd.rnd_nhop, 0)),
321 		    nh_fib ? nhop_get_idx(nh_fib) : 0);
322 	}
323 
324 	return (false);
325 }
326 
327 static bool
328 cmp_dst6(uint32_t fibnum, const struct in6_addr *a)
329 {
330 	struct nhop_object *nh_fib;
331 	struct rtentry *rt;
332 	struct route_nhop_data rnd = {};
333 
334 	nh_fib = fib6_lookup(fibnum, a, 0, NHR_NONE, 0);
335 	rt = fib6_lookup_rt(fibnum, a, 0, NHR_NONE, &rnd);
336 
337 	if (nh_fib == NULL && rt == NULL) {
338 		return (true);
339 	} else if (nh_fib == nhop_select(rnd.rnd_nhop, 0)) {
340 		return (true);
341 	}
342 
343 	struct in6_addr dst;
344 	int plen;
345 	uint32_t scopeid;
346 	char key_str[INET6_ADDRSTRLEN], dst_str[INET6_ADDRSTRLEN];
347 
348 	inet_ntop(AF_INET6, a, key_str, sizeof(key_str));
349 	if (rnd.rnd_nhop == NULL) {
350 		printf("[RT BUG] lookup for %s: RIB: ENOENT FIB: nh=%u\n",
351 		    key_str, nhop_get_idx(nh_fib));
352 	} else {
353 		rt_get_inet6_prefix_plen(rt, &dst, &plen, &scopeid);
354 		inet_ntop(AF_INET6, &dst, dst_str, sizeof(dst_str));
355 		printf("[RT BUG] lookup for %s: RIB: %s/%d,nh=%u FIB: nh=%u\n",
356 		    key_str, dst_str, plen,
357 		    nhop_get_idx(nhop_select(rnd.rnd_nhop, 0)),
358 		    nh_fib ? nhop_get_idx(nh_fib) : 0);
359 	}
360 
361 	return (false);
362 }
363 
364 /* Random lookups: correctness verification */
365 static uint64_t
366 run_test_inet_one_pass_random(uint32_t fibnum)
367 {
368 	/* Assume epoch */
369 	struct in_addr a[64];
370 	int sz = 64;
371 	uint64_t count = 0;
372 
373 	for (int pass = 0; pass < CHUNK_SIZE / sz; pass++) {
374 		arc4random_buf(a, sizeof(a));
375 		for (int i = 0; i < sz; i++) {
376 			if (!cmp_dst(fibnum, a[i]))
377 				return (0);
378 			count++;
379 		}
380 	}
381 	return (count);
382 }
383 
384 static int
385 run_test_inet_random(SYSCTL_HANDLER_ARGS)
386 {
387 	struct epoch_tracker et;
388 
389 	int count = 0;
390 	int error = sysctl_handle_int(oidp, &count, 0, req);
391 	if (error != 0)
392 		return (error);
393 
394 	if (count == 0)
395 		return (0);
396 
397 	if (count < CHUNK_SIZE)
398 		count = CHUNK_SIZE;
399 
400 	struct timespec ts_pre, ts_post;
401 	int64_t pass_diff, total_diff = 1;
402 	uint64_t pass_packets, total_packets = 0;
403 	uint32_t fibnum = curthread->td_proc->p_fibnum;
404 
405 	for (int pass = 0; pass < count / CHUNK_SIZE; pass++) {
406 		NET_EPOCH_ENTER(et);
407 		nanouptime(&ts_pre);
408 		pass_packets = run_test_inet_one_pass_random(fibnum);
409 		nanouptime(&ts_post);
410 		NET_EPOCH_EXIT(et);
411 
412 		pass_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
413 		    (ts_post.tv_nsec - ts_pre.tv_nsec);
414 		total_diff += pass_diff;
415 		total_packets += pass_packets;
416 
417 		if (pass_packets == 0)
418 			break;
419 	}
420 
421 	/* Signal error to userland */
422 	if (pass_packets == 0)
423 		return (EINVAL);
424 
425 	printf("%zu packets in %zu nanoseconds, %zu pps\n",
426 	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
427 
428 	return (0);
429 }
430 SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet_random,
431     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
432     0, 0, run_test_inet_random, "I", "Execute fib4_lookup random check tests");
433 
434 
/* IPv4 lookup keys collected from a routing table walk (see add_prefix()). */
struct inet_array {
	uint32_t alloc_items;	/* capacity of arr, in elements */
	uint32_t num_items;	/* number of elements filled so far */
	uint32_t rnh_prefixes;	/* prefixes visited by the walk callback */
	int error;		/* first error recorded by the walk, 0 if none */
	struct in_addr *arr;	/* key storage, allocated by prepare_list() */
};
442 
443 /*
444  * For each prefix, add the following records to the lookup array:
445  * * prefix-1, prefix, prefix + 1, prefix_end, prefix_end + 1
446  */
447 static int
448 add_prefix(struct rtentry *rt, void *_data)
449 {
450 	struct inet_array *pa = (struct inet_array *)_data;
451 	struct in_addr addr;
452 	int plen;
453 	uint32_t scopeid, haddr;
454 
455 	pa->rnh_prefixes++;
456 
457 	if (pa->num_items + 5 >= pa->alloc_items) {
458 		if (pa->error == 0)
459 			pa->error = ENOSPC;
460 		return (0);
461 	}
462 
463 	rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
464 
465 	pa->arr[pa->num_items++] = addr;
466 	haddr = ntohl(addr.s_addr);
467 	if (haddr > 0) {
468 		pa->arr[pa->num_items++].s_addr = htonl(haddr - 1);
469 		pa->arr[pa->num_items++].s_addr = htonl(haddr + 1);
470 		/* assume mask != 0 */
471 		uint32_t mlen = (1 << (32 - plen)) - 1;
472 		pa->arr[pa->num_items++].s_addr = htonl(haddr + mlen);
473 		/* can overflow, but who cares */
474 		pa->arr[pa->num_items++].s_addr = htonl(haddr + mlen + 1);
475 	}
476 
477 	return (0);
478 }
479 
480 static bool
481 prepare_list(uint32_t fibnum, struct inet_array *pa)
482 {
483 	struct rib_head *rh;
484 
485 	rh = rt_tables_get_rnh(fibnum, AF_INET);
486 
487 	uint32_t num_prefixes = rh->rnh_prefixes;
488 	bzero(pa, sizeof(struct inet_array));
489 	pa->alloc_items = (num_prefixes + 10) * 5;
490 	pa->arr = mallocarray(pa->alloc_items, sizeof(struct in_addr),
491 	    M_TEMP, M_ZERO | M_WAITOK);
492 
493 	rib_walk(fibnum, AF_INET, false, add_prefix, pa);
494 
495 	if (pa->error != 0) {
496 		printf("prefixes: old: %u, current: %u, walked: %u, allocated: %u\n",
497 		    num_prefixes, rh->rnh_prefixes, pa->rnh_prefixes, pa->alloc_items);
498 	}
499 
500 	return (pa->error == 0);
501 }
502 
503 static int
504 run_test_inet_scan(SYSCTL_HANDLER_ARGS)
505 {
506 	struct epoch_tracker et;
507 
508 	int count = 0;
509 	int error = sysctl_handle_int(oidp, &count, 0, req);
510 	if (error != 0)
511 		return (error);
512 
513 	if (count == 0)
514 		return (0);
515 
516 	struct inet_array pa = {};
517 	uint32_t fibnum = curthread->td_proc->p_fibnum;
518 
519 	if (!prepare_list(fibnum, &pa))
520 		return (pa.error);
521 
522 	struct timespec ts_pre, ts_post;
523 	int64_t total_diff = 1;
524 	uint64_t total_packets = 0;
525 	int failure_count = 0;
526 
527 	NET_EPOCH_ENTER(et);
528 	nanouptime(&ts_pre);
529 	for (int i = 0; i < pa.num_items; i++) {
530 		if (!cmp_dst(fibnum, pa.arr[i])) {
531 			failure_count++;
532 		}
533 		total_packets++;
534 	}
535 	nanouptime(&ts_post);
536 	NET_EPOCH_EXIT(et);
537 
538 	if (pa.arr != NULL)
539 		free(pa.arr, M_TEMP);
540 
541 	/* Signal error to userland */
542 	if (failure_count > 0) {
543 		printf("[RT ERROR] total failures: %d\n", failure_count);
544 		return (EINVAL);
545 	}
546 
547 	total_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
548 	    (ts_post.tv_nsec - ts_pre.tv_nsec);
549 	printf("%zu packets in %zu nanoseconds, %zu pps\n",
550 	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
551 
552 	return (0);
553 }
554 SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet_scan,
555     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
556     0, 0, run_test_inet_scan, "I", "Execute fib4_lookup scan tests");
557 
/* IPv6 lookup keys collected from a routing table walk (see add_prefix6()). */
struct inet6_array {
	uint32_t alloc_items;	/* capacity of arr, in elements */
	uint32_t num_items;	/* number of elements filled so far */
	uint32_t rnh_prefixes;	/* prefixes visited by the walk callback */
	int error;		/* first error recorded by the walk, 0 if none */
	struct in6_addr *arr;	/* key storage, allocated by prepare_list6() */
};
565 
/*
 * Add inc to *v with 32-bit wraparound.
 *
 * Returns true when the sum fits in 32 bits and false when it wrapped,
 * i.e. when a carry has to be propagated by the caller.  A sum of exactly
 * UINT32_MAX fits and therefore does not produce a carry.
 */
static bool
safe_add(uint32_t *v, uint32_t inc)
{
	uint32_t sum = *v + inc;
	/* Unsigned addition wrapped iff the result is below an operand. */
	bool fits = (sum >= *v);

	*v = sum;
	return (fits);
}
577 
/*
 * Subtract inc from *v with 32-bit wraparound.
 *
 * Returns true when no borrow was needed (*v was at least inc) and false
 * when the result wrapped below zero.
 */
static bool
safe_dec(uint32_t *v, uint32_t inc)
{
	const uint32_t before = *v;

	/* Unsigned subtraction wraps modulo 2^32 on its own. */
	*v = before - inc;
	return (before >= inc);
}
589 
590 static void
591 inc_prefix6(struct in6_addr *addr, int inc)
592 {
593 	for (int i = 0; i < 4; i++) {
594 		uint32_t v = ntohl(addr->s6_addr32[3 - i]);
595 		bool ret = safe_add(&v, inc);
596 		addr->s6_addr32[3 - i] = htonl(v);
597 		if (ret)
598 			return;
599 		inc = 1;
600 	}
601 }
602 
603 static void
604 dec_prefix6(struct in6_addr *addr, int dec)
605 {
606 	for (int i = 0; i < 4; i++) {
607 		uint32_t v = ntohl(addr->s6_addr32[3 - i]);
608 		bool ret = safe_dec(&v, dec);
609 		addr->s6_addr32[3 - i] = htonl(v);
610 		if (ret)
611 			return;
612 		dec = 1;
613 	}
614 }
615 
/*
 * Write the IPv6 netmask for prefix length mask into *addr6, in network
 * byte order.
 *
 * All 16 bytes are written: bits covered by the prefix are set and the
 * remaining bits are cleared, so the destination does not need to be
 * initialized by the caller.  Lengths above 128 are treated as 128.
 */
static void
ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
{
	uint32_t words[4] = { 0, 0, 0, 0 };
	unsigned int bits = (mask > 128) ? 128 : mask;
	int i = 0;

	/* Whole 32-bit words covered by the prefix. */
	for (; bits >= 32; bits -= 32)
		words[i++] = 0xFFFFFFFF;
	/* Partial word; unsigned shift avoids overflow for a 1-bit mask. */
	if (bits > 0)
		words[i] = htonl(~((1U << (32 - bits)) - 1));

	memcpy(addr6, words, sizeof(words));
}
626 
627 /*
628  * For each prefix, add the following records to the lookup array:
629  * * prefix-1, prefix, prefix + 1, prefix_end, prefix_end + 1
630  */
631 static int
632 add_prefix6(struct rtentry *rt, void *_data)
633 {
634 	struct inet6_array *pa = (struct inet6_array *)_data;
635 	struct in6_addr addr, naddr;
636 	int plen;
637 	uint32_t scopeid;
638 
639 	pa->rnh_prefixes++;
640 
641 	if (pa->num_items + 5 >= pa->alloc_items) {
642 		if (pa->error == 0)
643 			pa->error = ENOSPC;
644 		return (0);
645 	}
646 
647 	rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid);
648 
649 	pa->arr[pa->num_items++] = addr;
650 	if (!IN6_ARE_ADDR_EQUAL(&addr, &in6addr_any)) {
651 		naddr = addr;
652 		dec_prefix6(&naddr, 1);
653 		pa->arr[pa->num_items++] = naddr;
654 		naddr = addr;
655 		inc_prefix6(&naddr, 1);
656 		pa->arr[pa->num_items++] = naddr;
657 
658 		/* assume mask != 0 */
659 		struct in6_addr mask6;
660 		ipv6_writemask(&mask6, plen);
661 		naddr = addr;
662 		for (int i = 0; i < 3; i++)
663 			naddr.s6_addr32[i] = htonl(ntohl(naddr.s6_addr32[i]) | ~ntohl(mask6.s6_addr32[i]));
664 
665 		pa->arr[pa->num_items++] = naddr;
666 		inc_prefix6(&naddr, 1);
667 		pa->arr[pa->num_items++] = naddr;
668 	}
669 
670 	return (0);
671 }
672 
673 static bool
674 prepare_list6(uint32_t fibnum, struct inet6_array *pa)
675 {
676 	struct rib_head *rh;
677 
678 	rh = rt_tables_get_rnh(fibnum, AF_INET6);
679 
680 	uint32_t num_prefixes = rh->rnh_prefixes;
681 	bzero(pa, sizeof(struct inet6_array));
682 	pa->alloc_items = (num_prefixes + 10) * 5;
683 	pa->arr = mallocarray(pa->alloc_items, sizeof(struct in6_addr),
684 	    M_TEMP, M_ZERO | M_WAITOK);
685 
686 	rib_walk(fibnum, AF_INET6, false, add_prefix6, pa);
687 
688 	if (pa->error != 0) {
689 		printf("prefixes: old: %u, current: %u, walked: %u, allocated: %u\n",
690 		    num_prefixes, rh->rnh_prefixes, pa->rnh_prefixes, pa->alloc_items);
691 	}
692 
693 	return (pa->error == 0);
694 }
695 
696 static int
697 run_test_inet6_scan(SYSCTL_HANDLER_ARGS)
698 {
699 	struct epoch_tracker et;
700 
701 	int count = 0;
702 	int error = sysctl_handle_int(oidp, &count, 0, req);
703 	if (error != 0)
704 		return (error);
705 
706 	if (count == 0)
707 		return (0);
708 
709 	struct inet6_array pa = {};
710 	uint32_t fibnum = curthread->td_proc->p_fibnum;
711 
712 	if (!prepare_list6(fibnum, &pa))
713 		return (pa.error);
714 
715 	struct timespec ts_pre, ts_post;
716 	int64_t total_diff = 1;
717 	uint64_t total_packets = 0;
718 	int failure_count = 0;
719 
720 	NET_EPOCH_ENTER(et);
721 	nanouptime(&ts_pre);
722 	for (int i = 0; i < pa.num_items; i++) {
723 		if (!cmp_dst6(fibnum, &pa.arr[i])) {
724 			failure_count++;
725 		}
726 		total_packets++;
727 	}
728 	nanouptime(&ts_post);
729 	NET_EPOCH_EXIT(et);
730 
731 	if (pa.arr != NULL)
732 		free(pa.arr, M_TEMP);
733 
734 	/* Signal error to userland */
735 	if (failure_count > 0) {
736 		printf("[RT ERROR] total failures: %d\n", failure_count);
737 		return (EINVAL);
738 	}
739 
740 	total_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
741 	    (ts_post.tv_nsec - ts_pre.tv_nsec);
742 	printf("%zu packets in %zu nanoseconds, %zu pps\n",
743 	    total_packets, total_diff, total_packets * 1000000000 / total_diff);
744 
745 	return (0);
746 }
747 SYSCTL_PROC(_net_route_test, OID_AUTO, run_inet6_scan,
748     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
749     0, 0, run_test_inet6_scan, "I", "Execute fib6_lookup scan tests");
750 
/* Mode flags for rnd_lps(), passed through the sysctl's arg2. */
#define	LPS_SEQ		0x1	/* make each lookup depend on earlier results */
#define	LPS_ANN		0x2	/* rewrite keys to fall inside table prefixes */
#define	LPS_REP		0x4	/* step back periodically to repeat keys */

/* Cursor over the key array while reduce_keys() rewrites it. */
struct lps_walk_state {
	uint32_t *keys;		/* key array being rewritten */
	int pos;		/* index of the next key to rewrite */
	int lim;		/* number of keys in the array */
};
760 
761 static int
762 reduce_keys(struct rtentry *rt, void *_data)
763 {
764         struct lps_walk_state *wa = (struct lps_walk_state *) _data;
765 	struct in_addr addr;
766 	uint32_t scopeid;
767 	int plen;
768 
769 	rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
770 	wa->keys[wa->pos] = ntohl(addr.s_addr) |
771 	    (wa->keys[wa->pos] & ~(0xffffffffU << (32 - plen)));
772 
773 	wa->pos++;
774 	return (wa->pos == wa->lim);
775 }
776 
777 static int
778 rnd_lps(SYSCTL_HANDLER_ARGS)
779 {
780 	struct epoch_tracker et;
781 	struct in_addr key;
782 	struct lps_walk_state wa;
783 	struct timespec ts_pre, ts_post;
784 	struct nhop_object *nh_fib;
785 	uint64_t total_diff, lps;
786 	uint32_t *keys, fibnum;
787 	uint32_t t, p;
788 	uintptr_t acc = 0;
789 	int i, pos, count = 0;
790 	int seq = 0, rep = 0;
791 	int error;
792 
793 	error = sysctl_handle_int(oidp, &count, 0, req);
794 	if (error != 0)
795 		return (error);
796 	if (count <= 0)
797 		return (0);
798 	fibnum = curthread->td_proc->p_fibnum;
799 
800 	keys = malloc(sizeof(*keys) * count, M_TEMP, M_NOWAIT);
801 	if (keys == NULL)
802 		return (ENOMEM);
803 	printf("Preparing %d random keys...\n", count);
804 	arc4random_buf(keys, sizeof(*keys) * count);
805 	if (arg2 & LPS_ANN) {
806 		wa.keys = keys;
807 		wa.pos = 0;
808 		wa.lim = count;
809 		printf("Reducing keys to announced address space...\n");
810 		do {
811 			rib_walk(fibnum, AF_INET, false, reduce_keys,
812 			    &wa);
813 		} while (wa.pos < wa.lim);
814 		printf("Reshuffling keys...\n");
815 		for (int i = 0; i < count; i++) {
816 			p = random() % count;
817 			t = keys[p];
818 			keys[p] = keys[i];
819 			keys[i] = t;
820 		}
821 	}
822 
823 	if (arg2 & LPS_REP) {
824 		rep = 1;
825 		printf("REP ");
826 	}
827 	if (arg2 & LPS_SEQ) {
828 		seq = 1;
829 		printf("SEQ");
830 	} else if (arg2 & LPS_ANN)
831 		printf("ANN");
832 	else
833 		printf("RND");
834 	printf(" LPS test starting...\n");
835 
836 	NET_EPOCH_ENTER(et);
837 	nanouptime(&ts_pre);
838 	for (i = 0, pos = 0; i < count; i++) {
839 		key.s_addr = keys[pos++] ^ ((acc >> 10) & 0xff);
840 		nh_fib = fib4_lookup(fibnum, key, 0, NHR_NONE, 0);
841 		if (seq) {
842 			if (nh_fib != NULL) {
843 				acc += (uintptr_t) nh_fib + 123;
844 				if (acc & 0x1000)
845 					acc += (uintptr_t) nh_fib->nh_ifp;
846 				else
847 					acc -= (uintptr_t) nh_fib->nh_ifp;
848 			} else
849 				acc ^= (acc >> 3) + (acc << 2) + i;
850 			if (acc & 0x800)
851 				pos++;
852 			if (pos >= count)
853 				pos = 0;
854 		}
855 		if (rep && ((i & 0xf) == 0xf)) {
856 			pos -= 0xf;
857 			if (pos < 0)
858 				pos += 0xf;
859 		}
860 	}
861 	nanouptime(&ts_post);
862 	NET_EPOCH_EXIT(et);
863 
864 	free(keys, M_TEMP);
865 
866 	total_diff = (ts_post.tv_sec - ts_pre.tv_sec) * 1000000000 +
867 	    (ts_post.tv_nsec - ts_pre.tv_nsec);
868 	lps = 1000000000ULL * count / total_diff;
869 	printf("%d lookups in %zu.%06zu milliseconds, %lu.%06lu MLPS\n",
870 	    count, total_diff / 1000000, total_diff % 1000000,
871 	    lps / 1000000, lps % 1000000);
872 
873 	return (0);
874 }
/*
 * One sysctl per combination of the LPS_* flags.  All of them share the
 * rnd_lps() handler; the flag combination is passed as the sysctl's
 * integer argument, which the handler reads as arg2.
 */
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, rnd_lps, "I",
    "Measure lookups per second, uniformly random keys, independent lookups");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd_ann,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_ANN, rnd_lps, "I",
    "Measure lookups per second, random keys from announced address space, "
    "independent lookups");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_SEQ, rnd_lps, "I",
    "Measure lookups per second, uniformly random keys, "
    "artificial dependencies between lookups");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq_ann,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_SEQ | LPS_ANN, rnd_lps, "I",
    "Measure lookups per second, random keys from announced address space, "
    "artificial dependencies between lookups");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd_rep,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_REP, rnd_lps, "I",
    "Measure lookups per second, uniformly random keys, independent lookups, "
    "repeated keys");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_rnd_ann_rep,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_ANN | LPS_REP, rnd_lps, "I",
    "Measure lookups per second, random keys from announced address space, "
    "independent lookups, repeated keys");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq_rep,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_SEQ | LPS_REP, rnd_lps, "I",
    "Measure lookups per second, uniformly random keys, "
    "artificial dependencies between lookups, repeated keys");
SYSCTL_PROC(_net_route_test, OID_AUTO, run_lps_seq_ann_rep,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, LPS_SEQ | LPS_ANN | LPS_REP, rnd_lps, "I",
    "Measure lookups per second, random keys from announced address space, "
    "artificial dependencies between lookups, repeated keys");
914 
915 static int
916 test_fib_lookup_modevent(module_t mod, int type, void *unused)
917 {
918 	int error = 0;
919 
920 	switch (type) {
921 	case MOD_LOAD:
922 		break;
923 	case MOD_UNLOAD:
924 		if (V_inet_addr_list != NULL)
925 			free(V_inet_addr_list, M_TEMP);
926 		if (V_inet6_addr_list != NULL)
927 			free(V_inet6_addr_list, M_TEMP);
928 		break;
929 	default:
930 		error = EOPNOTSUPP;
931 		break;
932 	}
933 	return (error);
934 }
935 
936 static moduledata_t testfiblookupmod = {
937         "test_fib_lookup",
938         test_fib_lookup_modevent,
939         0
940 };
941 
942 DECLARE_MODULE(testfiblookupmod, testfiblookupmod, SI_SUB_PSEUDO, SI_ORDER_ANY);
943 MODULE_VERSION(testfiblookup, 1);
944