xref: /freebsd/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c (revision 19cca0b9613d7c3058e41baf0204245119732235)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet6.h"
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/rmlock.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/kernel.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
41 #include <sys/syslog.h>
42 #include <net/vnet.h>
43 
44 #include <net/if.h>
45 #include <net/if_var.h>
46 
47 #include <netinet/in.h>
48 #include <netinet/ip.h>
49 #include <netinet/ip6.h>
50 #include <netinet6/ip6_var.h>
51 #include <netinet6/in6_fib.h>
52 
53 #include <net/route.h>
54 #include <net/route/nhop.h>
55 #include <net/route/route_ctl.h>
56 #include <net/route/fib_algo.h>
57 #define	RTDEBUG
58 
59 #include "rte_lpm6.h"
60 
/*
 * Bounds on the number of tbl8 entries requested from the LPM6 engine.
 * The maximum is parenthesized so that the product is evaluated as a unit
 * when the macro is used inside a larger expression (e.g. a division).
 */
#define	LPM6_MIN_TBL8	8		/* 2 pages of memory */
#define	LPM6_MAX_TBL8	(65536 * 16)	/* 256M */
63 
/*
 * Pair of untyped pointers.
 * NOTE(review): presumably a lookup routine and the argument passed to it;
 * the visible code never references this structure -- confirm before use.
 */
struct fib_algo_calldata {
	void	*lookup;
	void	*arg;
};
68 
/*
 * Per-instance state of the dpdk_lpm6 lookup algorithm.
 */
struct dpdk_lpm6_data {
	struct rte_lpm6	*lpm6;		/* table returned by rte_lpm6_create() */
	uint64_t	routes_added;	/* dump: prefixes inserted successfully */
	uint64_t	routes_failed;	/* dump: inserts failing w/o -ENOSPC */
	uint32_t	number_tbl8s;	/* tbl8 count passed to rte_lpm6_create() */
	uint32_t	fibnum;		/* routing table (fib) number */
	uint8_t		hit_tables;	/* set when an insert returned -ENOSPC */
	struct fib_data	*fd;		/* framework handle (logging, nhop idx) */
};
78 
79 static struct nhop_object *
80 lookup_ptr_ll(const struct rte_lpm6 *lpm6, const struct in6_addr *dst6,
81     uint32_t scopeid)
82 {
83 	const struct rte_lpm6_external *rte_ext;
84 
85 	rte_ext = (const struct rte_lpm6_external *)lpm6;
86 
87 	return (fib6_radix_lookup_nh(rte_ext->fibnum, dst6, scopeid));
88 }
89 
90 /*
91  * Main datapath routing
92  */
93 static struct nhop_object *
94 lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
95 {
96 	const struct rte_lpm6 *lpm6;
97 	const struct rte_lpm6_external *rte_ext;
98 	const struct in6_addr *addr6;
99 	uint32_t nhidx = 0;
100 	int ret;
101 
102 	lpm6 = (const struct rte_lpm6 *)algo_data;
103 	addr6 = key.addr6;
104 	rte_ext = (const struct rte_lpm6_external *)lpm6;
105 
106 	if (!IN6_IS_SCOPE_LINKLOCAL(addr6)) {
107 		ret = rte_lpm6_lookup(lpm6, (const uint8_t *)addr6, &nhidx);
108 		if (ret == 0) {
109 			/* Success! */
110 			return (rte_ext->nh_idx[nhidx]);
111 		} else {
112 			/* Not found. Check default route */
113 			if (rte_ext->default_idx > 0)
114 				return (rte_ext->nh_idx[rte_ext->default_idx]);
115 			else
116 				return (NULL);
117 		}
118 	} else {
119 		/* LL */
120 		return (lookup_ptr_ll(lpm6, addr6, scopeid));
121 	}
122 }
123 
124 static uint8_t
125 rte6_get_pref(const struct rib_rtable_info *rinfo)
126 {
127 
128 	if (rinfo->num_prefixes < 10)
129 		return (1);
130 	else if (rinfo->num_prefixes < 1000)
131 		return (rinfo->num_prefixes / 10);
132 	else if (rinfo->num_prefixes < 500000)
133 		return (100 + rinfo->num_prefixes / 3334);
134 	else
135 		return (250);
136 }
137 
138 static enum flm_op_result
139 handle_default_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc)
140 {
141 	struct rte_lpm6_external *rte_ext;
142 	rte_ext = (struct rte_lpm6_external *)dd->lpm6;
143 
144 	if (rc->rc_cmd != RTM_DELETE) {
145 		/* Reference new */
146 		uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
147 
148 		if (nhidx == 0)
149 			return (FLM_REBUILD);
150 		rte_ext->default_idx = nhidx;
151 	} else {
152 		/* No default route */
153 		rte_ext->default_idx = 0;
154 	}
155 
156 	return (FLM_SUCCESS);
157 }
158 
159 static enum flm_op_result
160 handle_ll_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc,
161     const struct in6_addr addr6, int plen, uint32_t scopeid)
162 {
163 
164 	return (FLM_SUCCESS);
165 }
166 
167 static struct rte_lpm6_rule *
168 pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6,
169     char *buffer)
170 {
171 	struct rte_lpm6_rule *lsp_rule = NULL;
172 	struct route_nhop_data rnd;
173 	struct rtentry *rt;
174 	int plen;
175 
176 	rt = fib6_lookup_rt(dd->fibnum, addr6, 0, NHR_UNLOCKED, &rnd);
177 	/* plen = 0 means default route and it's out of scope */
178 	if (rt != NULL) {
179 		uint32_t scopeid;
180 		struct in6_addr new_addr6;
181 		rt_get_inet6_prefix_plen(rt, &new_addr6, &plen, &scopeid);
182 		if (plen > 0) {
183 			uint32_t nhidx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
184 			if (nhidx == 0) {
185 				/*
186 				 * shouldn't happen as we already have parent route.
187 				 * It will trigger rebuild automatically.
188 				 */
189 				return (NULL);
190 			}
191 			lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhidx);
192 		}
193 	}
194 
195 	return (lsp_rule);
196 }
197 
198 static enum flm_op_result
199 handle_gu_change(struct dpdk_lpm6_data *dd, const struct rib_cmd_info *rc,
200     const struct in6_addr *addr6, int plen)
201 {
202 	int ret;
203 	char abuf[INET6_ADDRSTRLEN];
204 	inet_ntop(AF_INET6, addr6, abuf, sizeof(abuf));
205 
206 	/* So we get sin6, plen and nhidx */
207 	if (rc->rc_cmd != RTM_DELETE) {
208 		/*
209 		 * Addition or change. Save nhop in the internal table
210 		 * and get index.
211 		 */
212 		uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
213 		if (nhidx == 0) {
214 			FIB_PRINTF(LOG_INFO, dd->fd, "nhop limit reached, need rebuild");
215 			return (FLM_REBUILD);
216 		}
217 
218 		ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6,
219 				   plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0);
220 		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d",
221 		    (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
222 		    abuf, plen, nhidx, ret);
223 	} else {
224 		/*
225 		 * Need to lookup parent. Assume deletion happened already
226 		 */
227 		char buffer[RTE_LPM6_RULE_SIZE];
228 		struct rte_lpm6_rule *lsp_rule = NULL;
229 		lsp_rule = pack_parent_rule(dd, addr6, buffer);
230 
231 		ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, lsp_rule);
232 		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop ? = %d",
233 		    "DEL", abuf, plen, ret);
234 	}
235 
236 	if (ret != 0) {
237 		FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret);
238 		if (ret == -ENOSPC)
239 			return (FLM_REBUILD);
240 		return (FLM_ERROR);
241 	}
242 	return (FLM_SUCCESS);
243 }
244 
245 static enum flm_op_result
246 handle_any_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc)
247 {
248 	enum flm_op_result ret;
249 	struct in6_addr addr6;
250 	uint32_t scopeid;
251 	int plen;
252 
253 	rt_get_inet6_prefix_plen(rc->rc_rt, &addr6, &plen, &scopeid);
254 
255 	if (IN6_IS_SCOPE_LINKLOCAL(&addr6))
256 		ret = handle_ll_change(dd, rc, addr6, plen, scopeid);
257 	else if (plen == 0)
258 		ret = handle_default_change(dd, rc);
259 	else
260 		ret = handle_gu_change(dd, rc, &addr6, plen);
261 
262 	if (ret != 0)
263 		FIB_PRINTF(LOG_INFO, dd->fd, "error handling route");
264 	return (ret);
265 }
266 
267 static enum flm_op_result
268 handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
269     void *_data)
270 {
271 	struct dpdk_lpm6_data *dd;
272 
273 	dd = (struct dpdk_lpm6_data *)_data;
274 
275 	return (handle_any_change(dd, rc));
276 }
277 
278 static void
279 destroy_dd(struct dpdk_lpm6_data *dd)
280 {
281 
282 	FIB_PRINTF(LOG_INFO, dd->fd, "destroy dd %p", dd);
283 	if (dd->lpm6 != NULL)
284 		rte_lpm6_free(dd->lpm6);
285 	free(dd, M_TEMP);
286 }
287 
/*
 * Destroy callback registered as flm_destroy_cb; _data is the instance
 * state produced by init_table().
 */
static void
destroy_table(void *_data)
{
	struct dpdk_lpm6_data *dd = (struct dpdk_lpm6_data *)_data;

	destroy_dd(dd);
}
294 
295 static enum flm_op_result
296 add_route_cb(struct rtentry *rt, void *_data)
297 {
298 	struct dpdk_lpm6_data *dd = (struct dpdk_lpm6_data *)_data;
299 	struct in6_addr addr6;
300 	struct nhop_object *nh;
301 	uint32_t scopeid;
302 	int plen;
303 	int ret;
304 
305 	rt_get_inet6_prefix_plen(rt, &addr6, &plen, &scopeid);
306 	nh = rt_get_raw_nhop(rt);
307 
308 	if (IN6_IS_SCOPE_LINKLOCAL(&addr6)) {
309 
310 		/*
311 		 * We don't operate on LL directly, however
312 		 * reference them to maintain guarantee on
313 		 * ability to refcount nhops in epoch.
314 		 */
315 		fib_get_nhop_idx(dd->fd, nh);
316 		return (FLM_SUCCESS);
317 	}
318 
319 	char abuf[INET6_ADDRSTRLEN];
320 	inet_ntop(AF_INET6, &addr6, abuf, sizeof(abuf));
321 	FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen);
322 
323 	if (plen == 0) {
324 		struct rib_cmd_info rc = {
325 			.rc_cmd = RTM_ADD,
326 			.rc_nh_new = nh,
327 		};
328 
329 		FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route");
330 		return (handle_default_change(dd, &rc));
331 	}
332 
333 	uint32_t nhidx = fib_get_nhop_idx(dd->fd, nh);
334 	if (nhidx == 0) {
335 		FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index");
336 		return (FLM_REBUILD);
337 	}
338 	ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)&addr6, plen, nhidx, 1);
339 	FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d",
340 	    dd->lpm6, abuf, plen, nhidx, ret);
341 
342 	if (ret != 0) {
343 		FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm6_add() returned %d", ret);
344 		if (ret == -ENOSPC) {
345 			dd->hit_tables = 1;
346 			return (FLM_REBUILD);
347 		}
348 		dd->routes_failed++;
349 		return (FLM_ERROR);
350 	} else
351 		dd->routes_added++;
352 
353 	return (FLM_SUCCESS);
354 }
355 
356 static enum flm_op_result
357 check_dump_success(void *_data, struct fib_dp *dp)
358 {
359 	struct dpdk_lpm6_data *dd;
360 
361 	dd = (struct dpdk_lpm6_data *)_data;
362 
363 	FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. added: %zu failed: %zu",
364 	    dd->routes_added, dd->routes_failed);
365 	if (dd->hit_tables || dd->routes_failed > 0)
366 		return (FLM_REBUILD);
367 
368 	FIB_PRINTF(LOG_INFO, dd->fd,
369 	    "DPDK lookup engine synced with IPv6 RIB id %u, %zu routes",
370 	    dd->fibnum, dd->routes_added);
371 
372 	dp->f = lookup_ptr;
373 	dp->arg = dd->lpm6;
374 
375 	return (FLM_SUCCESS);
376 }
377 
378 static void
379 estimate_scale(const struct dpdk_lpm6_data *dd_src, struct dpdk_lpm6_data *dd)
380 {
381 
382 	/* XXX: update at 75% capacity */
383 	if (dd_src->hit_tables)
384 		dd->number_tbl8s = dd_src->number_tbl8s * 2;
385 	else
386 		dd->number_tbl8s = dd_src->number_tbl8s;
387 
388 	/* TODO: look into the appropriate RIB to adjust */
389 }
390 
391 static struct dpdk_lpm6_data *
392 build_table(struct dpdk_lpm6_data *dd_prev, struct fib_data *fd)
393 {
394 	struct dpdk_lpm6_data *dd;
395 	struct rte_lpm6 *lpm6;
396 
397 	dd = malloc(sizeof(struct dpdk_lpm6_data), M_TEMP, M_NOWAIT | M_ZERO);
398 	if (dd == NULL) {
399 		FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure");
400 		return (NULL);
401 	}
402 	dd->fibnum = dd_prev->fibnum;
403 	dd->fd = fd;
404 
405 	estimate_scale(dd_prev, dd);
406 
407 	struct rte_lpm6_config cfg = {.number_tbl8s = dd->number_tbl8s};
408 	lpm6 = rte_lpm6_create("test", 0, &cfg);
409 	if (lpm6 == NULL) {
410 		FIB_PRINTF(LOG_INFO, fd, "unable to create lpm6");
411 		free(dd, M_TEMP);
412 		return (NULL);
413 	}
414 	dd->lpm6 = lpm6;
415 	struct rte_lpm6_external *ext = (struct rte_lpm6_external *)lpm6;
416 	ext->nh_idx = fib_get_nhop_array(dd->fd);
417 
418 	FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s);
419 
420 	return (dd);
421 }
422 
423 static enum flm_op_result
424 init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data)
425 {
426 	struct dpdk_lpm6_data *dd, dd_base;
427 
428 	if (_old_data == NULL) {
429 		bzero(&dd_base, sizeof(struct dpdk_lpm6_data));
430 		dd_base.fibnum = fibnum;
431 		/* TODO: get rib statistics */
432 		dd_base.number_tbl8s = LPM6_MIN_TBL8;
433 		dd = &dd_base;
434 	} else {
435 		FIB_PRINTF(LOG_INFO, fd, "Starting with old data");
436 		dd = (struct dpdk_lpm6_data *)_old_data;
437 	}
438 
439 	/* Guaranteed to be in epoch */
440 	dd = build_table(dd, fd);
441 	if (dd == NULL) {
442 		FIB_PRINTF(LOG_INFO, fd, "table creation failed");
443 		return (FLM_REBUILD);
444 	}
445 
446 	*data = dd;
447 	return (FLM_SUCCESS);
448 }
449 
450 static struct fib_lookup_module dpdk_lpm6 = {
451 	.flm_name = "dpdk_lpm6",
452 	.flm_family = AF_INET6,
453 	.flm_init_cb = init_table,
454 	.flm_destroy_cb = destroy_table,
455 	.flm_dump_rib_item_cb = add_route_cb,
456 	.flm_dump_end_cb = check_dump_success,
457 	.flm_change_rib_item_cb = handle_rtable_change_cb,
458 	.flm_get_pref = rte6_get_pref,
459 };
460 
461 static int
462 lpm6_modevent(module_t mod, int type, void *unused)
463 {
464 	int error = 0;
465 
466 	switch (type) {
467 	case MOD_LOAD:
468 		fib_module_register(&dpdk_lpm6);
469 		break;
470 	case MOD_UNLOAD:
471 		error = fib_module_unregister(&dpdk_lpm6);
472 		break;
473 	default:
474 		error = EOPNOTSUPP;
475 		break;
476 	}
477 	return (error);
478 }
479 
480 static moduledata_t lpm6mod = {
481         "dpdk_lpm6",
482         lpm6_modevent,
483         0
484 };
485 
486 DECLARE_MODULE(lpm6mod, lpm6mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
487 MODULE_VERSION(lpm6mod, 1);
488