xref: /freebsd/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c (revision 19cca0b9613d7c3058e41baf0204245119732235)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/rmlock.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/kernel.h>
39 #include <sys/socket.h>
40 #include <sys/sysctl.h>
41 #include <sys/syslog.h>
42 #include <net/vnet.h>
43 
44 #include <net/if.h>
45 #include <net/if_var.h>
46 
47 #include <netinet/in.h>
48 #include <netinet/in_fib.h>
49 #include <netinet/ip.h>
50 
51 #include <net/route.h>
52 #include <net/route/nhop.h>
53 #include <net/route/route_ctl.h>
54 #include <net/route/fib_algo.h>
55 
56 #include "rte_shim.h"
57 #include "rte_lpm.h"
58 
/*
 * tbl8 group counts used when sizing the rte_lpm table.
 * LPM_MIN_TBL8 is the starting value for a fresh instance; LPM_MAX_TBL8 is
 * presumably the intended upper bound (it is not referenced by the code
 * visible in this file).
 *
 * Both expansions are fully parenthesized so that each macro behaves as a
 * single operand inside larger expressions (e.g. "x / LPM_MAX_TBL8").
 */
#define	LPM_MIN_TBL8	(8)		/* 2 pages of memory */
#define	LPM_MAX_TBL8	(65536 * 16)	/* 256M */
61 
62 MALLOC_DECLARE(M_RTABLE);
63 
/*
 * Per-instance state of the DPDK LPM lookup module.
 */
struct dpdk_lpm_data {
	struct rte_lpm	*lpm;		/* rte_lpm table backing the lookups */
	uint64_t	routes_added;	/* prefixes inserted by the RIB dump */
	uint64_t	routes_failed;	/* prefixes the RIB dump failed to add */
	uint32_t	number_tbl8s;	/* tbl8 groups requested at creation */
	uint32_t	fibnum;		/* FIB number this instance serves */
	uint8_t		hit_tables;	/* set when rte_lpm_add() hit -ENOSPC */
	uint8_t		hit_records;	/* not referenced in this file */
	struct fib_data	*fd;		/* fib_algo handle given to init */
};
74 
75 /*
76  * Main datapath routing
77  */
78 static struct nhop_object *
79 lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
80 {
81 	struct rte_lpm *lpm;
82 	const struct rte_lpm_external *rte_ext;
83 	uint32_t nhidx = 0;
84 	int ret;
85 
86 	lpm = (struct rte_lpm *)algo_data;
87 	rte_ext = (const struct rte_lpm_external *)lpm;
88 
89 	ret = rte_lpm_lookup(lpm, ntohl(key.addr4.s_addr), &nhidx);
90 	if (ret == 0) {
91 		/* Success! */
92 		return (rte_ext->nh_idx[nhidx]);
93 	} else {
94 		/* Not found. Check default route */
95 		return (rte_ext->nh_idx[rte_ext->default_idx]);
96 	}
97 
98 	return (NULL);
99 }
100 
101 static uint8_t
102 rte_get_pref(const struct rib_rtable_info *rinfo)
103 {
104 
105 	if (rinfo->num_prefixes < 10)
106 		return (1);
107 	else if (rinfo->num_prefixes < 1000)
108 		return (rinfo->num_prefixes / 10);
109 	else if (rinfo->num_prefixes < 500000)
110 		return (100 + rinfo->num_prefixes / 3334);
111 	else
112 		return (250);
113 }
114 
115 static enum flm_op_result
116 handle_default_change(struct dpdk_lpm_data *dd, struct rib_cmd_info *rc)
117 {
118 	struct rte_lpm_external *rte_ext;
119 	rte_ext = (struct rte_lpm_external *)dd->lpm;
120 
121 	if (rc->rc_cmd != RTM_DELETE) {
122 		/* Reference new */
123 		uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
124 
125 		if (nhidx == 0)
126 			return (FLM_REBUILD);
127 		rte_ext->default_idx = nhidx;
128 	} else {
129 		/* No default route */
130 		rte_ext->default_idx = 0;
131 	}
132 
133 	return (FLM_SUCCESS);
134 }
135 
136 static void
137 get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, uint8_t *plen, uint32_t *nhop_idx)
138 {
139 	struct route_nhop_data rnd;
140 	struct rtentry *rt;
141 
142 	rt = fib4_lookup_rt(dd->fibnum, addr, 0, NHR_UNLOCKED, &rnd);
143 	if (rt != NULL) {
144 		struct in_addr addr4;
145 		uint32_t scopeid;
146 		int inet_plen;
147 		rt_get_inet_prefix_plen(rt, &addr4, &inet_plen, &scopeid);
148 		if (inet_plen > 0) {
149 			*plen = inet_plen;
150 			*nhop_idx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
151 			return;
152 		}
153 	}
154 
155 	*nhop_idx = 0;
156 	*plen = 0;
157 }
158 
159 static enum flm_op_result
160 handle_gu_change(struct dpdk_lpm_data *dd, const struct rib_cmd_info *rc,
161     const struct in_addr addr, int plen)
162 {
163 	uint32_t nhidx = 0;
164 	int ret;
165 	char abuf[INET_ADDRSTRLEN];
166 	uint32_t ip;
167 
168 	ip = ntohl(addr.s_addr);
169 	inet_ntop(AF_INET, &addr, abuf, sizeof(abuf));
170 
171 	/* So we get sin, plen and nhidx */
172 	if (rc->rc_cmd != RTM_DELETE) {
173 		/*
174 		 * Addition or change. Save nhop in the internal table
175 		 * and get index.
176 		 */
177 		nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
178 		if (nhidx == 0) {
179 			FIB_PRINTF(LOG_INFO, dd->fd, "nhop limit reached, need rebuild");
180 			return (FLM_REBUILD);
181 		}
182 
183 		ret = rte_lpm_add(dd->lpm, ip, plen, nhidx);
184 		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d",
185 		    (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
186 		    abuf, plen, nhidx, ret);
187 	} else {
188 		/*
189 		 * Need to lookup parent. Assume deletion happened already
190 		 */
191 		uint8_t parent_plen;
192 		uint32_t parent_nhop_idx;
193 		get_parent_rule(dd, addr, &parent_plen, &parent_nhop_idx);
194 
195 		ret = rte_lpm_delete(dd->lpm, ip, plen, parent_plen, parent_nhop_idx);
196 		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d nhop %u = %d",
197 		    "DEL", abuf, plen, nhidx, ret);
198 	}
199 
200 	if (ret != 0) {
201 		FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret);
202 		if (ret == -ENOSPC)
203 			return (FLM_REBUILD);
204 		return (FLM_ERROR);
205 	}
206 	return (FLM_SUCCESS);
207 }
208 
209 static enum flm_op_result
210 handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
211     void *_data)
212 {
213 	struct dpdk_lpm_data *dd;
214 	enum flm_op_result ret;
215 	struct in_addr addr4;
216 	uint32_t scopeid;
217 	int plen;
218 
219 	dd = (struct dpdk_lpm_data *)_data;
220 	rt_get_inet_prefix_plen(rc->rc_rt, &addr4, &plen, &scopeid);
221 
222 	if (plen != 0)
223 		ret = handle_gu_change(dd, rc, addr4, plen);
224 	else
225 		ret = handle_default_change(dd, rc);
226 
227 	if (ret != 0)
228 		FIB_PRINTF(LOG_INFO, dd->fd, "error handling route");
229 	return (ret);
230 }
231 
232 static void
233 destroy_table(void *_data)
234 {
235 	struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data;
236 
237 	if (dd->lpm != NULL)
238 		rte_lpm_free(dd->lpm);
239 	free(dd, M_RTABLE);
240 }
241 
242 static enum flm_op_result
243 add_route_cb(struct rtentry *rt, void *_data)
244 {
245 	struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data;
246 	struct nhop_object *nh;
247 	int plen, ret;
248 	struct in_addr addr4;
249 	uint32_t scopeid;
250 
251 	nh = rt_get_raw_nhop(rt);
252 	rt_get_inet_prefix_plen(rt, &addr4, &plen, &scopeid);
253 
254 	char abuf[INET_ADDRSTRLEN];
255 	inet_ntop(AF_INET, &addr4, abuf, sizeof(abuf));
256 
257 	FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen);
258 
259 	if (plen == 0) {
260 		struct rib_cmd_info rc = {
261 			.rc_cmd = RTM_ADD,
262 			.rc_nh_new = nh,
263 		};
264 
265 		FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route");
266 		return (handle_default_change(dd, &rc));
267 	}
268 
269 	uint32_t nhidx = fib_get_nhop_idx(dd->fd, nh);
270 	if (nhidx == 0) {
271 		FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index");
272 		return (FLM_REBUILD);
273 	}
274 	ret = rte_lpm_add(dd->lpm, ntohl(addr4.s_addr), plen, nhidx);
275 	FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d",
276 	    dd->lpm, abuf, plen, nhidx, ret);
277 
278 	if (ret != 0) {
279 		FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm_add() returned %d", ret);
280 		if (ret == -ENOSPC) {
281 			dd->hit_tables = 1;
282 			return (FLM_REBUILD);
283 		}
284 		dd->routes_failed++;
285 		return (FLM_ERROR);
286 	} else
287 		dd->routes_added++;
288 
289 	return (FLM_SUCCESS);
290 }
291 
292 static enum flm_op_result
293 check_dump_success(void *_data, struct fib_dp *dp)
294 {
295 	struct dpdk_lpm_data *dd;
296 
297 	dd = (struct dpdk_lpm_data *)_data;
298 
299 	FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. added: %zu failed: %zu",
300 	    dd->routes_added, dd->routes_failed);
301 	if (dd->hit_tables || dd->routes_failed > 0)
302 		return (FLM_REBUILD);
303 
304 	FIB_PRINTF(LOG_INFO, dd->fd,
305 	    "DPDK lookup engine synced with IPv4 RIB id %u, %zu routes",
306 	    dd->fibnum, dd->routes_added);
307 
308 	dp->f = lookup_ptr;
309 	dp->arg = dd->lpm;
310 
311 	return (FLM_SUCCESS);
312 }
313 
314 static void
315 estimate_scale(const struct dpdk_lpm_data *dd_src, struct dpdk_lpm_data *dd)
316 {
317 
318 	/* XXX: update at 75% capacity */
319 	if (dd_src->hit_tables)
320 		dd->number_tbl8s = dd_src->number_tbl8s * 2;
321 	else
322 		dd->number_tbl8s = dd_src->number_tbl8s;
323 
324 	/* TODO: look into the appropriate RIB to adjust */
325 }
326 
327 static struct dpdk_lpm_data *
328 build_table(struct dpdk_lpm_data *dd_prev, struct fib_data *fd)
329 {
330 	struct dpdk_lpm_data *dd;
331 	struct rte_lpm *lpm;
332 
333 	dd = malloc(sizeof(struct dpdk_lpm_data), M_RTABLE, M_NOWAIT | M_ZERO);
334 	if (dd == NULL) {
335 		FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure");
336 		return (NULL);
337 	}
338 	dd->fibnum = dd_prev->fibnum;
339 	dd->fd = fd;
340 
341 	estimate_scale(dd_prev, dd);
342 
343 	struct rte_lpm_config cfg = {.number_tbl8s = dd->number_tbl8s};
344 	lpm = rte_lpm_create("test", 0, &cfg);
345 	if (lpm == NULL) {
346 		FIB_PRINTF(LOG_INFO, fd, "unable to create lpm");
347 		free(dd, M_RTABLE);
348 		return (NULL);
349 	}
350 	dd->lpm = lpm;
351 	struct rte_lpm_external *ext = (struct rte_lpm_external *)lpm;
352 	ext->nh_idx = fib_get_nhop_array(dd->fd);
353 
354 	FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s);
355 
356 	return (dd);
357 }
358 
359 static enum flm_op_result
360 init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data)
361 {
362 	struct dpdk_lpm_data *dd, dd_base;
363 
364 	if (_old_data == NULL) {
365 		bzero(&dd_base, sizeof(struct dpdk_lpm_data));
366 		dd_base.fibnum = fibnum;
367 		/* TODO: get rib statistics */
368 		dd_base.number_tbl8s = LPM_MIN_TBL8;
369 		dd = &dd_base;
370 	} else {
371 		FIB_PRINTF(LOG_DEBUG, fd, "Starting with old data");
372 		dd = (struct dpdk_lpm_data *)_old_data;
373 	}
374 
375 	/* Guaranteed to be in epoch */
376 	dd = build_table(dd, fd);
377 	if (dd == NULL) {
378 		FIB_PRINTF(LOG_NOTICE, fd, "table creation failed");
379 		return (FLM_REBUILD);
380 	}
381 
382 	*data = dd;
383 	return (FLM_SUCCESS);
384 }
385 
386 static struct fib_lookup_module dpdk_lpm4 = {
387 	.flm_name = "dpdk_lpm4",
388 	.flm_family = AF_INET,
389 	.flm_init_cb = init_table,
390 	.flm_destroy_cb = destroy_table,
391 	.flm_dump_rib_item_cb = add_route_cb,
392 	.flm_dump_end_cb = check_dump_success,
393 	.flm_change_rib_item_cb = handle_rtable_change_cb,
394 	.flm_get_pref = rte_get_pref,
395 };
396 
397 static int
398 lpm4_modevent(module_t mod, int type, void *unused)
399 {
400 	int error = 0;
401 
402 	switch (type) {
403 	case MOD_LOAD:
404 		fib_module_register(&dpdk_lpm4);
405 		break;
406 	case MOD_UNLOAD:
407 		error = fib_module_unregister(&dpdk_lpm4);
408 		break;
409 	default:
410 		error = EOPNOTSUPP;
411 		break;
412 	}
413 	return (error);
414 }
415 
416 static moduledata_t lpm4mod = {
417         "dpdk_lpm4",
418         lpm4_modevent,
419         0
420 };
421 
422 DECLARE_MODULE(lpm4mod, lpm4mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
423 MODULE_VERSION(lpm4mod, 1);
424