xref: /linux/tools/lib/bpf/netlink.c (revision 7f81907b7e3f93dfed2e903af52659baa4944341)
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 /* Copyright (c) 2018 Facebook */
3 
4 #include <stdlib.h>
5 #include <memory.h>
6 #include <unistd.h>
7 #include <arpa/inet.h>
8 #include <linux/bpf.h>
9 #include <linux/if_ether.h>
10 #include <linux/pkt_cls.h>
11 #include <linux/rtnetlink.h>
12 #include <linux/netdev.h>
13 #include <sys/socket.h>
14 #include <errno.h>
15 #include <time.h>
16 
17 #include "bpf.h"
18 #include "libbpf.h"
19 #include "libbpf_internal.h"
20 #include "nlattr.h"
21 
/* Fallback definition, used only when the headers included above do not
 * already provide SOL_NETLINK. It is the option level handed to setsockopt()
 * when enabling NETLINK_EXT_ACK on the netlink socket.
 */
#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
25 
/* Attribute-level callback: receives the caller's cookie, a pointer to the
 * family-specific message payload and the table of parsed attributes.
 */
typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
27 
/* Message-level callback: receives one netlink message header, the optional
 * attribute-level callback and the caller's cookie. The receive loop in this
 * file interprets the result as NL_CONT, NL_NEXT or NL_DONE; any other value
 * stops processing and is returned to the caller unchanged.
 */
typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
			      void *cookie);
30 
/* XDP attachment state of one interface, filled in from the nested IFLA_XDP
 * attribute of an RTM_GETLINK reply.
 */
struct xdp_link_info {
	__u32 prog_id;		/* value of IFLA_XDP_PROG_ID, if present */
	__u32 drv_prog_id;	/* value of IFLA_XDP_DRV_PROG_ID, if present */
	__u32 hw_prog_id;	/* value of IFLA_XDP_HW_PROG_ID, if present */
	__u32 skb_prog_id;	/* value of IFLA_XDP_SKB_PROG_ID, if present */
	__u8 attach_mode;	/* value of IFLA_XDP_ATTACHED (XDP_ATTACHED_*) */
};
38 
/* Cookie passed through the RTM_GETLINK dump when querying XDP state. */
struct xdp_id_md {
	int ifindex;			/* interface to match; 0 matches every interface */
	__u32 flags;			/* XDP mode flags requested by the caller */
	struct xdp_link_info info;	/* result, filled in by the dump callback */
	__u64 feature_flags;		/* NOTE(review): not referenced in this file's visible code */
};
45 
/* Cookie passed through the "netdev" generic netlink query for XDP features. */
struct xdp_features_md {
	int ifindex;		/* interface whose reply should be accepted */
	__u32 xdp_zc_max_segs;	/* value of NETDEV_A_DEV_XDP_ZC_MAX_SEGS, if present */
	__u64 flags;		/* value of NETDEV_A_DEV_XDP_FEATURES */
};
51 
52 static int libbpf_netlink_open(__u32 *nl_pid, int proto)
53 {
54 	struct sockaddr_nl sa;
55 	socklen_t addrlen;
56 	int one = 1, ret;
57 	int sock;
58 
59 	memset(&sa, 0, sizeof(sa));
60 	sa.nl_family = AF_NETLINK;
61 
62 	sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
63 	if (sock < 0)
64 		return -errno;
65 
66 	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
67 		       &one, sizeof(one)) < 0) {
68 		pr_warn("Netlink error reporting not supported\n");
69 	}
70 
71 	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
72 		ret = -errno;
73 		goto cleanup;
74 	}
75 
76 	addrlen = sizeof(sa);
77 	if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
78 		ret = -errno;
79 		goto cleanup;
80 	}
81 
82 	if (addrlen != sizeof(sa)) {
83 		ret = -LIBBPF_ERRNO__INTERNAL;
84 		goto cleanup;
85 	}
86 
87 	*nl_pid = sa.nl_pid;
88 	return sock;
89 
90 cleanup:
91 	close(sock);
92 	return ret;
93 }
94 
/* Release a socket obtained from libbpf_netlink_open(). */
static void libbpf_netlink_close(int sock)
{
	(void)close(sock);
}
99 
/* Verdicts a message-level callback hands back to the receive loop. */
enum {
	NL_CONT,	/* keep iterating over the messages of the current buffer */
	NL_NEXT,	/* drop the rest of the buffer and receive again */
	NL_DONE,	/* stop receiving; the receive loop reports success (0) */
};
105 
/*
 * recvmsg() wrapper that retries for as long as the call fails with EINTR or
 * EAGAIN.
 *
 * Returns the number of bytes reported by recvmsg(), or -errno for any other
 * failure.
 */
static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
{
	for (;;) {
		int nbytes = recvmsg(sock, mhdr, flags);

		if (nbytes >= 0)
			return nbytes;
		if (errno != EINTR && errno != EAGAIN)
			return -errno;
	}
}
118 
/*
 * Resize the buffer owned by @iov to @len bytes via realloc().
 *
 * On success both iov_base and iov_len are updated and 0 is returned. On
 * allocation failure @iov is left untouched and -ENOMEM is returned.
 */
static int alloc_iov(struct iovec *iov, int len)
{
	void *resized = realloc(iov->iov_base, len);

	if (resized == NULL)
		return -ENOMEM;

	iov->iov_len = len;
	iov->iov_base = resized;
	return 0;
}
131 
132 static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
133 			       __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
134 			       void *cookie)
135 {
136 	struct iovec iov = {};
137 	struct msghdr mhdr = {
138 		.msg_iov = &iov,
139 		.msg_iovlen = 1,
140 	};
141 	bool multipart = true;
142 	struct nlmsgerr *err;
143 	struct nlmsghdr *nh;
144 	int len, ret;
145 
146 	ret = alloc_iov(&iov, 4096);
147 	if (ret)
148 		goto done;
149 
150 	while (multipart) {
151 start:
152 		multipart = false;
153 		len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
154 		if (len < 0) {
155 			ret = len;
156 			goto done;
157 		}
158 
159 		if (len > iov.iov_len) {
160 			ret = alloc_iov(&iov, len);
161 			if (ret)
162 				goto done;
163 		}
164 
165 		len = netlink_recvmsg(sock, &mhdr, 0);
166 		if (len < 0) {
167 			ret = len;
168 			goto done;
169 		}
170 
171 		if (len == 0)
172 			break;
173 
174 		for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
175 		     nh = NLMSG_NEXT(nh, len)) {
176 			if (nh->nlmsg_pid != nl_pid) {
177 				ret = -LIBBPF_ERRNO__WRNGPID;
178 				goto done;
179 			}
180 			if (nh->nlmsg_seq != seq) {
181 				ret = -LIBBPF_ERRNO__INVSEQ;
182 				goto done;
183 			}
184 			if (nh->nlmsg_flags & NLM_F_MULTI)
185 				multipart = true;
186 			switch (nh->nlmsg_type) {
187 			case NLMSG_ERROR:
188 				err = (struct nlmsgerr *)NLMSG_DATA(nh);
189 				if (!err->error)
190 					continue;
191 				ret = err->error;
192 				libbpf_nla_dump_errormsg(nh);
193 				goto done;
194 			case NLMSG_DONE:
195 				ret = 0;
196 				goto done;
197 			default:
198 				break;
199 			}
200 			if (_fn) {
201 				ret = _fn(nh, fn, cookie);
202 				switch (ret) {
203 				case NL_CONT:
204 					break;
205 				case NL_NEXT:
206 					goto start;
207 				case NL_DONE:
208 					ret = 0;
209 					goto done;
210 				default:
211 					goto done;
212 				}
213 			}
214 		}
215 	}
216 	ret = 0;
217 done:
218 	free(iov.iov_base);
219 	return ret;
220 }
221 
222 static int libbpf_netlink_send_recv(struct libbpf_nla_req *req,
223 				    int proto, __dump_nlmsg_t parse_msg,
224 				    libbpf_dump_nlmsg_t parse_attr,
225 				    void *cookie)
226 {
227 	__u32 nl_pid = 0;
228 	int sock, ret;
229 
230 	sock = libbpf_netlink_open(&nl_pid, proto);
231 	if (sock < 0)
232 		return sock;
233 
234 	req->nh.nlmsg_pid = 0;
235 	req->nh.nlmsg_seq = time(NULL);
236 
237 	if (send(sock, req, req->nh.nlmsg_len, 0) < 0) {
238 		ret = -errno;
239 		goto out;
240 	}
241 
242 	ret = libbpf_netlink_recv(sock, nl_pid, req->nh.nlmsg_seq,
243 				  parse_msg, parse_attr, cookie);
244 out:
245 	libbpf_netlink_close(sock);
246 	return ret;
247 }
248 
249 static int parse_genl_family_id(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
250 				void *cookie)
251 {
252 	struct genlmsghdr *gnl = NLMSG_DATA(nh);
253 	struct nlattr *na = (struct nlattr *)((void *)gnl + GENL_HDRLEN);
254 	struct nlattr *tb[CTRL_ATTR_FAMILY_ID + 1];
255 	__u16 *id = cookie;
256 
257 	libbpf_nla_parse(tb, CTRL_ATTR_FAMILY_ID, na,
258 			 NLMSG_PAYLOAD(nh, sizeof(*gnl)), NULL);
259 	if (!tb[CTRL_ATTR_FAMILY_ID])
260 		return NL_CONT;
261 
262 	*id = libbpf_nla_getattr_u16(tb[CTRL_ATTR_FAMILY_ID]);
263 	return NL_DONE;
264 }
265 
266 static int libbpf_netlink_resolve_genl_family_id(const char *name,
267 						 __u16 len, __u16 *id)
268 {
269 	struct libbpf_nla_req req = {
270 		.nh.nlmsg_len	= NLMSG_LENGTH(GENL_HDRLEN),
271 		.nh.nlmsg_type	= GENL_ID_CTRL,
272 		.nh.nlmsg_flags	= NLM_F_REQUEST,
273 		.gnl.cmd	= CTRL_CMD_GETFAMILY,
274 		.gnl.version	= 2,
275 	};
276 	int err;
277 
278 	err = nlattr_add(&req, CTRL_ATTR_FAMILY_NAME, name, len);
279 	if (err < 0)
280 		return err;
281 
282 	return libbpf_netlink_send_recv(&req, NETLINK_GENERIC,
283 					parse_genl_family_id, NULL, id);
284 }
285 
286 static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
287 					 __u32 flags)
288 {
289 	struct nlattr *nla;
290 	int ret;
291 	struct libbpf_nla_req req;
292 
293 	memset(&req, 0, sizeof(req));
294 	req.nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg));
295 	req.nh.nlmsg_flags    = NLM_F_REQUEST | NLM_F_ACK;
296 	req.nh.nlmsg_type     = RTM_SETLINK;
297 	req.ifinfo.ifi_family = AF_UNSPEC;
298 	req.ifinfo.ifi_index  = ifindex;
299 
300 	nla = nlattr_begin_nested(&req, IFLA_XDP);
301 	if (!nla)
302 		return -EMSGSIZE;
303 	ret = nlattr_add(&req, IFLA_XDP_FD, &fd, sizeof(fd));
304 	if (ret < 0)
305 		return ret;
306 	if (flags) {
307 		ret = nlattr_add(&req, IFLA_XDP_FLAGS, &flags, sizeof(flags));
308 		if (ret < 0)
309 			return ret;
310 	}
311 	if (flags & XDP_FLAGS_REPLACE) {
312 		ret = nlattr_add(&req, IFLA_XDP_EXPECTED_FD, &old_fd,
313 				 sizeof(old_fd));
314 		if (ret < 0)
315 			return ret;
316 	}
317 	nlattr_end_nested(&req, nla);
318 
319 	return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
320 }
321 
322 int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
323 {
324 	int old_prog_fd, err;
325 
326 	if (!OPTS_VALID(opts, bpf_xdp_attach_opts))
327 		return libbpf_err(-EINVAL);
328 
329 	old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
330 	if (old_prog_fd)
331 		flags |= XDP_FLAGS_REPLACE;
332 	else
333 		old_prog_fd = -1;
334 
335 	err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags);
336 	return libbpf_err(err);
337 }
338 
339 int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts)
340 {
341 	return bpf_xdp_attach(ifindex, -1, flags, opts);
342 }
343 
344 static int __dump_link_nlmsg(struct nlmsghdr *nlh,
345 			     libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
346 {
347 	struct nlattr *tb[IFLA_MAX + 1], *attr;
348 	struct ifinfomsg *ifi = NLMSG_DATA(nlh);
349 	int len;
350 
351 	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
352 	attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
353 
354 	if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
355 		return -LIBBPF_ERRNO__NLPARSE;
356 
357 	return dump_link_nlmsg(cookie, ifi, tb);
358 }
359 
360 static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
361 {
362 	struct nlattr *xdp_tb[IFLA_XDP_MAX + 1];
363 	struct xdp_id_md *xdp_id = cookie;
364 	struct ifinfomsg *ifinfo = msg;
365 	int ret;
366 
367 	if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index)
368 		return 0;
369 
370 	if (!tb[IFLA_XDP])
371 		return 0;
372 
373 	ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL);
374 	if (ret)
375 		return ret;
376 
377 	if (!xdp_tb[IFLA_XDP_ATTACHED])
378 		return 0;
379 
380 	xdp_id->info.attach_mode = libbpf_nla_getattr_u8(
381 		xdp_tb[IFLA_XDP_ATTACHED]);
382 
383 	if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE)
384 		return 0;
385 
386 	if (xdp_tb[IFLA_XDP_PROG_ID])
387 		xdp_id->info.prog_id = libbpf_nla_getattr_u32(
388 			xdp_tb[IFLA_XDP_PROG_ID]);
389 
390 	if (xdp_tb[IFLA_XDP_SKB_PROG_ID])
391 		xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32(
392 			xdp_tb[IFLA_XDP_SKB_PROG_ID]);
393 
394 	if (xdp_tb[IFLA_XDP_DRV_PROG_ID])
395 		xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32(
396 			xdp_tb[IFLA_XDP_DRV_PROG_ID]);
397 
398 	if (xdp_tb[IFLA_XDP_HW_PROG_ID])
399 		xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32(
400 			xdp_tb[IFLA_XDP_HW_PROG_ID]);
401 
402 	return 0;
403 }
404 
405 static int parse_xdp_features(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
406 			      void *cookie)
407 {
408 	struct genlmsghdr *gnl = NLMSG_DATA(nh);
409 	struct nlattr *na = (struct nlattr *)((void *)gnl + GENL_HDRLEN);
410 	struct nlattr *tb[NETDEV_CMD_MAX + 1];
411 	struct xdp_features_md *md = cookie;
412 	__u32 ifindex;
413 
414 	libbpf_nla_parse(tb, NETDEV_CMD_MAX, na,
415 			 NLMSG_PAYLOAD(nh, sizeof(*gnl)), NULL);
416 
417 	if (!tb[NETDEV_A_DEV_IFINDEX] || !tb[NETDEV_A_DEV_XDP_FEATURES])
418 		return NL_CONT;
419 
420 	ifindex = libbpf_nla_getattr_u32(tb[NETDEV_A_DEV_IFINDEX]);
421 	if (ifindex != md->ifindex)
422 		return NL_CONT;
423 
424 	md->flags = libbpf_nla_getattr_u64(tb[NETDEV_A_DEV_XDP_FEATURES]);
425 	if (tb[NETDEV_A_DEV_XDP_ZC_MAX_SEGS])
426 		md->xdp_zc_max_segs =
427 			libbpf_nla_getattr_u32(tb[NETDEV_A_DEV_XDP_ZC_MAX_SEGS]);
428 	return NL_DONE;
429 }
430 
431 int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
432 {
433 	struct libbpf_nla_req req = {
434 		.nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
435 		.nh.nlmsg_type     = RTM_GETLINK,
436 		.nh.nlmsg_flags    = NLM_F_DUMP | NLM_F_REQUEST,
437 		.ifinfo.ifi_family = AF_PACKET,
438 	};
439 	struct xdp_id_md xdp_id = {};
440 	struct xdp_features_md md = {
441 		.ifindex = ifindex,
442 	};
443 	__u16 id;
444 	int err;
445 
446 	if (!OPTS_VALID(opts, bpf_xdp_query_opts))
447 		return libbpf_err(-EINVAL);
448 
449 	if (xdp_flags & ~XDP_FLAGS_MASK)
450 		return libbpf_err(-EINVAL);
451 
452 	/* Check whether the single {HW,DRV,SKB} mode is set */
453 	xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE;
454 	if (xdp_flags & (xdp_flags - 1))
455 		return libbpf_err(-EINVAL);
456 
457 	xdp_id.ifindex = ifindex;
458 	xdp_id.flags = xdp_flags;
459 
460 	err = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, __dump_link_nlmsg,
461 				       get_xdp_info, &xdp_id);
462 	if (err)
463 		return libbpf_err(err);
464 
465 	OPTS_SET(opts, prog_id, xdp_id.info.prog_id);
466 	OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id);
467 	OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id);
468 	OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
469 	OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);
470 
471 	if (!OPTS_HAS(opts, feature_flags))
472 		return 0;
473 
474 	err = libbpf_netlink_resolve_genl_family_id("netdev", sizeof("netdev"), &id);
475 	if (err < 0) {
476 		if (err == -ENOENT) {
477 			opts->feature_flags = 0;
478 			goto skip_feature_flags;
479 		}
480 		return libbpf_err(err);
481 	}
482 
483 	memset(&req, 0, sizeof(req));
484 	req.nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
485 	req.nh.nlmsg_flags = NLM_F_REQUEST;
486 	req.nh.nlmsg_type = id;
487 	req.gnl.cmd = NETDEV_CMD_DEV_GET;
488 	req.gnl.version = 2;
489 
490 	err = nlattr_add(&req, NETDEV_A_DEV_IFINDEX, &ifindex, sizeof(ifindex));
491 	if (err < 0)
492 		return libbpf_err(err);
493 
494 	err = libbpf_netlink_send_recv(&req, NETLINK_GENERIC,
495 				       parse_xdp_features, NULL, &md);
496 	if (err)
497 		return libbpf_err(err);
498 
499 	OPTS_SET(opts, feature_flags, md.flags);
500 	OPTS_SET(opts, xdp_zc_max_segs, md.xdp_zc_max_segs);
501 
502 skip_feature_flags:
503 	return 0;
504 }
505 
506 int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
507 {
508 	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
509 	int ret;
510 
511 	ret = bpf_xdp_query(ifindex, flags, &opts);
512 	if (ret)
513 		return libbpf_err(ret);
514 
515 	flags &= XDP_FLAGS_MODES;
516 
517 	if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags)
518 		*prog_id = opts.prog_id;
519 	else if (flags & XDP_FLAGS_DRV_MODE)
520 		*prog_id = opts.drv_prog_id;
521 	else if (flags & XDP_FLAGS_HW_MODE)
522 		*prog_id = opts.hw_prog_id;
523 	else if (flags & XDP_FLAGS_SKB_MODE)
524 		*prog_id = opts.skb_prog_id;
525 	else
526 		*prog_id = 0;
527 
528 	return 0;
529 }
530 
531 
/* Hook-specific request filler: sets the tcmsg parent/handle and the TCA_KIND
 * attribute of @req for the qdisc described by @hook. Returns a negative
 * value on failure.
 */
typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook);
533 
534 static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
535 {
536 	req->tc.tcm_parent = TC_H_CLSACT;
537 	req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
538 
539 	return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
540 }
541 
542 static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
543 {
544 	const char *qdisc = OPTS_GET(hook, qdisc, NULL);
545 
546 	req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT);
547 	req->tc.tcm_handle = OPTS_GET(hook, handle, 0);
548 
549 	return nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1);
550 }
551 
552 static int attach_point_to_config(struct bpf_tc_hook *hook,
553 				  qdisc_config_t *config)
554 {
555 	switch (OPTS_GET(hook, attach_point, 0)) {
556 	case BPF_TC_INGRESS:
557 	case BPF_TC_EGRESS:
558 	case BPF_TC_INGRESS | BPF_TC_EGRESS:
559 		if (OPTS_GET(hook, parent, 0))
560 			return -EINVAL;
561 		*config = &clsact_config;
562 		return 0;
563 	case BPF_TC_CUSTOM:
564 		return -EOPNOTSUPP;
565 	case BPF_TC_QDISC:
566 		*config = &qdisc_config;
567 		return 0;
568 	default:
569 		return -EINVAL;
570 	}
571 }
572 
573 static int tc_get_tcm_parent(enum bpf_tc_attach_point attach_point,
574 			     __u32 *parent)
575 {
576 	switch (attach_point) {
577 	case BPF_TC_INGRESS:
578 	case BPF_TC_EGRESS:
579 		if (*parent)
580 			return -EINVAL;
581 		*parent = TC_H_MAKE(TC_H_CLSACT,
582 				    attach_point == BPF_TC_INGRESS ?
583 				    TC_H_MIN_INGRESS : TC_H_MIN_EGRESS);
584 		break;
585 	case BPF_TC_CUSTOM:
586 		if (!*parent)
587 			return -EINVAL;
588 		break;
589 	default:
590 		return -EINVAL;
591 	}
592 	return 0;
593 }
594 
595 static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
596 {
597 	qdisc_config_t config;
598 	int ret;
599 	struct libbpf_nla_req req;
600 
601 	ret = attach_point_to_config(hook, &config);
602 	if (ret < 0)
603 		return ret;
604 
605 	memset(&req, 0, sizeof(req));
606 	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
607 	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
608 	req.nh.nlmsg_type  = cmd;
609 	req.tc.tcm_family  = AF_UNSPEC;
610 	req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
611 
612 	ret = config(&req, hook);
613 	if (ret < 0)
614 		return ret;
615 
616 	return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
617 }
618 
619 static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
620 {
621 	return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL);
622 }
623 
624 static int tc_qdisc_delete(struct bpf_tc_hook *hook)
625 {
626 	return tc_qdisc_modify(hook, RTM_DELQDISC, 0);
627 }
628 
629 int bpf_tc_hook_create(struct bpf_tc_hook *hook)
630 {
631 	int ret;
632 
633 	if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
634 	    OPTS_GET(hook, ifindex, 0) <= 0)
635 		return libbpf_err(-EINVAL);
636 
637 	ret = tc_qdisc_create_excl(hook);
638 	return libbpf_err(ret);
639 }
640 
/* Forward declaration: the definition appears later in this file, but
 * bpf_tc_hook_destroy() needs to call it with flush == true.
 */
static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
			   const struct bpf_tc_opts *opts,
			   const bool flush);
644 
645 int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
646 {
647 	if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
648 	    OPTS_GET(hook, ifindex, 0) <= 0)
649 		return libbpf_err(-EINVAL);
650 
651 	switch (OPTS_GET(hook, attach_point, 0)) {
652 	case BPF_TC_INGRESS:
653 	case BPF_TC_EGRESS:
654 		return libbpf_err(__bpf_tc_detach(hook, NULL, true));
655 	case BPF_TC_QDISC:
656 	case BPF_TC_INGRESS | BPF_TC_EGRESS:
657 		return libbpf_err(tc_qdisc_delete(hook));
658 	case BPF_TC_CUSTOM:
659 		return libbpf_err(-EOPNOTSUPP);
660 	default:
661 		return libbpf_err(-EINVAL);
662 	}
663 }
664 
/* Cookie for the tc filter reply callbacks. */
struct bpf_cb_ctx {
	struct bpf_tc_opts *opts;	/* receives prog_id, handle and priority */
	bool processed;			/* set once a filter reply has been recorded */
};
669 
670 static int __get_tc_info(void *cookie, struct tcmsg *tc, struct nlattr **tb,
671 			 bool unicast)
672 {
673 	struct nlattr *tbb[TCA_BPF_MAX + 1];
674 	struct bpf_cb_ctx *info = cookie;
675 
676 	if (!info || !info->opts)
677 		return -EINVAL;
678 	if (unicast && info->processed)
679 		return -EINVAL;
680 	if (!tb[TCA_OPTIONS])
681 		return NL_CONT;
682 
683 	libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL);
684 	if (!tbb[TCA_BPF_ID])
685 		return -EINVAL;
686 
687 	OPTS_SET(info->opts, prog_id, libbpf_nla_getattr_u32(tbb[TCA_BPF_ID]));
688 	OPTS_SET(info->opts, handle, tc->tcm_handle);
689 	OPTS_SET(info->opts, priority, TC_H_MAJ(tc->tcm_info) >> 16);
690 
691 	info->processed = true;
692 	return unicast ? NL_NEXT : NL_DONE;
693 }
694 
695 static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
696 		       void *cookie)
697 {
698 	struct tcmsg *tc = NLMSG_DATA(nh);
699 	struct nlattr *tb[TCA_MAX + 1];
700 
701 	libbpf_nla_parse(tb, TCA_MAX,
702 			 (struct nlattr *)((void *)tc + NLMSG_ALIGN(sizeof(*tc))),
703 			 NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL);
704 	if (!tb[TCA_KIND])
705 		return NL_CONT;
706 	return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO);
707 }
708 
709 static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
710 {
711 	struct bpf_prog_info info;
712 	__u32 info_len = sizeof(info);
713 	char name[256];
714 	int len, ret;
715 
716 	memset(&info, 0, info_len);
717 	ret = bpf_prog_get_info_by_fd(fd, &info, &info_len);
718 	if (ret < 0)
719 		return ret;
720 
721 	ret = nlattr_add(req, TCA_BPF_FD, &fd, sizeof(fd));
722 	if (ret < 0)
723 		return ret;
724 	len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id);
725 	if (len < 0)
726 		return -errno;
727 	if (len >= sizeof(name))
728 		return -ENAMETOOLONG;
729 	return nlattr_add(req, TCA_BPF_NAME, name, len + 1);
730 }
731 
732 int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
733 {
734 	__u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags;
735 	int ret, ifindex, attach_point, prog_fd;
736 	struct bpf_cb_ctx info = {};
737 	struct libbpf_nla_req req;
738 	struct nlattr *nla;
739 
740 	if (!hook || !opts ||
741 	    !OPTS_VALID(hook, bpf_tc_hook) ||
742 	    !OPTS_VALID(opts, bpf_tc_opts))
743 		return libbpf_err(-EINVAL);
744 
745 	ifindex      = OPTS_GET(hook, ifindex, 0);
746 	parent       = OPTS_GET(hook, parent, 0);
747 	attach_point = OPTS_GET(hook, attach_point, 0);
748 
749 	handle       = OPTS_GET(opts, handle, 0);
750 	priority     = OPTS_GET(opts, priority, 0);
751 	prog_fd      = OPTS_GET(opts, prog_fd, 0);
752 	prog_id      = OPTS_GET(opts, prog_id, 0);
753 	flags        = OPTS_GET(opts, flags, 0);
754 
755 	if (ifindex <= 0 || !prog_fd || prog_id)
756 		return libbpf_err(-EINVAL);
757 	if (priority > UINT16_MAX)
758 		return libbpf_err(-EINVAL);
759 	if (flags & ~BPF_TC_F_REPLACE)
760 		return libbpf_err(-EINVAL);
761 
762 	flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL;
763 	protocol = ETH_P_ALL;
764 
765 	memset(&req, 0, sizeof(req));
766 	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
767 	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE |
768 			     NLM_F_ECHO | flags;
769 	req.nh.nlmsg_type  = RTM_NEWTFILTER;
770 	req.tc.tcm_family  = AF_UNSPEC;
771 	req.tc.tcm_ifindex = ifindex;
772 	req.tc.tcm_handle  = handle;
773 	req.tc.tcm_info    = TC_H_MAKE(priority << 16, htons(protocol));
774 
775 	ret = tc_get_tcm_parent(attach_point, &parent);
776 	if (ret < 0)
777 		return libbpf_err(ret);
778 	req.tc.tcm_parent = parent;
779 
780 	ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
781 	if (ret < 0)
782 		return libbpf_err(ret);
783 	nla = nlattr_begin_nested(&req, TCA_OPTIONS);
784 	if (!nla)
785 		return libbpf_err(-EMSGSIZE);
786 	ret = tc_add_fd_and_name(&req, prog_fd);
787 	if (ret < 0)
788 		return libbpf_err(ret);
789 	bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
790 	ret = nlattr_add(&req, TCA_BPF_FLAGS, &bpf_flags, sizeof(bpf_flags));
791 	if (ret < 0)
792 		return libbpf_err(ret);
793 	nlattr_end_nested(&req, nla);
794 
795 	info.opts = opts;
796 
797 	ret = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, get_tc_info, NULL,
798 				       &info);
799 	if (ret < 0)
800 		return libbpf_err(ret);
801 	if (!info.processed)
802 		return libbpf_err(-ENOENT);
803 	return ret;
804 }
805 
806 static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
807 			   const struct bpf_tc_opts *opts,
808 			   const bool flush)
809 {
810 	__u32 protocol = 0, handle, priority, parent, prog_id, flags;
811 	int ret, ifindex, attach_point, prog_fd;
812 	struct libbpf_nla_req req;
813 
814 	if (!hook ||
815 	    !OPTS_VALID(hook, bpf_tc_hook) ||
816 	    !OPTS_VALID(opts, bpf_tc_opts))
817 		return -EINVAL;
818 
819 	ifindex      = OPTS_GET(hook, ifindex, 0);
820 	parent       = OPTS_GET(hook, parent, 0);
821 	attach_point = OPTS_GET(hook, attach_point, 0);
822 
823 	handle       = OPTS_GET(opts, handle, 0);
824 	priority     = OPTS_GET(opts, priority, 0);
825 	prog_fd      = OPTS_GET(opts, prog_fd, 0);
826 	prog_id      = OPTS_GET(opts, prog_id, 0);
827 	flags        = OPTS_GET(opts, flags, 0);
828 
829 	if (ifindex <= 0 || flags || prog_fd || prog_id)
830 		return -EINVAL;
831 	if (priority > UINT16_MAX)
832 		return -EINVAL;
833 	if (!flush) {
834 		if (!handle || !priority)
835 			return -EINVAL;
836 		protocol = ETH_P_ALL;
837 	} else {
838 		if (handle || priority)
839 			return -EINVAL;
840 	}
841 
842 	memset(&req, 0, sizeof(req));
843 	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
844 	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
845 	req.nh.nlmsg_type  = RTM_DELTFILTER;
846 	req.tc.tcm_family  = AF_UNSPEC;
847 	req.tc.tcm_ifindex = ifindex;
848 	if (!flush) {
849 		req.tc.tcm_handle = handle;
850 		req.tc.tcm_info   = TC_H_MAKE(priority << 16, htons(protocol));
851 	}
852 
853 	ret = tc_get_tcm_parent(attach_point, &parent);
854 	if (ret < 0)
855 		return ret;
856 	req.tc.tcm_parent = parent;
857 
858 	if (!flush) {
859 		ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
860 		if (ret < 0)
861 			return ret;
862 	}
863 
864 	return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
865 }
866 
/*
 * Remove the single filter identified by @opts (handle and priority) from
 * the location described by @hook.
 *
 * Returns -EINVAL when @opts is NULL, otherwise the result of the delete
 * request; both are wrapped by libbpf_err().
 */
int bpf_tc_detach(const struct bpf_tc_hook *hook,
		  const struct bpf_tc_opts *opts)
{
	if (opts == NULL)
		return libbpf_err(-EINVAL);

	return libbpf_err(__bpf_tc_detach(hook, opts, false));
}
878 
879 int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
880 {
881 	__u32 protocol, handle, priority, parent, prog_id, flags;
882 	int ret, ifindex, attach_point, prog_fd;
883 	struct bpf_cb_ctx info = {};
884 	struct libbpf_nla_req req;
885 
886 	if (!hook || !opts ||
887 	    !OPTS_VALID(hook, bpf_tc_hook) ||
888 	    !OPTS_VALID(opts, bpf_tc_opts))
889 		return libbpf_err(-EINVAL);
890 
891 	ifindex      = OPTS_GET(hook, ifindex, 0);
892 	parent       = OPTS_GET(hook, parent, 0);
893 	attach_point = OPTS_GET(hook, attach_point, 0);
894 
895 	handle       = OPTS_GET(opts, handle, 0);
896 	priority     = OPTS_GET(opts, priority, 0);
897 	prog_fd      = OPTS_GET(opts, prog_fd, 0);
898 	prog_id      = OPTS_GET(opts, prog_id, 0);
899 	flags        = OPTS_GET(opts, flags, 0);
900 
901 	if (ifindex <= 0 || flags || prog_fd || prog_id ||
902 	    !handle || !priority)
903 		return libbpf_err(-EINVAL);
904 	if (priority > UINT16_MAX)
905 		return libbpf_err(-EINVAL);
906 
907 	protocol = ETH_P_ALL;
908 
909 	memset(&req, 0, sizeof(req));
910 	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
911 	req.nh.nlmsg_flags = NLM_F_REQUEST;
912 	req.nh.nlmsg_type  = RTM_GETTFILTER;
913 	req.tc.tcm_family  = AF_UNSPEC;
914 	req.tc.tcm_ifindex = ifindex;
915 	req.tc.tcm_handle  = handle;
916 	req.tc.tcm_info    = TC_H_MAKE(priority << 16, htons(protocol));
917 
918 	ret = tc_get_tcm_parent(attach_point, &parent);
919 	if (ret < 0)
920 		return libbpf_err(ret);
921 	req.tc.tcm_parent = parent;
922 
923 	ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
924 	if (ret < 0)
925 		return libbpf_err(ret);
926 
927 	info.opts = opts;
928 
929 	ret = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, get_tc_info, NULL,
930 				       &info);
931 	if (ret < 0)
932 		return libbpf_err(ret);
933 	if (!info.processed)
934 		return libbpf_err(-ENOENT);
935 	return ret;
936 }
937