xref: /linux/tools/lib/bpf/netlink.c (revision 0969001569e403107c11561d497893a07394d691)
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 /* Copyright (c) 2018 Facebook */
3 
4 #include <stdlib.h>
5 #include <memory.h>
6 #include <unistd.h>
7 #include <arpa/inet.h>
8 #include <linux/bpf.h>
9 #include <linux/if_ether.h>
10 #include <linux/pkt_cls.h>
11 #include <linux/rtnetlink.h>
12 #include <linux/netdev.h>
13 #include <sys/socket.h>
14 #include <errno.h>
15 #include <time.h>
16 
17 #include "bpf.h"
18 #include "libbpf.h"
19 #include "libbpf_internal.h"
20 #include "nlattr.h"
21 
22 #ifndef SOL_NETLINK
23 #define SOL_NETLINK 270
24 #endif
25 
/* Attribute-level callback: receives the caller's cookie, a pointer to the
 * message payload and the table of parsed attributes.
 */
typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);

/* Message-level callback: libbpf_netlink_recv() invokes it for every
 * received message that is neither NLMSG_ERROR nor NLMSG_DONE, passing the
 * message together with the attribute-level callback and the cookie.
 */
typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
			      void *cookie);
30 
/* XDP attachment state of one link, as filled in by get_xdp_info() from the
 * nested IFLA_XDP attributes.
 */
struct xdp_link_info {
	__u32 prog_id;		/* from IFLA_XDP_PROG_ID */
	__u32 drv_prog_id;	/* from IFLA_XDP_DRV_PROG_ID */
	__u32 hw_prog_id;	/* from IFLA_XDP_HW_PROG_ID */
	__u32 skb_prog_id;	/* from IFLA_XDP_SKB_PROG_ID */
	__u8 attach_mode;	/* from IFLA_XDP_ATTACHED */
};
38 
/* Cookie handed to get_xdp_info() while dumping links. */
struct xdp_id_md {
	int ifindex;			/* link to match; 0 matches every link */
	__u32 flags;			/* mode bits stored by bpf_xdp_query() */
	struct xdp_link_info info;	/* result filled in by get_xdp_info() */
	__u64 feature_flags;		/* not referenced by the code in this file */
};
45 
/* Cookie handed to parse_xdp_features(). */
struct xdp_features_md {
	int ifindex;	/* device whose features are requested */
	__u64 flags;	/* receives the NETDEV_A_DEV_XDP_FEATURES value */
};
50 
51 static int libbpf_netlink_open(__u32 *nl_pid, int proto)
52 {
53 	struct sockaddr_nl sa;
54 	socklen_t addrlen;
55 	int one = 1, ret;
56 	int sock;
57 
58 	memset(&sa, 0, sizeof(sa));
59 	sa.nl_family = AF_NETLINK;
60 
61 	sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
62 	if (sock < 0)
63 		return -errno;
64 
65 	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
66 		       &one, sizeof(one)) < 0) {
67 		pr_warn("Netlink error reporting not supported\n");
68 	}
69 
70 	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
71 		ret = -errno;
72 		goto cleanup;
73 	}
74 
75 	addrlen = sizeof(sa);
76 	if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
77 		ret = -errno;
78 		goto cleanup;
79 	}
80 
81 	if (addrlen != sizeof(sa)) {
82 		ret = -LIBBPF_ERRNO__INTERNAL;
83 		goto cleanup;
84 	}
85 
86 	*nl_pid = sa.nl_pid;
87 	return sock;
88 
89 cleanup:
90 	close(sock);
91 	return ret;
92 }
93 
/* Release a socket obtained from libbpf_netlink_open(); the result of
 * close() is intentionally not reported.
 */
static void libbpf_netlink_close(int sock)
{
	(void)close(sock);
}
98 
/* Return codes of message-level callbacks, as interpreted by
 * libbpf_netlink_recv().
 */
enum {
	NL_CONT,	/* keep walking the messages in the current buffer */
	NL_NEXT,	/* drop the rest of the buffer and receive again */
	NL_DONE,	/* stop receiving and report success */
};
104 
/* recvmsg() wrapper that retries while the call fails with EINTR or EAGAIN.
 *
 * Returns the number of bytes reported by recvmsg(), or -errno for any
 * other failure.
 */
static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
{
	int n;

	for (;;) {
		n = recvmsg(sock, mhdr, flags);
		if (n >= 0)
			return n;
		if (errno != EINTR && errno != EAGAIN)
			return -errno;
	}
}
117 
/* Resize the buffer behind @iov to @len bytes via realloc().
 *
 * Returns 0 on success. On allocation failure -ENOMEM is returned and @iov
 * is left untouched.
 */
static int alloc_iov(struct iovec *iov, int len)
{
	void *grown = realloc(iov->iov_base, len);

	if (grown == NULL)
		return -ENOMEM;

	iov->iov_len = len;
	iov->iov_base = grown;
	return 0;
}
130 
131 static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
132 			       __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
133 			       void *cookie)
134 {
135 	struct iovec iov = {};
136 	struct msghdr mhdr = {
137 		.msg_iov = &iov,
138 		.msg_iovlen = 1,
139 	};
140 	bool multipart = true;
141 	struct nlmsgerr *err;
142 	struct nlmsghdr *nh;
143 	int len, ret;
144 
145 	ret = alloc_iov(&iov, 4096);
146 	if (ret)
147 		goto done;
148 
149 	while (multipart) {
150 start:
151 		multipart = false;
152 		len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
153 		if (len < 0) {
154 			ret = len;
155 			goto done;
156 		}
157 
158 		if (len > iov.iov_len) {
159 			ret = alloc_iov(&iov, len);
160 			if (ret)
161 				goto done;
162 		}
163 
164 		len = netlink_recvmsg(sock, &mhdr, 0);
165 		if (len < 0) {
166 			ret = len;
167 			goto done;
168 		}
169 
170 		if (len == 0)
171 			break;
172 
173 		for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
174 		     nh = NLMSG_NEXT(nh, len)) {
175 			if (nh->nlmsg_pid != nl_pid) {
176 				ret = -LIBBPF_ERRNO__WRNGPID;
177 				goto done;
178 			}
179 			if (nh->nlmsg_seq != seq) {
180 				ret = -LIBBPF_ERRNO__INVSEQ;
181 				goto done;
182 			}
183 			if (nh->nlmsg_flags & NLM_F_MULTI)
184 				multipart = true;
185 			switch (nh->nlmsg_type) {
186 			case NLMSG_ERROR:
187 				err = (struct nlmsgerr *)NLMSG_DATA(nh);
188 				if (!err->error)
189 					continue;
190 				ret = err->error;
191 				libbpf_nla_dump_errormsg(nh);
192 				goto done;
193 			case NLMSG_DONE:
194 				ret = 0;
195 				goto done;
196 			default:
197 				break;
198 			}
199 			if (_fn) {
200 				ret = _fn(nh, fn, cookie);
201 				switch (ret) {
202 				case NL_CONT:
203 					break;
204 				case NL_NEXT:
205 					goto start;
206 				case NL_DONE:
207 					ret = 0;
208 					goto done;
209 				default:
210 					goto done;
211 				}
212 			}
213 		}
214 	}
215 	ret = 0;
216 done:
217 	free(iov.iov_base);
218 	return ret;
219 }
220 
221 static int libbpf_netlink_send_recv(struct libbpf_nla_req *req,
222 				    int proto, __dump_nlmsg_t parse_msg,
223 				    libbpf_dump_nlmsg_t parse_attr,
224 				    void *cookie)
225 {
226 	__u32 nl_pid = 0;
227 	int sock, ret;
228 
229 	sock = libbpf_netlink_open(&nl_pid, proto);
230 	if (sock < 0)
231 		return sock;
232 
233 	req->nh.nlmsg_pid = 0;
234 	req->nh.nlmsg_seq = time(NULL);
235 
236 	if (send(sock, req, req->nh.nlmsg_len, 0) < 0) {
237 		ret = -errno;
238 		goto out;
239 	}
240 
241 	ret = libbpf_netlink_recv(sock, nl_pid, req->nh.nlmsg_seq,
242 				  parse_msg, parse_attr, cookie);
243 out:
244 	libbpf_netlink_close(sock);
245 	return ret;
246 }
247 
248 static int parse_genl_family_id(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
249 				void *cookie)
250 {
251 	struct genlmsghdr *gnl = NLMSG_DATA(nh);
252 	struct nlattr *na = (struct nlattr *)((void *)gnl + GENL_HDRLEN);
253 	struct nlattr *tb[CTRL_ATTR_FAMILY_ID + 1];
254 	__u16 *id = cookie;
255 
256 	libbpf_nla_parse(tb, CTRL_ATTR_FAMILY_ID, na,
257 			 NLMSG_PAYLOAD(nh, sizeof(*gnl)), NULL);
258 	if (!tb[CTRL_ATTR_FAMILY_ID])
259 		return NL_CONT;
260 
261 	*id = libbpf_nla_getattr_u16(tb[CTRL_ATTR_FAMILY_ID]);
262 	return NL_DONE;
263 }
264 
265 static int libbpf_netlink_resolve_genl_family_id(const char *name,
266 						 __u16 len, __u16 *id)
267 {
268 	struct libbpf_nla_req req = {
269 		.nh.nlmsg_len	= NLMSG_LENGTH(GENL_HDRLEN),
270 		.nh.nlmsg_type	= GENL_ID_CTRL,
271 		.nh.nlmsg_flags	= NLM_F_REQUEST,
272 		.gnl.cmd	= CTRL_CMD_GETFAMILY,
273 		.gnl.version	= 2,
274 	};
275 	int err;
276 
277 	err = nlattr_add(&req, CTRL_ATTR_FAMILY_NAME, name, len);
278 	if (err < 0)
279 		return err;
280 
281 	return libbpf_netlink_send_recv(&req, NETLINK_GENERIC,
282 					parse_genl_family_id, NULL, id);
283 }
284 
285 static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
286 					 __u32 flags)
287 {
288 	struct nlattr *nla;
289 	int ret;
290 	struct libbpf_nla_req req;
291 
292 	memset(&req, 0, sizeof(req));
293 	req.nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg));
294 	req.nh.nlmsg_flags    = NLM_F_REQUEST | NLM_F_ACK;
295 	req.nh.nlmsg_type     = RTM_SETLINK;
296 	req.ifinfo.ifi_family = AF_UNSPEC;
297 	req.ifinfo.ifi_index  = ifindex;
298 
299 	nla = nlattr_begin_nested(&req, IFLA_XDP);
300 	if (!nla)
301 		return -EMSGSIZE;
302 	ret = nlattr_add(&req, IFLA_XDP_FD, &fd, sizeof(fd));
303 	if (ret < 0)
304 		return ret;
305 	if (flags) {
306 		ret = nlattr_add(&req, IFLA_XDP_FLAGS, &flags, sizeof(flags));
307 		if (ret < 0)
308 			return ret;
309 	}
310 	if (flags & XDP_FLAGS_REPLACE) {
311 		ret = nlattr_add(&req, IFLA_XDP_EXPECTED_FD, &old_fd,
312 				 sizeof(old_fd));
313 		if (ret < 0)
314 			return ret;
315 	}
316 	nlattr_end_nested(&req, nla);
317 
318 	return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
319 }
320 
321 int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts)
322 {
323 	int old_prog_fd, err;
324 
325 	if (!OPTS_VALID(opts, bpf_xdp_attach_opts))
326 		return libbpf_err(-EINVAL);
327 
328 	old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
329 	if (old_prog_fd)
330 		flags |= XDP_FLAGS_REPLACE;
331 	else
332 		old_prog_fd = -1;
333 
334 	err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags);
335 	return libbpf_err(err);
336 }
337 
338 int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts)
339 {
340 	return bpf_xdp_attach(ifindex, -1, flags, opts);
341 }
342 
343 static int __dump_link_nlmsg(struct nlmsghdr *nlh,
344 			     libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
345 {
346 	struct nlattr *tb[IFLA_MAX + 1], *attr;
347 	struct ifinfomsg *ifi = NLMSG_DATA(nlh);
348 	int len;
349 
350 	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
351 	attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
352 
353 	if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
354 		return -LIBBPF_ERRNO__NLPARSE;
355 
356 	return dump_link_nlmsg(cookie, ifi, tb);
357 }
358 
359 static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
360 {
361 	struct nlattr *xdp_tb[IFLA_XDP_MAX + 1];
362 	struct xdp_id_md *xdp_id = cookie;
363 	struct ifinfomsg *ifinfo = msg;
364 	int ret;
365 
366 	if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index)
367 		return 0;
368 
369 	if (!tb[IFLA_XDP])
370 		return 0;
371 
372 	ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL);
373 	if (ret)
374 		return ret;
375 
376 	if (!xdp_tb[IFLA_XDP_ATTACHED])
377 		return 0;
378 
379 	xdp_id->info.attach_mode = libbpf_nla_getattr_u8(
380 		xdp_tb[IFLA_XDP_ATTACHED]);
381 
382 	if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE)
383 		return 0;
384 
385 	if (xdp_tb[IFLA_XDP_PROG_ID])
386 		xdp_id->info.prog_id = libbpf_nla_getattr_u32(
387 			xdp_tb[IFLA_XDP_PROG_ID]);
388 
389 	if (xdp_tb[IFLA_XDP_SKB_PROG_ID])
390 		xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32(
391 			xdp_tb[IFLA_XDP_SKB_PROG_ID]);
392 
393 	if (xdp_tb[IFLA_XDP_DRV_PROG_ID])
394 		xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32(
395 			xdp_tb[IFLA_XDP_DRV_PROG_ID]);
396 
397 	if (xdp_tb[IFLA_XDP_HW_PROG_ID])
398 		xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32(
399 			xdp_tb[IFLA_XDP_HW_PROG_ID]);
400 
401 	return 0;
402 }
403 
404 static int parse_xdp_features(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
405 			      void *cookie)
406 {
407 	struct genlmsghdr *gnl = NLMSG_DATA(nh);
408 	struct nlattr *na = (struct nlattr *)((void *)gnl + GENL_HDRLEN);
409 	struct nlattr *tb[NETDEV_CMD_MAX + 1];
410 	struct xdp_features_md *md = cookie;
411 	__u32 ifindex;
412 
413 	libbpf_nla_parse(tb, NETDEV_CMD_MAX, na,
414 			 NLMSG_PAYLOAD(nh, sizeof(*gnl)), NULL);
415 
416 	if (!tb[NETDEV_A_DEV_IFINDEX] || !tb[NETDEV_A_DEV_XDP_FEATURES])
417 		return NL_CONT;
418 
419 	ifindex = libbpf_nla_getattr_u32(tb[NETDEV_A_DEV_IFINDEX]);
420 	if (ifindex != md->ifindex)
421 		return NL_CONT;
422 
423 	md->flags = libbpf_nla_getattr_u64(tb[NETDEV_A_DEV_XDP_FEATURES]);
424 	return NL_DONE;
425 }
426 
427 int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
428 {
429 	struct libbpf_nla_req req = {
430 		.nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
431 		.nh.nlmsg_type     = RTM_GETLINK,
432 		.nh.nlmsg_flags    = NLM_F_DUMP | NLM_F_REQUEST,
433 		.ifinfo.ifi_family = AF_PACKET,
434 	};
435 	struct xdp_id_md xdp_id = {};
436 	struct xdp_features_md md = {
437 		.ifindex = ifindex,
438 	};
439 	__u16 id;
440 	int err;
441 
442 	if (!OPTS_VALID(opts, bpf_xdp_query_opts))
443 		return libbpf_err(-EINVAL);
444 
445 	if (xdp_flags & ~XDP_FLAGS_MASK)
446 		return libbpf_err(-EINVAL);
447 
448 	/* Check whether the single {HW,DRV,SKB} mode is set */
449 	xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE;
450 	if (xdp_flags & (xdp_flags - 1))
451 		return libbpf_err(-EINVAL);
452 
453 	xdp_id.ifindex = ifindex;
454 	xdp_id.flags = xdp_flags;
455 
456 	err = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, __dump_link_nlmsg,
457 				       get_xdp_info, &xdp_id);
458 	if (err)
459 		return libbpf_err(err);
460 
461 	OPTS_SET(opts, prog_id, xdp_id.info.prog_id);
462 	OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id);
463 	OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id);
464 	OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id);
465 	OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode);
466 
467 	if (!OPTS_HAS(opts, feature_flags))
468 		return 0;
469 
470 	err = libbpf_netlink_resolve_genl_family_id("netdev", sizeof("netdev"), &id);
471 	if (err < 0) {
472 		if (err == -ENOENT) {
473 			opts->feature_flags = 0;
474 			goto skip_feature_flags;
475 		}
476 		return libbpf_err(err);
477 	}
478 
479 	memset(&req, 0, sizeof(req));
480 	req.nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
481 	req.nh.nlmsg_flags = NLM_F_REQUEST;
482 	req.nh.nlmsg_type = id;
483 	req.gnl.cmd = NETDEV_CMD_DEV_GET;
484 	req.gnl.version = 2;
485 
486 	err = nlattr_add(&req, NETDEV_A_DEV_IFINDEX, &ifindex, sizeof(ifindex));
487 	if (err < 0)
488 		return libbpf_err(err);
489 
490 	err = libbpf_netlink_send_recv(&req, NETLINK_GENERIC,
491 				       parse_xdp_features, NULL, &md);
492 	if (err)
493 		return libbpf_err(err);
494 
495 	opts->feature_flags = md.flags;
496 
497 skip_feature_flags:
498 	return 0;
499 }
500 
501 int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
502 {
503 	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
504 	int ret;
505 
506 	ret = bpf_xdp_query(ifindex, flags, &opts);
507 	if (ret)
508 		return libbpf_err(ret);
509 
510 	flags &= XDP_FLAGS_MODES;
511 
512 	if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags)
513 		*prog_id = opts.prog_id;
514 	else if (flags & XDP_FLAGS_DRV_MODE)
515 		*prog_id = opts.drv_prog_id;
516 	else if (flags & XDP_FLAGS_HW_MODE)
517 		*prog_id = opts.hw_prog_id;
518 	else if (flags & XDP_FLAGS_SKB_MODE)
519 		*prog_id = opts.skb_prog_id;
520 	else
521 		*prog_id = 0;
522 
523 	return 0;
524 }
525 
526 
/* Hook that fills the qdisc-specific parts of a request; tc_qdisc_modify()
 * treats a negative return value as an error.
 */
typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
528 
529 static int clsact_config(struct libbpf_nla_req *req)
530 {
531 	req->tc.tcm_parent = TC_H_CLSACT;
532 	req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
533 
534 	return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
535 }
536 
537 static int attach_point_to_config(struct bpf_tc_hook *hook,
538 				  qdisc_config_t *config)
539 {
540 	switch (OPTS_GET(hook, attach_point, 0)) {
541 	case BPF_TC_INGRESS:
542 	case BPF_TC_EGRESS:
543 	case BPF_TC_INGRESS | BPF_TC_EGRESS:
544 		if (OPTS_GET(hook, parent, 0))
545 			return -EINVAL;
546 		*config = &clsact_config;
547 		return 0;
548 	case BPF_TC_CUSTOM:
549 		return -EOPNOTSUPP;
550 	default:
551 		return -EINVAL;
552 	}
553 }
554 
555 static int tc_get_tcm_parent(enum bpf_tc_attach_point attach_point,
556 			     __u32 *parent)
557 {
558 	switch (attach_point) {
559 	case BPF_TC_INGRESS:
560 	case BPF_TC_EGRESS:
561 		if (*parent)
562 			return -EINVAL;
563 		*parent = TC_H_MAKE(TC_H_CLSACT,
564 				    attach_point == BPF_TC_INGRESS ?
565 				    TC_H_MIN_INGRESS : TC_H_MIN_EGRESS);
566 		break;
567 	case BPF_TC_CUSTOM:
568 		if (!*parent)
569 			return -EINVAL;
570 		break;
571 	default:
572 		return -EINVAL;
573 	}
574 	return 0;
575 }
576 
577 static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
578 {
579 	qdisc_config_t config;
580 	int ret;
581 	struct libbpf_nla_req req;
582 
583 	ret = attach_point_to_config(hook, &config);
584 	if (ret < 0)
585 		return ret;
586 
587 	memset(&req, 0, sizeof(req));
588 	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
589 	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
590 	req.nh.nlmsg_type  = cmd;
591 	req.tc.tcm_family  = AF_UNSPEC;
592 	req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
593 
594 	ret = config(&req);
595 	if (ret < 0)
596 		return ret;
597 
598 	return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
599 }
600 
601 static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
602 {
603 	return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL);
604 }
605 
606 static int tc_qdisc_delete(struct bpf_tc_hook *hook)
607 {
608 	return tc_qdisc_modify(hook, RTM_DELQDISC, 0);
609 }
610 
611 int bpf_tc_hook_create(struct bpf_tc_hook *hook)
612 {
613 	int ret;
614 
615 	if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
616 	    OPTS_GET(hook, ifindex, 0) <= 0)
617 		return libbpf_err(-EINVAL);
618 
619 	ret = tc_qdisc_create_excl(hook);
620 	return libbpf_err(ret);
621 }
622 
/* Forward declaration: bpf_tc_hook_destroy() below calls this with
 * flush == true before the definition appears.
 */
static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
			   const struct bpf_tc_opts *opts,
			   const bool flush);
626 
627 int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
628 {
629 	if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
630 	    OPTS_GET(hook, ifindex, 0) <= 0)
631 		return libbpf_err(-EINVAL);
632 
633 	switch (OPTS_GET(hook, attach_point, 0)) {
634 	case BPF_TC_INGRESS:
635 	case BPF_TC_EGRESS:
636 		return libbpf_err(__bpf_tc_detach(hook, NULL, true));
637 	case BPF_TC_INGRESS | BPF_TC_EGRESS:
638 		return libbpf_err(tc_qdisc_delete(hook));
639 	case BPF_TC_CUSTOM:
640 		return libbpf_err(-EOPNOTSUPP);
641 	default:
642 		return libbpf_err(-EINVAL);
643 	}
644 }
645 
/* Cookie passed to get_tc_info() by bpf_tc_attach() and bpf_tc_query(). */
struct bpf_cb_ctx {
	struct bpf_tc_opts *opts;	/* receives prog_id, handle and priority */
	bool processed;			/* set once a filter has been recorded */
};
650 
651 static int __get_tc_info(void *cookie, struct tcmsg *tc, struct nlattr **tb,
652 			 bool unicast)
653 {
654 	struct nlattr *tbb[TCA_BPF_MAX + 1];
655 	struct bpf_cb_ctx *info = cookie;
656 
657 	if (!info || !info->opts)
658 		return -EINVAL;
659 	if (unicast && info->processed)
660 		return -EINVAL;
661 	if (!tb[TCA_OPTIONS])
662 		return NL_CONT;
663 
664 	libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL);
665 	if (!tbb[TCA_BPF_ID])
666 		return -EINVAL;
667 
668 	OPTS_SET(info->opts, prog_id, libbpf_nla_getattr_u32(tbb[TCA_BPF_ID]));
669 	OPTS_SET(info->opts, handle, tc->tcm_handle);
670 	OPTS_SET(info->opts, priority, TC_H_MAJ(tc->tcm_info) >> 16);
671 
672 	info->processed = true;
673 	return unicast ? NL_NEXT : NL_DONE;
674 }
675 
676 static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
677 		       void *cookie)
678 {
679 	struct tcmsg *tc = NLMSG_DATA(nh);
680 	struct nlattr *tb[TCA_MAX + 1];
681 
682 	libbpf_nla_parse(tb, TCA_MAX,
683 			 (struct nlattr *)((void *)tc + NLMSG_ALIGN(sizeof(*tc))),
684 			 NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL);
685 	if (!tb[TCA_KIND])
686 		return NL_CONT;
687 	return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO);
688 }
689 
690 static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
691 {
692 	struct bpf_prog_info info;
693 	__u32 info_len = sizeof(info);
694 	char name[256];
695 	int len, ret;
696 
697 	memset(&info, 0, info_len);
698 	ret = bpf_prog_get_info_by_fd(fd, &info, &info_len);
699 	if (ret < 0)
700 		return ret;
701 
702 	ret = nlattr_add(req, TCA_BPF_FD, &fd, sizeof(fd));
703 	if (ret < 0)
704 		return ret;
705 	len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id);
706 	if (len < 0)
707 		return -errno;
708 	if (len >= sizeof(name))
709 		return -ENAMETOOLONG;
710 	return nlattr_add(req, TCA_BPF_NAME, name, len + 1);
711 }
712 
713 int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
714 {
715 	__u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags;
716 	int ret, ifindex, attach_point, prog_fd;
717 	struct bpf_cb_ctx info = {};
718 	struct libbpf_nla_req req;
719 	struct nlattr *nla;
720 
721 	if (!hook || !opts ||
722 	    !OPTS_VALID(hook, bpf_tc_hook) ||
723 	    !OPTS_VALID(opts, bpf_tc_opts))
724 		return libbpf_err(-EINVAL);
725 
726 	ifindex      = OPTS_GET(hook, ifindex, 0);
727 	parent       = OPTS_GET(hook, parent, 0);
728 	attach_point = OPTS_GET(hook, attach_point, 0);
729 
730 	handle       = OPTS_GET(opts, handle, 0);
731 	priority     = OPTS_GET(opts, priority, 0);
732 	prog_fd      = OPTS_GET(opts, prog_fd, 0);
733 	prog_id      = OPTS_GET(opts, prog_id, 0);
734 	flags        = OPTS_GET(opts, flags, 0);
735 
736 	if (ifindex <= 0 || !prog_fd || prog_id)
737 		return libbpf_err(-EINVAL);
738 	if (priority > UINT16_MAX)
739 		return libbpf_err(-EINVAL);
740 	if (flags & ~BPF_TC_F_REPLACE)
741 		return libbpf_err(-EINVAL);
742 
743 	flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL;
744 	protocol = ETH_P_ALL;
745 
746 	memset(&req, 0, sizeof(req));
747 	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
748 	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE |
749 			     NLM_F_ECHO | flags;
750 	req.nh.nlmsg_type  = RTM_NEWTFILTER;
751 	req.tc.tcm_family  = AF_UNSPEC;
752 	req.tc.tcm_ifindex = ifindex;
753 	req.tc.tcm_handle  = handle;
754 	req.tc.tcm_info    = TC_H_MAKE(priority << 16, htons(protocol));
755 
756 	ret = tc_get_tcm_parent(attach_point, &parent);
757 	if (ret < 0)
758 		return libbpf_err(ret);
759 	req.tc.tcm_parent = parent;
760 
761 	ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
762 	if (ret < 0)
763 		return libbpf_err(ret);
764 	nla = nlattr_begin_nested(&req, TCA_OPTIONS);
765 	if (!nla)
766 		return libbpf_err(-EMSGSIZE);
767 	ret = tc_add_fd_and_name(&req, prog_fd);
768 	if (ret < 0)
769 		return libbpf_err(ret);
770 	bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
771 	ret = nlattr_add(&req, TCA_BPF_FLAGS, &bpf_flags, sizeof(bpf_flags));
772 	if (ret < 0)
773 		return libbpf_err(ret);
774 	nlattr_end_nested(&req, nla);
775 
776 	info.opts = opts;
777 
778 	ret = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, get_tc_info, NULL,
779 				       &info);
780 	if (ret < 0)
781 		return libbpf_err(ret);
782 	if (!info.processed)
783 		return libbpf_err(-ENOENT);
784 	return ret;
785 }
786 
787 static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
788 			   const struct bpf_tc_opts *opts,
789 			   const bool flush)
790 {
791 	__u32 protocol = 0, handle, priority, parent, prog_id, flags;
792 	int ret, ifindex, attach_point, prog_fd;
793 	struct libbpf_nla_req req;
794 
795 	if (!hook ||
796 	    !OPTS_VALID(hook, bpf_tc_hook) ||
797 	    !OPTS_VALID(opts, bpf_tc_opts))
798 		return -EINVAL;
799 
800 	ifindex      = OPTS_GET(hook, ifindex, 0);
801 	parent       = OPTS_GET(hook, parent, 0);
802 	attach_point = OPTS_GET(hook, attach_point, 0);
803 
804 	handle       = OPTS_GET(opts, handle, 0);
805 	priority     = OPTS_GET(opts, priority, 0);
806 	prog_fd      = OPTS_GET(opts, prog_fd, 0);
807 	prog_id      = OPTS_GET(opts, prog_id, 0);
808 	flags        = OPTS_GET(opts, flags, 0);
809 
810 	if (ifindex <= 0 || flags || prog_fd || prog_id)
811 		return -EINVAL;
812 	if (priority > UINT16_MAX)
813 		return -EINVAL;
814 	if (!flush) {
815 		if (!handle || !priority)
816 			return -EINVAL;
817 		protocol = ETH_P_ALL;
818 	} else {
819 		if (handle || priority)
820 			return -EINVAL;
821 	}
822 
823 	memset(&req, 0, sizeof(req));
824 	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
825 	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
826 	req.nh.nlmsg_type  = RTM_DELTFILTER;
827 	req.tc.tcm_family  = AF_UNSPEC;
828 	req.tc.tcm_ifindex = ifindex;
829 	if (!flush) {
830 		req.tc.tcm_handle = handle;
831 		req.tc.tcm_info   = TC_H_MAKE(priority << 16, htons(protocol));
832 	}
833 
834 	ret = tc_get_tcm_parent(attach_point, &parent);
835 	if (ret < 0)
836 		return ret;
837 	req.tc.tcm_parent = parent;
838 
839 	if (!flush) {
840 		ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
841 		if (ret < 0)
842 			return ret;
843 	}
844 
845 	return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL);
846 }
847 
/* Detach the single tc filter identified by @opts from @hook.
 *
 * @opts is mandatory (-EINVAL when NULL); all remaining validation happens
 * in __bpf_tc_detach(), whose result is returned through libbpf_err().
 */
int bpf_tc_detach(const struct bpf_tc_hook *hook,
		  const struct bpf_tc_opts *opts)
{
	if (opts == NULL)
		return libbpf_err(-EINVAL);

	return libbpf_err(__bpf_tc_detach(hook, opts, false));
}
859 
860 int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
861 {
862 	__u32 protocol, handle, priority, parent, prog_id, flags;
863 	int ret, ifindex, attach_point, prog_fd;
864 	struct bpf_cb_ctx info = {};
865 	struct libbpf_nla_req req;
866 
867 	if (!hook || !opts ||
868 	    !OPTS_VALID(hook, bpf_tc_hook) ||
869 	    !OPTS_VALID(opts, bpf_tc_opts))
870 		return libbpf_err(-EINVAL);
871 
872 	ifindex      = OPTS_GET(hook, ifindex, 0);
873 	parent       = OPTS_GET(hook, parent, 0);
874 	attach_point = OPTS_GET(hook, attach_point, 0);
875 
876 	handle       = OPTS_GET(opts, handle, 0);
877 	priority     = OPTS_GET(opts, priority, 0);
878 	prog_fd      = OPTS_GET(opts, prog_fd, 0);
879 	prog_id      = OPTS_GET(opts, prog_id, 0);
880 	flags        = OPTS_GET(opts, flags, 0);
881 
882 	if (ifindex <= 0 || flags || prog_fd || prog_id ||
883 	    !handle || !priority)
884 		return libbpf_err(-EINVAL);
885 	if (priority > UINT16_MAX)
886 		return libbpf_err(-EINVAL);
887 
888 	protocol = ETH_P_ALL;
889 
890 	memset(&req, 0, sizeof(req));
891 	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
892 	req.nh.nlmsg_flags = NLM_F_REQUEST;
893 	req.nh.nlmsg_type  = RTM_GETTFILTER;
894 	req.tc.tcm_family  = AF_UNSPEC;
895 	req.tc.tcm_ifindex = ifindex;
896 	req.tc.tcm_handle  = handle;
897 	req.tc.tcm_info    = TC_H_MAKE(priority << 16, htons(protocol));
898 
899 	ret = tc_get_tcm_parent(attach_point, &parent);
900 	if (ret < 0)
901 		return libbpf_err(ret);
902 	req.tc.tcm_parent = parent;
903 
904 	ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
905 	if (ret < 0)
906 		return libbpf_err(ret);
907 
908 	info.opts = opts;
909 
910 	ret = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, get_tc_info, NULL,
911 				       &info);
912 	if (ret < 0)
913 		return libbpf_err(ret);
914 	if (!info.processed)
915 		return libbpf_err(-ENOENT);
916 	return ret;
917 }
918