xref: /linux/tools/testing/selftests/net/busy_poller.c (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <assert.h>
3 #include <errno.h>
4 #include <error.h>
5 #include <fcntl.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <ynl.h>
13 
14 #include <arpa/inet.h>
15 #include <netinet/in.h>
16 
17 #include <sys/epoll.h>
18 #include <sys/ioctl.h>
19 #include <sys/socket.h>
20 #include <sys/types.h>
21 
22 #include <linux/genetlink.h>
23 #include <linux/netlink.h>
24 
25 #include "netdev-user.h"
26 
27 /* The below ifdef blob is required because:
28  *
29  * - sys/epoll.h does not (yet) have the ioctl definitions included. So,
30  *   systems with older glibcs will not have them available. However,
31  *   sys/epoll.h does include the type definition for epoll_data, which is
32  *   needed by the user program (e.g. epoll_event.data.fd)
33  *
34  * - linux/eventpoll.h does not define the epoll_data type, it is simply an
35  *   opaque __u64. It does, however, include the ioctl definition.
36  *
37  * Including both headers is impossible (types would be redefined), so I've
38  * opted instead to take sys/epoll.h, and include the blob below.
39  *
40  * Someday, when glibc is globally up to date, the blob below can be removed.
41  */
#ifndef EPOLL_IOC_TYPE
/* Fallback definitions, compiled only when the included headers do not
 * already provide EPOLL_IOC_TYPE: the parameter block exchanged with the
 * EPIOCSPARAMS / EPIOCGPARAMS ioctls, followed by the ioctl numbers.
 */
struct epoll_params {
	uint32_t busy_poll_usecs;
	uint16_t busy_poll_budget;
	uint8_t prefer_busy_poll;

	/* explicit padding so the struct size is a multiple of 64 bits */
	uint8_t __pad;
};

#define EPOLL_IOC_TYPE 0x8A
#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
#endif /* EPOLL_IOC_TYPE */
56 
/* Listener settings, filled in by parse_opts():
 *   cfg_port        -p  TCP port the poller binds to
 *   cfg_bind_addr   -b  IPv4 address the poller binds to
 *   cfg_outfile     -o  strdup()'d path that received data is written to
 *   cfg_max_events  -m  size of the event array handed to epoll_wait()
 *   cfg_ifindex     -i  interface index used for the NAPI dump request
 */
static uint16_t cfg_port = 8000;
static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
static char *cfg_outfile;
static int cfg_max_events = 8;
static uint32_t cfg_ifindex;

/* busy poll params (-u, -g, -P), copied into struct epoll_params */
static uint32_t cfg_busy_poll_usecs;
static uint16_t cfg_busy_poll_budget;
static uint8_t cfg_prefer_busy_poll;

/* NAPI params (-d, -r, -s, -t), applied through the NAPI set request */
static uint32_t cfg_defer_hard_irqs;
static uint64_t cfg_gro_flush_timeout;
static uint64_t cfg_irq_suspend_timeout;
static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED;
73 
/* Print the option synopsis, with @filepath as the program name, through
 * error(); the non-zero status makes error() terminate the program.
 */
static void usage(const char *filepath)
{
	const int status = 1;	/* exit status passed to error() */
	const int errnum = 0;	/* no errno description is appended */

	error(status, errnum,
	      "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>",
	      filepath);
}
80 
81 static void parse_opts(int argc, char **argv)
82 {
83 	unsigned long long tmp;
84 	int ret;
85 	int c;
86 
87 	if (argc <= 1)
88 		usage(argv[0]);
89 
90 	while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) {
91 		/* most options take integer values, except o and b, so reduce
92 		 * code duplication a bit for the common case by calling
93 		 * strtoull here and leave bounds checking and casting per
94 		 * option below.
95 		 */
96 		if (c != 'o' && c != 'b')
97 			tmp = strtoull(optarg, NULL, 0);
98 
99 		switch (c) {
100 		case 'u':
101 			if (tmp == ULLONG_MAX || tmp > UINT32_MAX)
102 				error(1, ERANGE, "busy_poll_usecs too large");
103 
104 			cfg_busy_poll_usecs = (uint32_t)tmp;
105 			break;
106 		case 'P':
107 			if (tmp == ULLONG_MAX || tmp > 1)
108 				error(1, ERANGE,
109 				      "prefer busy poll should be 0 or 1");
110 
111 			cfg_prefer_busy_poll = (uint8_t)tmp;
112 			break;
113 		case 'g':
114 			if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
115 				error(1, ERANGE,
116 				      "busy poll budget must be [0, UINT16_MAX]");
117 
118 			cfg_busy_poll_budget = (uint16_t)tmp;
119 			break;
120 		case 'p':
121 			if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
122 				error(1, ERANGE, "port must be <= 65535");
123 
124 			cfg_port = (uint16_t)tmp;
125 			break;
126 		case 'b':
127 			ret = inet_aton(optarg, &cfg_bind_addr);
128 			if (ret == 0)
129 				error(1, errno,
130 				      "bind address %s invalid", optarg);
131 			break;
132 		case 'o':
133 			cfg_outfile = strdup(optarg);
134 			if (!cfg_outfile)
135 				error(1, 0, "outfile invalid");
136 			break;
137 		case 'm':
138 			if (tmp == ULLONG_MAX || tmp > INT_MAX)
139 				error(1, ERANGE,
140 				      "max events must be > 0 and <= INT_MAX");
141 
142 			cfg_max_events = (int)tmp;
143 			break;
144 		case 'd':
145 			if (tmp == ULLONG_MAX || tmp > INT32_MAX)
146 				error(1, ERANGE,
147 				      "defer_hard_irqs must be <= INT32_MAX");
148 
149 			cfg_defer_hard_irqs = (uint32_t)tmp;
150 			break;
151 		case 'r':
152 			if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
153 				error(1, ERANGE,
154 				      "gro_flush_timeout must be < UINT64_MAX");
155 
156 			cfg_gro_flush_timeout = (uint64_t)tmp;
157 			break;
158 		case 's':
159 			if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
160 				error(1, ERANGE,
161 				      "irq_suspend_timeout must be < ULLONG_MAX");
162 
163 			cfg_irq_suspend_timeout = (uint64_t)tmp;
164 			break;
165 		case 'i':
166 			if (tmp == ULLONG_MAX || tmp > INT_MAX)
167 				error(1, ERANGE,
168 				      "ifindex must be <= INT_MAX");
169 
170 			cfg_ifindex = (int)tmp;
171 			break;
172 		case 't':
173 			if (tmp > 2)
174 				error(1, ERANGE, "napi threaded poll value must be 0-2");
175 
176 			cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp;
177 			break;
178 		}
179 	}
180 
181 	if (!cfg_ifindex)
182 		usage(argv[0]);
183 
184 	if (optind != argc)
185 		usage(argv[0]);
186 }
187 
/* Register @fd with the epoll instance @epfd for the event mask @events.
 *
 * The fd itself is stored as the event's user data, so callers recognise
 * the source of an event through epoll_event.data.fd. Exits through error()
 * when epoll_ctl() fails.
 */
static void epoll_ctl_add(int epfd, int fd, uint32_t events)
{
	struct epoll_event ev;

	/* data is a union wider than an int: clear the whole event first so
	 * no uninitialized stack bytes are handed to the kernel.
	 */
	memset(&ev, 0, sizeof(ev));
	ev.events = events;
	ev.data.fd = fd;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1)
		error(1, errno, "epoll_ctl add fd: %d", fd);
}
197 
/* Switch @sockfd to non-blocking mode by adding O_NONBLOCK to its file
 * status flags. Exits through error() if the flags cannot be read or
 * written.
 */
static void setnonblock(int sockfd)
{
	int flags;

	flags = fcntl(sockfd, F_GETFL, 0);
	/* a failed F_GETFL returns -1, which must not be OR-ed into the new
	 * flag set below
	 */
	if (flags == -1)
		error(1, errno, "unable to read socket file status flags");

	if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1)
		error(1, errno, "unable to set socket to nonblocking mode");
}
207 
/* Write all @buflen bytes of @buf to @fd, calling write() again after every
 * short write until nothing is left. Exits through error() as soon as
 * write() returns -1.
 */
static void write_chunk(int fd, char *buf, ssize_t buflen)
{
	ssize_t done;	/* bytes written so far */
	ssize_t n;	/* result of the latest write() */

	for (done = 0; done < buflen; done += n) {
		n = write(fd, buf + done, buflen - done);
		if (n == -1)
			error(1, errno, "unable to write data to outfile");
	}
}
225 
226 static void setup_queue(void)
227 {
228 	struct netdev_napi_get_list *napi_list = NULL;
229 	struct netdev_napi_get_req_dump *req = NULL;
230 	struct netdev_napi_set_req *set_req = NULL;
231 	struct ynl_sock *ys;
232 	struct ynl_error yerr;
233 	uint32_t napi_id = 0;
234 
235 	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
236 	if (!ys)
237 		error(1, 0, "YNL: %s", yerr.msg);
238 
239 	req = netdev_napi_get_req_dump_alloc();
240 	netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex);
241 	napi_list = netdev_napi_get_dump(ys, req);
242 
243 	/* assume there is 1 NAPI configured and take the first */
244 	if (napi_list->obj._present.id)
245 		napi_id = napi_list->obj.id;
246 	else
247 		error(1, 0, "napi ID not present?");
248 
249 	set_req = netdev_napi_set_req_alloc();
250 	netdev_napi_set_req_set_id(set_req, napi_id);
251 	netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs);
252 	netdev_napi_set_req_set_gro_flush_timeout(set_req,
253 						  cfg_gro_flush_timeout);
254 	netdev_napi_set_req_set_irq_suspend_timeout(set_req,
255 						    cfg_irq_suspend_timeout);
256 
257 	if (cfg_napi_threaded_poll)
258 		netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll);
259 
260 	if (netdev_napi_set(ys, set_req))
261 		error(1, 0, "can't set NAPI params: %s\n", yerr.msg);
262 
263 	netdev_napi_get_list_free(napi_list);
264 	netdev_napi_get_req_dump_free(req);
265 	netdev_napi_set_req_free(set_req);
266 	ynl_sock_destroy(ys);
267 }
268 
269 static void run_poller(void)
270 {
271 	struct epoll_event events[cfg_max_events];
272 	struct epoll_params epoll_params = {0};
273 	struct sockaddr_in server_addr;
274 	int i, epfd, nfds;
275 	ssize_t readlen;
276 	int outfile_fd;
277 	char buf[1024];
278 	int sockfd;
279 	int conn;
280 	int val;
281 
282 	outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644);
283 	if (outfile_fd == -1)
284 		error(1, errno, "unable to open outfile: %s", cfg_outfile);
285 
286 	sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
287 	if (sockfd == -1)
288 		error(1, errno, "unable to create listen socket");
289 
290 	server_addr.sin_family = AF_INET;
291 	server_addr.sin_port = htons(cfg_port);
292 	server_addr.sin_addr = cfg_bind_addr;
293 
294 	/* these values are range checked during parse_opts, so casting is safe
295 	 * here
296 	 */
297 	epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
298 	epoll_params.busy_poll_budget = cfg_busy_poll_budget;
299 	epoll_params.prefer_busy_poll = cfg_prefer_busy_poll;
300 	epoll_params.__pad = 0;
301 
302 	val = 1;
303 	if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)))
304 		error(1, errno, "poller setsockopt reuseaddr");
305 
306 	setnonblock(sockfd);
307 
308 	if (bind(sockfd, (struct sockaddr *)&server_addr,
309 		 sizeof(struct sockaddr_in)))
310 		error(0, errno, "poller bind to port: %d\n", cfg_port);
311 
312 	if (listen(sockfd, 1))
313 		error(1, errno, "poller listen");
314 
315 	epfd = epoll_create1(0);
316 	if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1)
317 		error(1, errno, "unable to set busy poll params");
318 
319 	epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET);
320 
321 	for (;;) {
322 		nfds = epoll_wait(epfd, events, cfg_max_events, -1);
323 		for (i = 0; i < nfds; i++) {
324 			if (events[i].data.fd == sockfd) {
325 				conn = accept(sockfd, NULL, NULL);
326 				if (conn == -1)
327 					error(1, errno,
328 					      "accepting incoming connection failed");
329 
330 				setnonblock(conn);
331 				epoll_ctl_add(epfd, conn,
332 					      EPOLLIN | EPOLLET | EPOLLRDHUP |
333 					      EPOLLHUP);
334 			} else if (events[i].events & EPOLLIN) {
335 				for (;;) {
336 					readlen = read(events[i].data.fd, buf,
337 						       sizeof(buf));
338 					if (readlen > 0)
339 						write_chunk(outfile_fd, buf,
340 							    readlen);
341 					else
342 						break;
343 				}
344 			} else {
345 				/* spurious event ? */
346 			}
347 			if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) {
348 				epoll_ctl(epfd, EPOLL_CTL_DEL,
349 					  events[i].data.fd, NULL);
350 				close(events[i].data.fd);
351 				close(outfile_fd);
352 				return;
353 			}
354 		}
355 	}
356 }
357 
358 int main(int argc, char *argv[])
359 {
360 	parse_opts(argc, argv);
361 	setup_queue();
362 	run_poller();
363 
364 	if (cfg_outfile)
365 		free(cfg_outfile);
366 
367 	return 0;
368 }
369