1 // SPDX-License-Identifier: GPL-2.0 2 #include <assert.h> 3 #include <errno.h> 4 #include <error.h> 5 #include <fcntl.h> 6 #include <inttypes.h> 7 #include <limits.h> 8 #include <stdlib.h> 9 #include <stdio.h> 10 #include <string.h> 11 #include <unistd.h> 12 #include <ynl.h> 13 14 #include <arpa/inet.h> 15 #include <netinet/in.h> 16 17 #include <sys/epoll.h> 18 #include <sys/ioctl.h> 19 #include <sys/socket.h> 20 #include <sys/types.h> 21 22 #include <linux/genetlink.h> 23 #include <linux/netlink.h> 24 25 #include "netdev-user.h" 26 27 /* The below ifdef blob is required because: 28 * 29 * - sys/epoll.h does not (yet) have the ioctl definitions included. So, 30 * systems with older glibcs will not have them available. However, 31 * sys/epoll.h does include the type definition for epoll_data, which is 32 * needed by the user program (e.g. epoll_event.data.fd) 33 * 34 * - linux/eventpoll.h does not define the epoll_data type, it is simply an 35 * opaque __u64. It does, however, include the ioctl definition. 36 * 37 * Including both headers is impossible (types would be redefined), so I've 38 * opted instead to take sys/epoll.h, and include the blob below. 39 * 40 * Someday, when glibc is globally up to date, the blob below can be removed. 41 */ 42 #if !defined(EPOLL_IOC_TYPE) 43 struct epoll_params { 44 uint32_t busy_poll_usecs; 45 uint16_t busy_poll_budget; 46 uint8_t prefer_busy_poll; 47 48 /* pad the struct to a multiple of 64bits */ 49 uint8_t __pad; 50 }; 51 52 #define EPOLL_IOC_TYPE 0x8A 53 #define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) 54 #define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) 55 #endif 56 57 static uint16_t cfg_port = 8000; 58 static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; 59 static char *cfg_outfile; 60 static int cfg_max_events = 8; 61 static uint32_t cfg_ifindex; 62 63 /* busy poll params */ 64 static uint32_t cfg_busy_poll_usecs; 65 static uint16_t cfg_busy_poll_budget; 66 static uint8_t cfg_prefer_busy_poll; 67 68 /* NAPI params */ 69 static uint32_t cfg_defer_hard_irqs; 70 static uint64_t cfg_gro_flush_timeout; 71 static uint64_t cfg_irq_suspend_timeout; 72 static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED; 73 74 static void usage(const char *filepath) 75 { 76 error(1, 0, 77 "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>", 78 filepath); 79 } 80 81 static void parse_opts(int argc, char **argv) 82 { 83 unsigned long long tmp; 84 int ret; 85 int c; 86 87 if (argc <= 1) 88 usage(argv[0]); 89 90 while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) { 91 /* most options take integer values, except o and b, so reduce 92 * code duplication a bit for the common case by calling 93 * strtoull here and leave bounds checking and casting per 94 * option below. 95 */ 96 if (c != 'o' && c != 'b') 97 tmp = strtoull(optarg, NULL, 0); 98 99 switch (c) { 100 case 'u': 101 if (tmp == ULLONG_MAX || tmp > UINT32_MAX) 102 error(1, ERANGE, "busy_poll_usecs too large"); 103 104 cfg_busy_poll_usecs = (uint32_t)tmp; 105 break; 106 case 'P': 107 if (tmp == ULLONG_MAX || tmp > 1) 108 error(1, ERANGE, 109 "prefer busy poll should be 0 or 1"); 110 111 cfg_prefer_busy_poll = (uint8_t)tmp; 112 break; 113 case 'g': 114 if (tmp == ULLONG_MAX || tmp > UINT16_MAX) 115 error(1, ERANGE, 116 "busy poll budget must be [0, UINT16_MAX]"); 117 118 cfg_busy_poll_budget = (uint16_t)tmp; 119 break; 120 case 'p': 121 if (tmp == ULLONG_MAX || tmp > UINT16_MAX) 122 error(1, ERANGE, "port must be <= 65535"); 123 124 cfg_port = (uint16_t)tmp; 125 break; 126 case 'b': 127 ret = inet_aton(optarg, &cfg_bind_addr); 128 if (ret == 0) 129 error(1, errno, 130 "bind address %s invalid", optarg); 131 break; 132 case 'o': 133 cfg_outfile = strdup(optarg); 134 if (!cfg_outfile) 135 error(1, 0, "outfile invalid"); 136 break; 137 case 'm': 138 if (tmp == ULLONG_MAX || tmp > INT_MAX) 139 error(1, ERANGE, 140 "max events must be > 0 and <= INT_MAX"); 141 142 cfg_max_events = (int)tmp; 143 break; 144 case 'd': 145 if (tmp == ULLONG_MAX || tmp > INT32_MAX) 146 error(1, ERANGE, 147 "defer_hard_irqs must be <= INT32_MAX"); 148 149 cfg_defer_hard_irqs = (uint32_t)tmp; 150 break; 151 case 'r': 152 if (tmp == ULLONG_MAX || tmp > UINT64_MAX) 153 error(1, ERANGE, 154 "gro_flush_timeout must be < UINT64_MAX"); 155 156 cfg_gro_flush_timeout = (uint64_t)tmp; 157 break; 158 case 's': 159 if (tmp == ULLONG_MAX || tmp > UINT64_MAX) 160 error(1, ERANGE, 161 "irq_suspend_timeout must be < ULLONG_MAX"); 162 163 cfg_irq_suspend_timeout = (uint64_t)tmp; 164 break; 165 case 'i': 166 if (tmp == ULLONG_MAX || tmp > INT_MAX) 167 error(1, ERANGE, 168 "ifindex must be <= INT_MAX"); 169 170 cfg_ifindex = (int)tmp; 171 break; 172 case 't': 173 if (tmp > 2) 174 error(1, ERANGE, "napi threaded poll value must be 0-2"); 175 176 cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp; 177 break; 178 } 179 } 180 181 if (!cfg_ifindex) 182 usage(argv[0]); 183 184 if (optind != argc) 185 usage(argv[0]); 186 } 187 188 static void epoll_ctl_add(int epfd, int fd, uint32_t events) 189 { 190 struct epoll_event ev; 191 192 ev.events = events; 193 ev.data.fd = fd; 194 if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) 195 error(1, errno, "epoll_ctl add fd: %d", fd); 196 } 197 198 static void setnonblock(int sockfd) 199 { 200 int flags; 201 202 flags = fcntl(sockfd, F_GETFL, 0); 203 204 if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1) 205 error(1, errno, "unable to set socket to nonblocking mode"); 206 } 207 208 static void write_chunk(int fd, char *buf, ssize_t buflen) 209 { 210 ssize_t remaining = buflen; 211 char *buf_offset = buf; 212 ssize_t writelen = 0; 213 ssize_t write_result; 214 215 while (writelen < buflen) { 216 write_result = write(fd, buf_offset, remaining); 217 if (write_result == -1) 218 error(1, errno, "unable to write data to outfile"); 219 220 writelen += write_result; 221 remaining -= write_result; 222 buf_offset += write_result; 223 } 224 } 225 226 static void setup_queue(void) 227 { 228 struct netdev_napi_get_list *napi_list = NULL; 229 struct netdev_napi_get_req_dump *req = NULL; 230 struct netdev_napi_set_req *set_req = NULL; 231 struct ynl_sock *ys; 232 struct ynl_error yerr; 233 uint32_t napi_id = 0; 234 235 ys = ynl_sock_create(&ynl_netdev_family, &yerr); 236 if (!ys) 237 error(1, 0, "YNL: %s", yerr.msg); 238 239 req = netdev_napi_get_req_dump_alloc(); 240 netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex); 241 napi_list = netdev_napi_get_dump(ys, req); 242 243 /* assume there is 1 NAPI configured and take the first */ 244 if (napi_list->obj._present.id) 245 napi_id = napi_list->obj.id; 246 else 247 error(1, 0, "napi ID not present?"); 248 249 set_req = netdev_napi_set_req_alloc(); 250 netdev_napi_set_req_set_id(set_req, napi_id); 251 netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs); 252 netdev_napi_set_req_set_gro_flush_timeout(set_req, 253 cfg_gro_flush_timeout); 254 netdev_napi_set_req_set_irq_suspend_timeout(set_req, 255 cfg_irq_suspend_timeout); 256 257 if (cfg_napi_threaded_poll) 258 netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll); 259 260 if (netdev_napi_set(ys, set_req)) 261 error(1, 0, "can't set NAPI params: %s\n", yerr.msg); 262 263 netdev_napi_get_list_free(napi_list); 264 netdev_napi_get_req_dump_free(req); 265 netdev_napi_set_req_free(set_req); 266 ynl_sock_destroy(ys); 267 } 268 269 static void run_poller(void) 270 { 271 struct epoll_event events[cfg_max_events]; 272 struct epoll_params epoll_params = {0}; 273 struct sockaddr_in server_addr; 274 int i, epfd, nfds; 275 ssize_t readlen; 276 int outfile_fd; 277 char buf[1024]; 278 int sockfd; 279 int conn; 280 int val; 281 282 outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644); 283 if (outfile_fd == -1) 284 error(1, errno, "unable to open outfile: %s", cfg_outfile); 285 286 sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 287 if (sockfd == -1) 288 error(1, errno, "unable to create listen socket"); 289 290 server_addr.sin_family = AF_INET; 291 server_addr.sin_port = htons(cfg_port); 292 server_addr.sin_addr = cfg_bind_addr; 293 294 /* these values are range checked during parse_opts, so casting is safe 295 * here 296 */ 297 epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; 298 epoll_params.busy_poll_budget = cfg_busy_poll_budget; 299 epoll_params.prefer_busy_poll = cfg_prefer_busy_poll; 300 epoll_params.__pad = 0; 301 302 val = 1; 303 if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) 304 error(1, errno, "poller setsockopt reuseaddr"); 305 306 setnonblock(sockfd); 307 308 if (bind(sockfd, (struct sockaddr *)&server_addr, 309 sizeof(struct sockaddr_in))) 310 error(0, errno, "poller bind to port: %d\n", cfg_port); 311 312 if (listen(sockfd, 1)) 313 error(1, errno, "poller listen"); 314 315 epfd = epoll_create1(0); 316 if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1) 317 error(1, errno, "unable to set busy poll params"); 318 319 epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET); 320 321 for (;;) { 322 nfds = epoll_wait(epfd, events, cfg_max_events, -1); 323 for (i = 0; i < nfds; i++) { 324 if (events[i].data.fd == sockfd) { 325 conn = accept(sockfd, NULL, NULL); 326 if (conn == -1) 327 error(1, errno, 328 "accepting incoming connection failed"); 329 330 setnonblock(conn); 331 epoll_ctl_add(epfd, conn, 332 EPOLLIN | EPOLLET | EPOLLRDHUP | 333 EPOLLHUP); 334 } else if (events[i].events & EPOLLIN) { 335 for (;;) { 336 readlen = read(events[i].data.fd, buf, 337 sizeof(buf)); 338 if (readlen > 0) 339 write_chunk(outfile_fd, buf, 340 readlen); 341 else 342 break; 343 } 344 } else { 345 /* spurious event ? */ 346 } 347 if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) { 348 epoll_ctl(epfd, EPOLL_CTL_DEL, 349 events[i].data.fd, NULL); 350 close(events[i].data.fd); 351 close(outfile_fd); 352 return; 353 } 354 } 355 } 356 } 357 358 int main(int argc, char *argv[]) 359 { 360 parse_opts(argc, argv); 361 setup_queue(); 362 run_poller(); 363 364 if (cfg_outfile) 365 free(cfg_outfile); 366 367 return 0; 368 } 369