1 // SPDX-License-Identifier: GPL-2.0 2 #include <assert.h> 3 #include <errno.h> 4 #include <error.h> 5 #include <fcntl.h> 6 #include <inttypes.h> 7 #include <limits.h> 8 #include <stdlib.h> 9 #include <stdio.h> 10 #include <string.h> 11 #include <unistd.h> 12 #include <ynl.h> 13 14 #include <arpa/inet.h> 15 #include <netinet/in.h> 16 17 #include <sys/epoll.h> 18 #include <sys/ioctl.h> 19 #include <sys/socket.h> 20 #include <sys/types.h> 21 22 #include <linux/genetlink.h> 23 #include <linux/netlink.h> 24 25 #include "netdev-user.h" 26 27 /* The below ifdef blob is required because: 28 * 29 * - sys/epoll.h does not (yet) have the ioctl definitions included. So, 30 * systems with older glibcs will not have them available. However, 31 * sys/epoll.h does include the type definition for epoll_data, which is 32 * needed by the user program (e.g. epoll_event.data.fd) 33 * 34 * - linux/eventpoll.h does not define the epoll_data type, it is simply an 35 * opaque __u64. It does, however, include the ioctl definition. 36 * 37 * Including both headers is impossible (types would be redefined), so I've 38 * opted instead to take sys/epoll.h, and include the blob below. 39 * 40 * Someday, when glibc is globally up to date, the blob below can be removed. 41 */ 42 #if !defined(EPOLL_IOC_TYPE) 43 struct epoll_params { 44 uint32_t busy_poll_usecs; 45 uint16_t busy_poll_budget; 46 uint8_t prefer_busy_poll; 47 48 /* pad the struct to a multiple of 64bits */ 49 uint8_t __pad; 50 }; 51 52 #define EPOLL_IOC_TYPE 0x8A 53 #define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) 54 #define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) 55 #endif 56 57 static uint16_t cfg_port = 8000; 58 static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; 59 static char *cfg_outfile; 60 static int cfg_max_events = 8; 61 static uint32_t cfg_ifindex; 62 63 /* busy poll params */ 64 static uint32_t cfg_busy_poll_usecs; 65 static uint16_t cfg_busy_poll_budget; 66 static uint8_t cfg_prefer_busy_poll; 67 68 /* IRQ params */ 69 static uint32_t cfg_defer_hard_irqs; 70 static uint64_t cfg_gro_flush_timeout; 71 static uint64_t cfg_irq_suspend_timeout; 72 73 static void usage(const char *filepath) 74 { 75 error(1, 0, 76 "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", 77 filepath); 78 } 79 80 static void parse_opts(int argc, char **argv) 81 { 82 unsigned long long tmp; 83 int ret; 84 int c; 85 86 if (argc <= 1) 87 usage(argv[0]); 88 89 while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { 90 /* most options take integer values, except o and b, so reduce 91 * code duplication a bit for the common case by calling 92 * strtoull here and leave bounds checking and casting per 93 * option below. 94 */ 95 if (c != 'o' && c != 'b') 96 tmp = strtoull(optarg, NULL, 0); 97 98 switch (c) { 99 case 'u': 100 if (tmp == ULLONG_MAX || tmp > UINT32_MAX) 101 error(1, ERANGE, "busy_poll_usecs too large"); 102 103 cfg_busy_poll_usecs = (uint32_t)tmp; 104 break; 105 case 'P': 106 if (tmp == ULLONG_MAX || tmp > 1) 107 error(1, ERANGE, 108 "prefer busy poll should be 0 or 1"); 109 110 cfg_prefer_busy_poll = (uint8_t)tmp; 111 break; 112 case 'g': 113 if (tmp == ULLONG_MAX || tmp > UINT16_MAX) 114 error(1, ERANGE, 115 "busy poll budget must be [0, UINT16_MAX]"); 116 117 cfg_busy_poll_budget = (uint16_t)tmp; 118 break; 119 case 'p': 120 if (tmp == ULLONG_MAX || tmp > UINT16_MAX) 121 error(1, ERANGE, "port must be <= 65535"); 122 123 cfg_port = (uint16_t)tmp; 124 break; 125 case 'b': 126 ret = inet_aton(optarg, &cfg_bind_addr); 127 if (ret == 0) 128 error(1, errno, 129 "bind address %s invalid", optarg); 130 break; 131 case 'o': 132 cfg_outfile = strdup(optarg); 133 if (!cfg_outfile) 134 error(1, 0, "outfile invalid"); 135 break; 136 case 'm': 137 if (tmp == ULLONG_MAX || tmp > INT_MAX) 138 error(1, ERANGE, 139 "max events must be > 0 and <= INT_MAX"); 140 141 cfg_max_events = (int)tmp; 142 break; 143 case 'd': 144 if (tmp == ULLONG_MAX || tmp > INT32_MAX) 145 error(1, ERANGE, 146 "defer_hard_irqs must be <= INT32_MAX"); 147 148 cfg_defer_hard_irqs = (uint32_t)tmp; 149 break; 150 case 'r': 151 if (tmp == ULLONG_MAX || tmp > UINT64_MAX) 152 error(1, ERANGE, 153 "gro_flush_timeout must be < UINT64_MAX"); 154 155 cfg_gro_flush_timeout = (uint64_t)tmp; 156 break; 157 case 's': 158 if (tmp == ULLONG_MAX || tmp > UINT64_MAX) 159 error(1, ERANGE, 160 "irq_suspend_timeout must be < ULLONG_MAX"); 161 162 cfg_irq_suspend_timeout = (uint64_t)tmp; 163 break; 164 case 'i': 165 if (tmp == ULLONG_MAX || tmp > INT_MAX) 166 error(1, ERANGE, 167 "ifindex must be <= INT_MAX"); 168 169 cfg_ifindex = (int)tmp; 170 break; 171 } 172 } 173 174 if (!cfg_ifindex) 175 usage(argv[0]); 176 177 if (optind != argc) 178 usage(argv[0]); 179 } 180 181 static void epoll_ctl_add(int epfd, int fd, uint32_t events) 182 { 183 struct epoll_event ev; 184 185 ev.events = events; 186 ev.data.fd = fd; 187 if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) 188 error(1, errno, "epoll_ctl add fd: %d", fd); 189 } 190 191 static void setnonblock(int sockfd) 192 { 193 int flags; 194 195 flags = fcntl(sockfd, F_GETFL, 0); 196 197 if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1) 198 error(1, errno, "unable to set socket to nonblocking mode"); 199 } 200 201 static void write_chunk(int fd, char *buf, ssize_t buflen) 202 { 203 ssize_t remaining = buflen; 204 char *buf_offset = buf; 205 ssize_t writelen = 0; 206 ssize_t write_result; 207 208 while (writelen < buflen) { 209 write_result = write(fd, buf_offset, remaining); 210 if (write_result == -1) 211 error(1, errno, "unable to write data to outfile"); 212 213 writelen += write_result; 214 remaining -= write_result; 215 buf_offset += write_result; 216 } 217 } 218 219 static void setup_queue(void) 220 { 221 struct netdev_napi_get_list *napi_list = NULL; 222 struct netdev_napi_get_req_dump *req = NULL; 223 struct netdev_napi_set_req *set_req = NULL; 224 struct ynl_sock *ys; 225 struct ynl_error yerr; 226 uint32_t napi_id = 0; 227 228 ys = ynl_sock_create(&ynl_netdev_family, &yerr); 229 if (!ys) 230 error(1, 0, "YNL: %s", yerr.msg); 231 232 req = netdev_napi_get_req_dump_alloc(); 233 netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex); 234 napi_list = netdev_napi_get_dump(ys, req); 235 236 /* assume there is 1 NAPI configured and take the first */ 237 if (napi_list->obj._present.id) 238 napi_id = napi_list->obj.id; 239 else 240 error(1, 0, "napi ID not present?"); 241 242 set_req = netdev_napi_set_req_alloc(); 243 netdev_napi_set_req_set_id(set_req, napi_id); 244 netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs); 245 netdev_napi_set_req_set_gro_flush_timeout(set_req, 246 cfg_gro_flush_timeout); 247 netdev_napi_set_req_set_irq_suspend_timeout(set_req, 248 cfg_irq_suspend_timeout); 249 250 if (netdev_napi_set(ys, set_req)) 251 error(1, 0, "can't set NAPI params: %s\n", yerr.msg); 252 253 netdev_napi_get_list_free(napi_list); 254 netdev_napi_get_req_dump_free(req); 255 netdev_napi_set_req_free(set_req); 256 ynl_sock_destroy(ys); 257 } 258 259 static void run_poller(void) 260 { 261 struct epoll_event events[cfg_max_events]; 262 struct epoll_params epoll_params = {0}; 263 struct sockaddr_in server_addr; 264 int i, epfd, nfds; 265 ssize_t readlen; 266 int outfile_fd; 267 char buf[1024]; 268 int sockfd; 269 int conn; 270 int val; 271 272 outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644); 273 if (outfile_fd == -1) 274 error(1, errno, "unable to open outfile: %s", cfg_outfile); 275 276 sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 277 if (sockfd == -1) 278 error(1, errno, "unable to create listen socket"); 279 280 server_addr.sin_family = AF_INET; 281 server_addr.sin_port = htons(cfg_port); 282 server_addr.sin_addr = cfg_bind_addr; 283 284 /* these values are range checked during parse_opts, so casting is safe 285 * here 286 */ 287 epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; 288 epoll_params.busy_poll_budget = cfg_busy_poll_budget; 289 epoll_params.prefer_busy_poll = cfg_prefer_busy_poll; 290 epoll_params.__pad = 0; 291 292 val = 1; 293 if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) 294 error(1, errno, "poller setsockopt reuseaddr"); 295 296 setnonblock(sockfd); 297 298 if (bind(sockfd, (struct sockaddr *)&server_addr, 299 sizeof(struct sockaddr_in))) 300 error(0, errno, "poller bind to port: %d\n", cfg_port); 301 302 if (listen(sockfd, 1)) 303 error(1, errno, "poller listen"); 304 305 epfd = epoll_create1(0); 306 if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1) 307 error(1, errno, "unable to set busy poll params"); 308 309 epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET); 310 311 for (;;) { 312 nfds = epoll_wait(epfd, events, cfg_max_events, -1); 313 for (i = 0; i < nfds; i++) { 314 if (events[i].data.fd == sockfd) { 315 conn = accept(sockfd, NULL, NULL); 316 if (conn == -1) 317 error(1, errno, 318 "accepting incoming connection failed"); 319 320 setnonblock(conn); 321 epoll_ctl_add(epfd, conn, 322 EPOLLIN | EPOLLET | EPOLLRDHUP | 323 EPOLLHUP); 324 } else if (events[i].events & EPOLLIN) { 325 for (;;) { 326 readlen = read(events[i].data.fd, buf, 327 sizeof(buf)); 328 if (readlen > 0) 329 write_chunk(outfile_fd, buf, 330 readlen); 331 else 332 break; 333 } 334 } else { 335 /* spurious event ? */ 336 } 337 if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) { 338 epoll_ctl(epfd, EPOLL_CTL_DEL, 339 events[i].data.fd, NULL); 340 close(events[i].data.fd); 341 close(outfile_fd); 342 return; 343 } 344 } 345 } 346 } 347 348 int main(int argc, char *argv[]) 349 { 350 parse_opts(argc, argv); 351 setup_queue(); 352 run_poller(); 353 354 if (cfg_outfile) 355 free(cfg_outfile); 356 357 return 0; 358 } 359