1*347fcdc4SJoe Damato // SPDX-License-Identifier: GPL-2.0 2*347fcdc4SJoe Damato #include <assert.h> 3*347fcdc4SJoe Damato #include <errno.h> 4*347fcdc4SJoe Damato #include <error.h> 5*347fcdc4SJoe Damato #include <fcntl.h> 6*347fcdc4SJoe Damato #include <inttypes.h> 7*347fcdc4SJoe Damato #include <limits.h> 8*347fcdc4SJoe Damato #include <stdlib.h> 9*347fcdc4SJoe Damato #include <stdio.h> 10*347fcdc4SJoe Damato #include <string.h> 11*347fcdc4SJoe Damato #include <unistd.h> 12*347fcdc4SJoe Damato #include <ynl.h> 13*347fcdc4SJoe Damato 14*347fcdc4SJoe Damato #include <arpa/inet.h> 15*347fcdc4SJoe Damato #include <netinet/in.h> 16*347fcdc4SJoe Damato 17*347fcdc4SJoe Damato #include <sys/epoll.h> 18*347fcdc4SJoe Damato #include <sys/ioctl.h> 19*347fcdc4SJoe Damato #include <sys/socket.h> 20*347fcdc4SJoe Damato #include <sys/types.h> 21*347fcdc4SJoe Damato 22*347fcdc4SJoe Damato #include <linux/genetlink.h> 23*347fcdc4SJoe Damato #include <linux/netlink.h> 24*347fcdc4SJoe Damato 25*347fcdc4SJoe Damato #include "netdev-user.h" 26*347fcdc4SJoe Damato 27*347fcdc4SJoe Damato /* The below ifdef blob is required because: 28*347fcdc4SJoe Damato * 29*347fcdc4SJoe Damato * - sys/epoll.h does not (yet) have the ioctl definitions included. So, 30*347fcdc4SJoe Damato * systems with older glibcs will not have them available. However, 31*347fcdc4SJoe Damato * sys/epoll.h does include the type definition for epoll_data, which is 32*347fcdc4SJoe Damato * needed by the user program (e.g. epoll_event.data.fd) 33*347fcdc4SJoe Damato * 34*347fcdc4SJoe Damato * - linux/eventpoll.h does not define the epoll_data type, it is simply an 35*347fcdc4SJoe Damato * opaque __u64. It does, however, include the ioctl definition. 36*347fcdc4SJoe Damato * 37*347fcdc4SJoe Damato * Including both headers is impossible (types would be redefined), so I've 38*347fcdc4SJoe Damato * opted instead to take sys/epoll.h, and include the blob below. 39*347fcdc4SJoe Damato * 40*347fcdc4SJoe Damato * Someday, when glibc is globally up to date, the blob below can be removed. 41*347fcdc4SJoe Damato */ 42*347fcdc4SJoe Damato #if !defined(EPOLL_IOC_TYPE) 43*347fcdc4SJoe Damato struct epoll_params { 44*347fcdc4SJoe Damato uint32_t busy_poll_usecs; 45*347fcdc4SJoe Damato uint16_t busy_poll_budget; 46*347fcdc4SJoe Damato uint8_t prefer_busy_poll; 47*347fcdc4SJoe Damato 48*347fcdc4SJoe Damato /* pad the struct to a multiple of 64bits */ 49*347fcdc4SJoe Damato uint8_t __pad; 50*347fcdc4SJoe Damato }; 51*347fcdc4SJoe Damato 52*347fcdc4SJoe Damato #define EPOLL_IOC_TYPE 0x8A 53*347fcdc4SJoe Damato #define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) 54*347fcdc4SJoe Damato #define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) 55*347fcdc4SJoe Damato #endif 56*347fcdc4SJoe Damato 57*347fcdc4SJoe Damato static uint32_t cfg_port = 8000; 58*347fcdc4SJoe Damato static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; 59*347fcdc4SJoe Damato static char *cfg_outfile; 60*347fcdc4SJoe Damato static int cfg_max_events = 8; 61*347fcdc4SJoe Damato static int cfg_ifindex; 62*347fcdc4SJoe Damato 63*347fcdc4SJoe Damato /* busy poll params */ 64*347fcdc4SJoe Damato static uint32_t cfg_busy_poll_usecs; 65*347fcdc4SJoe Damato static uint32_t cfg_busy_poll_budget; 66*347fcdc4SJoe Damato static uint32_t cfg_prefer_busy_poll; 67*347fcdc4SJoe Damato 68*347fcdc4SJoe Damato /* IRQ params */ 69*347fcdc4SJoe Damato static uint32_t cfg_defer_hard_irqs; 70*347fcdc4SJoe Damato static uint64_t cfg_gro_flush_timeout; 71*347fcdc4SJoe Damato static uint64_t cfg_irq_suspend_timeout; 72*347fcdc4SJoe Damato 73*347fcdc4SJoe Damato static void usage(const char *filepath) 74*347fcdc4SJoe Damato { 75*347fcdc4SJoe Damato error(1, 0, 76*347fcdc4SJoe Damato "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", 77*347fcdc4SJoe Damato filepath); 78*347fcdc4SJoe Damato } 79*347fcdc4SJoe Damato 80*347fcdc4SJoe Damato static void parse_opts(int argc, char **argv) 81*347fcdc4SJoe Damato { 82*347fcdc4SJoe Damato int ret; 83*347fcdc4SJoe Damato int c; 84*347fcdc4SJoe Damato 85*347fcdc4SJoe Damato if (argc <= 1) 86*347fcdc4SJoe Damato usage(argv[0]); 87*347fcdc4SJoe Damato 88*347fcdc4SJoe Damato while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { 89*347fcdc4SJoe Damato switch (c) { 90*347fcdc4SJoe Damato case 'u': 91*347fcdc4SJoe Damato cfg_busy_poll_usecs = strtoul(optarg, NULL, 0); 92*347fcdc4SJoe Damato if (cfg_busy_poll_usecs == ULONG_MAX || 93*347fcdc4SJoe Damato cfg_busy_poll_usecs > UINT32_MAX) 94*347fcdc4SJoe Damato error(1, ERANGE, "busy_poll_usecs too large"); 95*347fcdc4SJoe Damato break; 96*347fcdc4SJoe Damato case 'P': 97*347fcdc4SJoe Damato cfg_prefer_busy_poll = strtoul(optarg, NULL, 0); 98*347fcdc4SJoe Damato if (cfg_prefer_busy_poll == ULONG_MAX || 99*347fcdc4SJoe Damato cfg_prefer_busy_poll > 1) 100*347fcdc4SJoe Damato error(1, ERANGE, 101*347fcdc4SJoe Damato "prefer busy poll should be 0 or 1"); 102*347fcdc4SJoe Damato break; 103*347fcdc4SJoe Damato case 'g': 104*347fcdc4SJoe Damato cfg_busy_poll_budget = strtoul(optarg, NULL, 0); 105*347fcdc4SJoe Damato if (cfg_busy_poll_budget == ULONG_MAX || 106*347fcdc4SJoe Damato cfg_busy_poll_budget > UINT16_MAX) 107*347fcdc4SJoe Damato error(1, ERANGE, 108*347fcdc4SJoe Damato "busy poll budget must be [0, UINT16_MAX]"); 109*347fcdc4SJoe Damato break; 110*347fcdc4SJoe Damato case 'p': 111*347fcdc4SJoe Damato cfg_port = strtoul(optarg, NULL, 0); 112*347fcdc4SJoe Damato if (cfg_port > UINT16_MAX) 113*347fcdc4SJoe Damato error(1, ERANGE, "port must be <= 65535"); 114*347fcdc4SJoe Damato break; 115*347fcdc4SJoe Damato case 'b': 116*347fcdc4SJoe Damato ret = inet_aton(optarg, &cfg_bind_addr); 117*347fcdc4SJoe Damato if (ret == 0) 118*347fcdc4SJoe Damato error(1, errno, 119*347fcdc4SJoe Damato "bind address %s invalid", optarg); 120*347fcdc4SJoe Damato break; 121*347fcdc4SJoe Damato case 'o': 122*347fcdc4SJoe Damato cfg_outfile = strdup(optarg); 123*347fcdc4SJoe Damato if (!cfg_outfile) 124*347fcdc4SJoe Damato error(1, 0, "outfile invalid"); 125*347fcdc4SJoe Damato break; 126*347fcdc4SJoe Damato case 'm': 127*347fcdc4SJoe Damato cfg_max_events = strtol(optarg, NULL, 0); 128*347fcdc4SJoe Damato 129*347fcdc4SJoe Damato if (cfg_max_events == LONG_MIN || 130*347fcdc4SJoe Damato cfg_max_events == LONG_MAX || 131*347fcdc4SJoe Damato cfg_max_events <= 0) 132*347fcdc4SJoe Damato error(1, ERANGE, 133*347fcdc4SJoe Damato "max events must be > 0 and < LONG_MAX"); 134*347fcdc4SJoe Damato break; 135*347fcdc4SJoe Damato case 'd': 136*347fcdc4SJoe Damato cfg_defer_hard_irqs = strtoul(optarg, NULL, 0); 137*347fcdc4SJoe Damato 138*347fcdc4SJoe Damato if (cfg_defer_hard_irqs == ULONG_MAX || 139*347fcdc4SJoe Damato cfg_defer_hard_irqs > INT32_MAX) 140*347fcdc4SJoe Damato error(1, ERANGE, 141*347fcdc4SJoe Damato "defer_hard_irqs must be <= INT32_MAX"); 142*347fcdc4SJoe Damato break; 143*347fcdc4SJoe Damato case 'r': 144*347fcdc4SJoe Damato cfg_gro_flush_timeout = strtoull(optarg, NULL, 0); 145*347fcdc4SJoe Damato 146*347fcdc4SJoe Damato if (cfg_gro_flush_timeout == ULLONG_MAX) 147*347fcdc4SJoe Damato error(1, ERANGE, 148*347fcdc4SJoe Damato "gro_flush_timeout must be < ULLONG_MAX"); 149*347fcdc4SJoe Damato break; 150*347fcdc4SJoe Damato case 's': 151*347fcdc4SJoe Damato cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0); 152*347fcdc4SJoe Damato 153*347fcdc4SJoe Damato if (cfg_irq_suspend_timeout == ULLONG_MAX) 154*347fcdc4SJoe Damato error(1, ERANGE, 155*347fcdc4SJoe Damato "irq_suspend_timeout must be < ULLONG_MAX"); 156*347fcdc4SJoe Damato break; 157*347fcdc4SJoe Damato case 'i': 158*347fcdc4SJoe Damato cfg_ifindex = strtoul(optarg, NULL, 0); 159*347fcdc4SJoe Damato if (cfg_ifindex == ULONG_MAX) 160*347fcdc4SJoe Damato error(1, ERANGE, 161*347fcdc4SJoe Damato "ifindex must be < ULONG_MAX"); 162*347fcdc4SJoe Damato break; 163*347fcdc4SJoe Damato } 164*347fcdc4SJoe Damato } 165*347fcdc4SJoe Damato 166*347fcdc4SJoe Damato if (!cfg_ifindex) 167*347fcdc4SJoe Damato usage(argv[0]); 168*347fcdc4SJoe Damato 169*347fcdc4SJoe Damato if (optind != argc) 170*347fcdc4SJoe Damato usage(argv[0]); 171*347fcdc4SJoe Damato } 172*347fcdc4SJoe Damato 173*347fcdc4SJoe Damato static void epoll_ctl_add(int epfd, int fd, uint32_t events) 174*347fcdc4SJoe Damato { 175*347fcdc4SJoe Damato struct epoll_event ev; 176*347fcdc4SJoe Damato 177*347fcdc4SJoe Damato ev.events = events; 178*347fcdc4SJoe Damato ev.data.fd = fd; 179*347fcdc4SJoe Damato if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) 180*347fcdc4SJoe Damato error(1, errno, "epoll_ctl add fd: %d", fd); 181*347fcdc4SJoe Damato } 182*347fcdc4SJoe Damato 183*347fcdc4SJoe Damato static void setnonblock(int sockfd) 184*347fcdc4SJoe Damato { 185*347fcdc4SJoe Damato int flags; 186*347fcdc4SJoe Damato 187*347fcdc4SJoe Damato flags = fcntl(sockfd, F_GETFL, 0); 188*347fcdc4SJoe Damato 189*347fcdc4SJoe Damato if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1) 190*347fcdc4SJoe Damato error(1, errno, "unable to set socket to nonblocking mode"); 191*347fcdc4SJoe Damato } 192*347fcdc4SJoe Damato 193*347fcdc4SJoe Damato static void write_chunk(int fd, char *buf, ssize_t buflen) 194*347fcdc4SJoe Damato { 195*347fcdc4SJoe Damato ssize_t remaining = buflen; 196*347fcdc4SJoe Damato char *buf_offset = buf; 197*347fcdc4SJoe Damato ssize_t writelen = 0; 198*347fcdc4SJoe Damato ssize_t write_result; 199*347fcdc4SJoe Damato 200*347fcdc4SJoe Damato while (writelen < buflen) { 201*347fcdc4SJoe Damato write_result = write(fd, buf_offset, remaining); 202*347fcdc4SJoe Damato if (write_result == -1) 203*347fcdc4SJoe Damato error(1, errno, "unable to write data to outfile"); 204*347fcdc4SJoe Damato 205*347fcdc4SJoe Damato writelen += write_result; 206*347fcdc4SJoe Damato remaining -= write_result; 207*347fcdc4SJoe Damato buf_offset += write_result; 208*347fcdc4SJoe Damato } 209*347fcdc4SJoe Damato } 210*347fcdc4SJoe Damato 211*347fcdc4SJoe Damato static void setup_queue(void) 212*347fcdc4SJoe Damato { 213*347fcdc4SJoe Damato struct netdev_napi_get_list *napi_list = NULL; 214*347fcdc4SJoe Damato struct netdev_napi_get_req_dump *req = NULL; 215*347fcdc4SJoe Damato struct netdev_napi_set_req *set_req = NULL; 216*347fcdc4SJoe Damato struct ynl_sock *ys; 217*347fcdc4SJoe Damato struct ynl_error yerr; 218*347fcdc4SJoe Damato uint32_t napi_id; 219*347fcdc4SJoe Damato 220*347fcdc4SJoe Damato ys = ynl_sock_create(&ynl_netdev_family, &yerr); 221*347fcdc4SJoe Damato if (!ys) 222*347fcdc4SJoe Damato error(1, 0, "YNL: %s", yerr.msg); 223*347fcdc4SJoe Damato 224*347fcdc4SJoe Damato req = netdev_napi_get_req_dump_alloc(); 225*347fcdc4SJoe Damato netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex); 226*347fcdc4SJoe Damato napi_list = netdev_napi_get_dump(ys, req); 227*347fcdc4SJoe Damato 228*347fcdc4SJoe Damato /* assume there is 1 NAPI configured and take the first */ 229*347fcdc4SJoe Damato if (napi_list->obj._present.id) 230*347fcdc4SJoe Damato napi_id = napi_list->obj.id; 231*347fcdc4SJoe Damato else 232*347fcdc4SJoe Damato error(1, 0, "napi ID not present?"); 233*347fcdc4SJoe Damato 234*347fcdc4SJoe Damato set_req = netdev_napi_set_req_alloc(); 235*347fcdc4SJoe Damato netdev_napi_set_req_set_id(set_req, napi_id); 236*347fcdc4SJoe Damato netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs); 237*347fcdc4SJoe Damato netdev_napi_set_req_set_gro_flush_timeout(set_req, 238*347fcdc4SJoe Damato cfg_gro_flush_timeout); 239*347fcdc4SJoe Damato netdev_napi_set_req_set_irq_suspend_timeout(set_req, 240*347fcdc4SJoe Damato cfg_irq_suspend_timeout); 241*347fcdc4SJoe Damato 242*347fcdc4SJoe Damato if (netdev_napi_set(ys, set_req)) 243*347fcdc4SJoe Damato error(1, 0, "can't set NAPI params: %s\n", yerr.msg); 244*347fcdc4SJoe Damato 245*347fcdc4SJoe Damato netdev_napi_get_list_free(napi_list); 246*347fcdc4SJoe Damato netdev_napi_get_req_dump_free(req); 247*347fcdc4SJoe Damato netdev_napi_set_req_free(set_req); 248*347fcdc4SJoe Damato ynl_sock_destroy(ys); 249*347fcdc4SJoe Damato } 250*347fcdc4SJoe Damato 251*347fcdc4SJoe Damato static void run_poller(void) 252*347fcdc4SJoe Damato { 253*347fcdc4SJoe Damato struct epoll_event events[cfg_max_events]; 254*347fcdc4SJoe Damato struct epoll_params epoll_params = {0}; 255*347fcdc4SJoe Damato struct sockaddr_in server_addr; 256*347fcdc4SJoe Damato int i, epfd, nfds; 257*347fcdc4SJoe Damato ssize_t readlen; 258*347fcdc4SJoe Damato int outfile_fd; 259*347fcdc4SJoe Damato char buf[1024]; 260*347fcdc4SJoe Damato int sockfd; 261*347fcdc4SJoe Damato int conn; 262*347fcdc4SJoe Damato int val; 263*347fcdc4SJoe Damato 264*347fcdc4SJoe Damato outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644); 265*347fcdc4SJoe Damato if (outfile_fd == -1) 266*347fcdc4SJoe Damato error(1, errno, "unable to open outfile: %s", cfg_outfile); 267*347fcdc4SJoe Damato 268*347fcdc4SJoe Damato sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 269*347fcdc4SJoe Damato if (sockfd == -1) 270*347fcdc4SJoe Damato error(1, errno, "unable to create listen socket"); 271*347fcdc4SJoe Damato 272*347fcdc4SJoe Damato server_addr.sin_family = AF_INET; 273*347fcdc4SJoe Damato server_addr.sin_port = htons(cfg_port); 274*347fcdc4SJoe Damato server_addr.sin_addr = cfg_bind_addr; 275*347fcdc4SJoe Damato 276*347fcdc4SJoe Damato /* these values are range checked during parse_opts, so casting is safe 277*347fcdc4SJoe Damato * here 278*347fcdc4SJoe Damato */ 279*347fcdc4SJoe Damato epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; 280*347fcdc4SJoe Damato epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget; 281*347fcdc4SJoe Damato epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll; 282*347fcdc4SJoe Damato epoll_params.__pad = 0; 283*347fcdc4SJoe Damato 284*347fcdc4SJoe Damato val = 1; 285*347fcdc4SJoe Damato if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) 286*347fcdc4SJoe Damato error(1, errno, "poller setsockopt reuseaddr"); 287*347fcdc4SJoe Damato 288*347fcdc4SJoe Damato setnonblock(sockfd); 289*347fcdc4SJoe Damato 290*347fcdc4SJoe Damato if (bind(sockfd, (struct sockaddr *)&server_addr, 291*347fcdc4SJoe Damato sizeof(struct sockaddr_in))) 292*347fcdc4SJoe Damato error(0, errno, "poller bind to port: %d\n", cfg_port); 293*347fcdc4SJoe Damato 294*347fcdc4SJoe Damato if (listen(sockfd, 1)) 295*347fcdc4SJoe Damato error(1, errno, "poller listen"); 296*347fcdc4SJoe Damato 297*347fcdc4SJoe Damato epfd = epoll_create1(0); 298*347fcdc4SJoe Damato if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1) 299*347fcdc4SJoe Damato error(1, errno, "unable to set busy poll params"); 300*347fcdc4SJoe Damato 301*347fcdc4SJoe Damato epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET); 302*347fcdc4SJoe Damato 303*347fcdc4SJoe Damato for (;;) { 304*347fcdc4SJoe Damato nfds = epoll_wait(epfd, events, cfg_max_events, -1); 305*347fcdc4SJoe Damato for (i = 0; i < nfds; i++) { 306*347fcdc4SJoe Damato if (events[i].data.fd == sockfd) { 307*347fcdc4SJoe Damato conn = accept(sockfd, NULL, NULL); 308*347fcdc4SJoe Damato if (conn == -1) 309*347fcdc4SJoe Damato error(1, errno, 310*347fcdc4SJoe Damato "accepting incoming connection failed"); 311*347fcdc4SJoe Damato 312*347fcdc4SJoe Damato setnonblock(conn); 313*347fcdc4SJoe Damato epoll_ctl_add(epfd, conn, 314*347fcdc4SJoe Damato EPOLLIN | EPOLLET | EPOLLRDHUP | 315*347fcdc4SJoe Damato EPOLLHUP); 316*347fcdc4SJoe Damato } else if (events[i].events & EPOLLIN) { 317*347fcdc4SJoe Damato for (;;) { 318*347fcdc4SJoe Damato readlen = read(events[i].data.fd, buf, 319*347fcdc4SJoe Damato sizeof(buf)); 320*347fcdc4SJoe Damato if (readlen > 0) 321*347fcdc4SJoe Damato write_chunk(outfile_fd, buf, 322*347fcdc4SJoe Damato readlen); 323*347fcdc4SJoe Damato else 324*347fcdc4SJoe Damato break; 325*347fcdc4SJoe Damato } 326*347fcdc4SJoe Damato } else { 327*347fcdc4SJoe Damato /* spurious event ? */ 328*347fcdc4SJoe Damato } 329*347fcdc4SJoe Damato if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) { 330*347fcdc4SJoe Damato epoll_ctl(epfd, EPOLL_CTL_DEL, 331*347fcdc4SJoe Damato events[i].data.fd, NULL); 332*347fcdc4SJoe Damato close(events[i].data.fd); 333*347fcdc4SJoe Damato close(outfile_fd); 334*347fcdc4SJoe Damato return; 335*347fcdc4SJoe Damato } 336*347fcdc4SJoe Damato } 337*347fcdc4SJoe Damato } 338*347fcdc4SJoe Damato } 339*347fcdc4SJoe Damato 340*347fcdc4SJoe Damato int main(int argc, char *argv[]) 341*347fcdc4SJoe Damato { 342*347fcdc4SJoe Damato parse_opts(argc, argv); 343*347fcdc4SJoe Damato setup_queue(); 344*347fcdc4SJoe Damato run_poller(); 345*347fcdc4SJoe Damato return 0; 346*347fcdc4SJoe Damato } 347