1 /*
2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
3 * Copyright (C) 2013-2015 Universita` di Pisa. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 /*
28 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $
29 *
30 * Example program to show how to build a multithreaded packet
31 * source/sink using the netmap device.
32 *
33 * In this example we create a programmable number of threads
34 * to take care of all the queues of the interface used to
35 * send or receive traffic.
36 *
37 */
38
39 #define _GNU_SOURCE /* for CPU_SET() */
40 #include <arpa/inet.h> /* ntohs */
41 #include <assert.h>
42 #include <ctype.h> // isprint()
43 #include <errno.h>
44 #include <fcntl.h>
45 #include <ifaddrs.h> /* getifaddrs */
46 #include <libnetmap.h>
47 #include <math.h>
48 #include <net/ethernet.h>
49 #include <netinet/in.h>
50 #include <netinet/ip.h>
51 #include <netinet/ip6.h>
52 #include <netinet/udp.h>
53 #ifndef NO_PCAP
54 #include <pcap/pcap.h>
55 #endif
56 #include <pthread.h>
57 #include <signal.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <sys/ioctl.h>
62 #include <sys/poll.h>
63 #include <sys/stat.h>
64 #if !defined(_WIN32) && !defined(linux)
65 #include <sys/sysctl.h> /* sysctl */
66 #endif
67 #include <sys/types.h>
68 #include <unistd.h> // sysconf()
69 #ifdef linux
70 #define IPV6_VERSION 0x60
71 #define IPV6_DEFHLIM 64
72 #endif
73
74 #include "ctrs.h"
75
76 static void usage(int);
77
78 #ifdef _WIN32
79 #define cpuset_t DWORD_PTR //uint64_t
80 static inline void CPU_ZERO(cpuset_t *p)
81 {
82 *p = 0;
83 }
84
85 static inline void CPU_SET(uint32_t i, cpuset_t *p)
86 {
87 *p |= 1<< (i & 0x3f);
88 }
89
90 #define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c) //((void)a, 0)
91 #define TAP_CLONEDEV "/dev/tap"
92 #define AF_LINK 18 //defined in winsocks.h
93 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
94 #include <net/if_dl.h>
95
96 /*
97 * Convert an ASCII representation of an ethernet address to
98 * binary form.
99 */
100 struct ether_addr *
101 ether_aton(const char *a)
102 {
103 int i;
104 static struct ether_addr o;
105 unsigned int o0, o1, o2, o3, o4, o5;
106
107 i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5);
108
109 if (i != 6)
110 return (NULL);
111
112 o.octet[0]=o0;
113 o.octet[1]=o1;
114 o.octet[2]=o2;
115 o.octet[3]=o3;
116 o.octet[4]=o4;
117 o.octet[5]=o5;
118
119 return ((struct ether_addr *)&o);
120 }
121
122 /*
123 * Convert a binary representation of an ethernet address to
124 * an ASCII string.
125 */
126 char *
127 ether_ntoa(const struct ether_addr *n)
128 {
129 int i;
130 static char a[18];
131
132 i = sprintf(a, "%02x:%02x:%02x:%02x:%02x:%02x",
133 n->octet[0], n->octet[1], n->octet[2],
134 n->octet[3], n->octet[4], n->octet[5]);
135 return (i < 17 ? NULL : (char *)&a);
136 }
137 #endif /* _WIN32 */
138
139 #ifdef linux
140
141 #define cpuset_t cpu_set_t
142
143 #define ifr_flagshigh ifr_flags /* only the low 16 bits here */
144 #define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */
145 #include <linux/ethtool.h>
146 #include <linux/sockios.h>
147
148 #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
149 #include <netinet/ether.h> /* ether_aton */
150 #include <linux/if_packet.h> /* sockaddr_ll */
151 #endif /* linux */
152
153 #ifdef __FreeBSD__
154 #include <sys/endian.h> /* le64toh */
155 #include <machine/param.h>
156
157 #include <pthread_np.h> /* pthread w/ affinity */
158 #include <sys/cpuset.h> /* cpu_set */
159 #include <net/if_dl.h> /* LLADDR */
160 #endif /* __FreeBSD__ */
161
162 #ifdef __APPLE__
163
164 #define cpuset_t uint64_t // XXX
165 static inline void CPU_ZERO(cpuset_t *p)
166 {
167 *p = 0;
168 }
169
170 static inline void CPU_SET(uint32_t i, cpuset_t *p)
171 {
172 *p |= 1<< (i & 0x3f);
173 }
174
175 #define pthread_setaffinity_np(a, b, c) ((void)a, 0)
176
177 #define ifr_flagshigh ifr_flags // XXX
178 #define IFF_PPROMISC IFF_PROMISC
179 #include <net/if_dl.h> /* LLADDR */
180 #define clock_gettime(a,b) \
181 do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
182 #endif /* __APPLE__ */
183
184 static const char *default_payload = "netmap pkt-gen DIRECT payload\n"
185 "http://info.iet.unipi.it/~luigi/netmap/ ";
186
187 static const char *indirect_payload = "netmap pkt-gen indirect payload\n"
188 "http://info.iet.unipi.it/~luigi/netmap/ ";
189
190 static int verbose = 0;
191 static int normalize = 1;
192
193 #define VIRT_HDR_1 10 /* length of a base vnet-hdr */
194 #define VIRT_HDR_2 12 /* length of the extended vnet-hdr */
195 #define VIRT_HDR_MAX VIRT_HDR_2
196 struct virt_header {
197 uint8_t fields[VIRT_HDR_MAX];
198 };
199
200 #define MAX_BODYSIZE 65536
201
202 struct pkt {
203 struct virt_header vh;
204 struct ether_header eh;
205 union {
206 struct {
207 struct ip ip;
208 struct udphdr udp;
209 uint8_t body[MAX_BODYSIZE]; /* hardwired */
210 } ipv4;
211 struct {
212 struct ip6_hdr ip;
213 struct udphdr udp;
214 uint8_t body[MAX_BODYSIZE]; /* hardwired */
215 } ipv6;
216 };
217 } __attribute__((__packed__));
218
219 #define PKT(p, f, af) \
220 ((af) == AF_INET ? (p)->ipv4.f: (p)->ipv6.f)
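/* e.g. PKT(pkt, body, AF_INET) selects pkt->ipv4.body, and pkt->ipv6.body
 * otherwise (illustrative usage of the accessor above; see initialize_packet()). */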
221
222 struct ip_range {
223 const char *name;
224 union {
225 struct {
226 uint32_t start, end; /* same as struct in_addr */
227 } ipv4;
228 struct {
229 struct in6_addr start, end;
230 uint8_t sgroup, egroup;
231 } ipv6;
232 };
233 uint16_t port0, port1;
234 };
235
236 struct mac_range {
237 const char *name;
238 struct ether_addr start, end;
239 };
240
241 /* ifname can be netmap:foo-xxxx */
242 #define MAX_IFNAMELEN 512 /* our buffer for ifname */
243 //#define MAX_PKTSIZE 1536
244 #define MAX_PKTSIZE MAX_BODYSIZE /* XXX: + IP_HDR + ETH_HDR */
245
246 /* compact timestamp to fit into a 60-byte packet (enough to obtain the RTT) */
247 struct tstamp {
248 uint32_t sec;
249 uint32_t nsec;
250 };
251
252 /*
253 * global arguments for all threads
254 */
255
256 struct glob_arg {
257 int af; /* address family AF_INET/AF_INET6 */
258 struct ip_range src_ip;
259 struct ip_range dst_ip;
260 struct mac_range dst_mac;
261 struct mac_range src_mac;
262 int pkt_size;
263 int pkt_min_size;
264 int burst;
265 int forever;
266 uint64_t npackets; /* total packets to send */
267 int frags; /* fragments per packet */
268 u_int frag_size; /* size of each fragment */
269 int nthreads;
270 int cpus; /* cpus used for running */
271 int system_cpus; /* cpus on the system */
272
273 int options; /* testing */
274 #define OPT_PREFETCH 1
275 #define OPT_ACCESS 2
276 #define OPT_COPY 4
277 #define OPT_MEMCPY 8
278 #define OPT_TS 16 /* add a timestamp */
279 #define OPT_INDIRECT 32 /* use indirect buffers, tx only */
280 #define OPT_DUMP 64 /* dump rx/tx traffic */
281 #define OPT_RUBBISH 256 /* send whatever the buffers contain */
282 #define OPT_RANDOM_SRC 512
283 #define OPT_RANDOM_DST 1024
284 #define OPT_PPS_STATS 2048
285 #define OPT_UPDATE_CSUM 4096
286 int dev_type;
287 #ifndef NO_PCAP
288 pcap_t *p;
289 #endif
290
291 int tx_rate;
292 struct timespec tx_period;
293
294 int affinity;
295 int main_fd;
296 struct nmport_d *nmd;
297 uint32_t orig_mode;
298 int report_interval; /* milliseconds between prints */
299 void *(*td_body)(void *);
300 int td_type;
301 void *mmap_addr;
302 char ifname[MAX_IFNAMELEN];
303 const char *nmr_config;
304 int dummy_send;
305 int virt_header; /* send also the virt_header */
306 char *packet_file; /* -P option */
307 #define STATS_WIN 15
308 int win_idx;
309 int64_t win[STATS_WIN];
310 int wait_link;
311 int framing; /* #bits of framing (for bw output) */
312 };
313 enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
314
315 enum {
316 TD_TYPE_SENDER = 1,
317 TD_TYPE_RECEIVER,
318 TD_TYPE_OTHER,
319 };
320
321 /*
322 * Arguments for a new thread. The same structure is used by
323 * the source and the sink
324 */
325 struct targ {
326 struct glob_arg *g;
327 int used;
328 int completed;
329 int cancel;
330 int fd;
331 struct nmport_d *nmd;
332 /* these ought to be volatile, but they are
333 * only sampled and errors should not accumulate
334 */
335 struct my_ctrs ctr;
336
337 struct timespec tic, toc;
338 int me;
339 pthread_t thread;
340 int affinity;
341
342 struct pkt pkt;
343 void *frame;
344 uint16_t seed[3];
345 u_int frags;
346 u_int frag_size;
347 };
348
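/*
 * One's complement add with end-around carry. Example (illustrative
 * values): cksum_add(0xffff, 0x0001) overflows the 16-bit sum to 0 and
 * folds the carry back in, returning 0x0001.
 */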
349 static __inline uint16_t
350 cksum_add(uint16_t sum, uint16_t a)
351 {
352 uint16_t res;
353
354 res = sum + a;
355 return (res + (res < a));
356 }
357
358 static void
359 extract_ipv4_addr(char *name, uint32_t *addr, uint16_t *port)
360 {
361 struct in_addr a;
362 char *pp;
363
364 pp = strchr(name, ':');
365 if (pp != NULL) { /* do we have ports ? */
366 *pp++ = '\0';
367 *port = (uint16_t)strtol(pp, NULL, 0);
368 }
369
370 inet_pton(AF_INET, name, &a);
371 *addr = ntohl(a.s_addr);
372 }
373
374 static void
375 extract_ipv6_addr(char *name, struct in6_addr *addr, uint16_t *port,
376 uint8_t *group)
377 {
378 char *pp;
379
380 /*
381 * We accept IPv6 address in the following form:
382 * group@[2001:DB8::1001]:port (w/ brackets and port)
383 * group@[2001:DB8::1] (w/ brackets and w/o port)
384 * group@2001:DB8::1234 (w/o brackets and w/o port)
385 */
386 pp = strchr(name, '@');
387 if (pp != NULL) {
388 *pp++ = '\0';
389 *group = (uint8_t)strtol(name, NULL, 0);
390 if (*group > 7)
391 *group = 7;
392 name = pp;
393 }
394 if (name[0] == '[')
395 name++;
396 pp = strchr(name, ']');
397 if (pp != NULL)
398 *pp++ = '\0';
399 if (pp != NULL && *pp != ':')
400 pp = NULL;
401 if (pp != NULL) { /* do we have ports ? */
402 *pp++ = '\0';
403 *port = (uint16_t)strtol(pp, NULL, 0);
404 }
405 inet_pton(AF_INET6, name, addr);
406 }
407 /*
408 * extract the extremes from a range of ipv4 addresses.
409 * addr_lo[-addr_hi][:port_lo[-port_hi]]
410 */
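/*
 * Example (illustrative values): "10.0.0.1-10.0.0.10" yields
 * ipv4.start 10.0.0.1, ipv4.end 10.0.0.10 and the default port 1234
 * for both port0 and port1; since start != end, OPT_COPY is returned.
 */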
411 static int
412 extract_ip_range(struct ip_range *r, int af)
413 {
414 char *name, *ap, start[INET6_ADDRSTRLEN];
415 char end[INET6_ADDRSTRLEN];
416 struct in_addr a;
417 uint32_t tmp;
418
419 if (verbose)
420 D("extract IP range from %s", r->name);
421
422 name = strdup(r->name);
423 if (name == NULL) {
424 D("strdup failed");
425 usage(-1);
426 }
427 /* the first - splits start/end of range */
428 ap = strchr(name, '-');
429 if (ap != NULL)
430 *ap++ = '\0';
431 r->port0 = 1234; /* default port */
432 if (af == AF_INET6) {
433 r->ipv6.sgroup = 7; /* default group */
434 extract_ipv6_addr(name, &r->ipv6.start, &r->port0,
435 &r->ipv6.sgroup);
436 } else
437 extract_ipv4_addr(name, &r->ipv4.start, &r->port0);
438
439 r->port1 = r->port0;
440 if (af == AF_INET6) {
441 if (ap != NULL) {
442 r->ipv6.egroup = r->ipv6.sgroup;
443 extract_ipv6_addr(ap, &r->ipv6.end, &r->port1,
444 &r->ipv6.egroup);
445 } else {
446 r->ipv6.end = r->ipv6.start;
447 r->ipv6.egroup = r->ipv6.sgroup;
448 }
449 } else {
450 if (ap != NULL) {
451 extract_ipv4_addr(ap, &r->ipv4.end, &r->port1);
452 if (r->ipv4.start > r->ipv4.end) {
453 tmp = r->ipv4.end;
454 r->ipv4.end = r->ipv4.start;
455 r->ipv4.start = tmp;
456 }
457 } else
458 r->ipv4.end = r->ipv4.start;
459 }
460
461 if (r->port0 > r->port1) {
462 tmp = r->port0;
463 r->port0 = r->port1;
464 r->port1 = tmp;
465 }
466 if (af == AF_INET) {
467 a.s_addr = htonl(r->ipv4.start);
468 inet_ntop(af, &a, start, sizeof(start));
469 a.s_addr = htonl(r->ipv4.end);
470 inet_ntop(af, &a, end, sizeof(end));
471 } else {
472 inet_ntop(af, &r->ipv6.start, start, sizeof(start));
473 inet_ntop(af, &r->ipv6.end, end, sizeof(end));
474 }
475 if (af == AF_INET)
476 D("range is %s:%d to %s:%d", start, r->port0, end, r->port1);
477 else
478 D("range is %d@[%s]:%d to %d@[%s]:%d", r->ipv6.sgroup,
479 start, r->port0, r->ipv6.egroup, end, r->port1);
480
481 free(name);
482 if (r->port0 != r->port1 ||
483 (af == AF_INET && r->ipv4.start != r->ipv4.end) ||
484 (af == AF_INET6 &&
485 !IN6_ARE_ADDR_EQUAL(&r->ipv6.start, &r->ipv6.end)))
486 return (OPT_COPY);
487 return (0);
488 }
489
490 static int
491 extract_mac_range(struct mac_range *r)
492 {
493 struct ether_addr *e;
494 if (verbose)
495 D("extract MAC range from %s", r->name);
496
497 e = ether_aton(r->name);
498 if (e == NULL) {
499 D("invalid MAC address '%s'", r->name);
500 return 1;
501 }
502 bcopy(e, &r->start, 6);
503 bcopy(e, &r->end, 6);
504 #if 0
505 bcopy(targ->src_mac, eh->ether_shost, 6);
506 p = index(targ->g->src_mac, '-');
507 if (p)
508 targ->src_mac_range = atoi(p+1);
509
510 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
511 bcopy(targ->dst_mac, eh->ether_dhost, 6);
512 p = index(targ->g->dst_mac, '-');
513 if (p)
514 targ->dst_mac_range = atoi(p+1);
515 #endif
516 if (verbose)
517 D("%s starts at %s", r->name, ether_ntoa(&r->start));
518 return 0;
519 }
520
521 static int
522 get_if_mtu(const struct glob_arg *g)
523 {
524 struct ifreq ifreq;
525 int s, ret;
526 const char *ifname = g->nmd->hdr.nr_name;
527 size_t len;
528
529 if (!strncmp(g->ifname, "netmap:", 7) && !strchr(ifname, '{')
530 && !strchr(ifname, '}')) {
531
532 len = strlen(ifname);
533
534 if (len > IFNAMSIZ) {
535 D("'%s' too long, cannot ask for MTU", ifname);
536 return -1;
537 }
538
539 s = socket(AF_INET, SOCK_DGRAM, 0);
540 if (s < 0) {
541 D("socket() failed: %s", strerror(errno));
542 return s;
543 }
544
545 memset(&ifreq, 0, sizeof(ifreq));
546 memcpy(ifreq.ifr_name, ifname, len);
547
548 ret = ioctl(s, SIOCGIFMTU, &ifreq);
549 if (ret) {
550 D("ioctl(SIOCGIFMTU) failed: %s", strerror(errno));
551 }
552
553 close(s);
554
555 return ifreq.ifr_mtu;
556 }
557
558 /* This is a pipe or a VALE port, where the MTU is very large,
559 * so we use some practical limit. */
560 return 65536;
561 }
562
563 static struct targ *targs;
564 static int global_nthreads;
565
566 /* control-C handler */
567 static void
568 sigint_h(int sig)
569 {
570 int i;
571
572 (void)sig; /* UNUSED */
573 D("received control-C on thread %p", (void *)pthread_self());
574 for (i = 0; i < global_nthreads; i++) {
575 targs[i].cancel = 1;
576 }
577 }
578
579 /* sysctl wrapper to return the number of active CPUs */
580 static int
581 system_ncpus(void)
582 {
583 int ncpus;
584 #if defined (__FreeBSD__)
585 int mib[2] = { CTL_HW, HW_NCPU };
586 size_t len = sizeof(mib);
587 sysctl(mib, 2, &ncpus, &len, NULL, 0);
588 #elif defined(linux)
589 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
590 #elif defined(_WIN32)
591 {
592 SYSTEM_INFO sysinfo;
593 GetSystemInfo(&sysinfo);
594 ncpus = sysinfo.dwNumberOfProcessors;
595 }
596 #else /* others */
597 ncpus = 1;
598 #endif /* others */
599 return (ncpus);
600 }
601
602 #ifdef __linux__
603 #define sockaddr_dl sockaddr_ll
604 #define sdl_family sll_family
605 #define AF_LINK AF_PACKET
606 #define LLADDR(s) s->sll_addr;
607 #include <linux/if_tun.h>
608 #define TAP_CLONEDEV "/dev/net/tun"
609 #endif /* __linux__ */
610
611 #ifdef __FreeBSD__
612 #include <net/if_tun.h>
613 #define TAP_CLONEDEV "/dev/tap"
614 #endif /* __FreeBSD */
615
616 #ifdef __APPLE__
617 // #warning TAP not supported on apple ?
618 #include <net/if_utun.h>
619 #define TAP_CLONEDEV "/dev/tap"
620 #endif /* __APPLE__ */
621
622
623 /*
624 * parse the vale configuration in conf and put it in nmr.
625 * The function always returns 0.
626 * The configuration may consist of 1 to 4 numbers separated
627 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings.
628 * Missing numbers or zeroes stand for default values.
629 * As an additional convenience, if exactly one number
630 * is specified, then this is assigned to both #tx-slots and #rx-slots.
631 * If there is no 4th number, then the 3rd is assigned to both #tx-rings
632 * and #rx-rings.
633 */
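/*
 * Example (illustrative values): conf = "2048,1024,2" gives
 * nr_tx_slots = 2048, nr_rx_slots = 1024 and nr_tx_rings =
 * nr_rx_rings = 2, since a missing 4th number copies the 3rd one
 * to both ring counts.
 */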
634 static int
635 parse_nmr_config(const char* conf, struct nmreq_register *nmr)
636 {
637 char *w, *tok;
638 int i, v;
639
640 if (conf == NULL || ! *conf)
641 return 0;
642 nmr->nr_tx_rings = nmr->nr_rx_rings = 0;
643 nmr->nr_tx_slots = nmr->nr_rx_slots = 0;
644 w = strdup(conf);
645 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) {
646 v = atoi(tok);
647 switch (i) {
648 case 0:
649 nmr->nr_tx_slots = nmr->nr_rx_slots = v;
650 break;
651 case 1:
652 nmr->nr_rx_slots = v;
653 break;
654 case 2:
655 nmr->nr_tx_rings = nmr->nr_rx_rings = v;
656 break;
657 case 3:
658 nmr->nr_rx_rings = v;
659 break;
660 default:
661 D("ignored config: %s", tok);
662 break;
663 }
664 }
665 D("txr %d txd %d rxr %d rxd %d",
666 nmr->nr_tx_rings, nmr->nr_tx_slots,
667 nmr->nr_rx_rings, nmr->nr_rx_slots);
668 free(w);
669 return 0;
670 }
671
672
673 /*
674 * locate the src mac address for our interface, put it
675 * into the user-supplied buffer. Return 0 if found, 1 if not, -1 on error.
676 */
677 static int
678 source_hwaddr(const char *ifname, char *buf)
679 {
680 struct ifaddrs *ifaphead, *ifap;
681
682 if (getifaddrs(&ifaphead) != 0) {
683 D("getifaddrs %s failed", ifname);
684 return (-1);
685 }
686
687 /* remove 'netmap:' prefix before comparing interfaces */
688 if (!strncmp(ifname, "netmap:", 7))
689 ifname = &ifname[7];
690
691 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
692 struct sockaddr_dl *sdl =
693 (struct sockaddr_dl *)ifap->ifa_addr;
694 uint8_t *mac;
695
696 if (!sdl || sdl->sdl_family != AF_LINK)
697 continue;
698 if (strncmp(ifap->ifa_name, ifname, IFNAMSIZ) != 0)
699 continue;
700 mac = (uint8_t *)LLADDR(sdl);
701 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
702 mac[0], mac[1], mac[2],
703 mac[3], mac[4], mac[5]);
704 if (verbose)
705 D("source hwaddr %s", buf);
706 break;
707 }
708 freeifaddrs(ifaphead);
709 return ifap ? 0 : 1;
710 }
711
712
713 /* set the thread affinity. */
714 static int
715 setaffinity(pthread_t me, int i)
716 {
717 cpuset_t cpumask;
718
719 if (i == -1)
720 return 0;
721
722 /* Set the thread affinity. */
723 CPU_ZERO(&cpumask);
724 CPU_SET(i, &cpumask);
725
726 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
727 D("Unable to set affinity: %s", strerror(errno));
728 return 1;
729 }
730 return 0;
731 }
732
733
734 /* Compute the one's complement sum of a buffer (used for the IP and UDP checksums). */
735 static uint32_t
736 checksum(const void *data, uint16_t len, uint32_t sum)
737 {
738 const uint8_t *addr = data;
739 uint32_t i;
740
741 /* Checksum all the pairs of bytes first... */
742 for (i = 0; i < (len & ~1U); i += 2) {
743 sum += (uint16_t)ntohs(*((const uint16_t *)(addr + i)));
744 if (sum > 0xFFFF)
745 sum -= 0xFFFF;
746 }
747 /*
748 * If there's a single byte left over, checksum it, too.
749 * Network byte order is big-endian, so the remaining byte is
750 * the high byte.
751 */
752 if (i < len) {
753 sum += addr[i] << 8;
754 if (sum > 0xFFFF)
755 sum -= 0xFFFF;
756 }
757 return sum;
758 }
759
760 static uint16_t
761 wrapsum(uint32_t sum)
762 {
763 sum = ~sum & 0xFFFF;
764 return (htons(sum));
765 }
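/*
 * Usage sketch (as done in initialize_packet() below): the IPv4 header
 * checksum is ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0)), i.e.
 * the folded one's complement sum, inverted and stored in network order.
 */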
766
767 /* Dump the slot metadata and a hex/ASCII view of the packet
768 * payload (useful for debugging).
769 */
770 static void
771 dump_payload(const char *_p, int len, struct netmap_ring *ring, int cur)
772 {
773 char buf[128];
774 int i, j, i0;
775 const unsigned char *p = (const unsigned char *)_p;
776
777 /* print the slot metadata first, then a hexdump of the payload */
778
779 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n",
780 ring, cur, ring->slot[cur].buf_idx,
781 ring->slot[cur].flags, len);
782 /* hexdump routine */
783 for (i = 0; i < len; ) {
784 memset(buf, ' ', sizeof(buf));
785 sprintf(buf, "%5d: ", i);
786 i0 = i;
787 for (j=0; j < 16 && i < len; i++, j++)
788 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i]));
789 i = i0;
790 for (j=0; j < 16 && i < len; i++, j++)
791 sprintf(buf+7+j + 48, "%c",
792 isprint(p[i]) ? p[i] : '.');
793 printf("%s\n", buf);
794 }
795 }
796
797 /*
798 * Fill a packet with some payload.
799 * We create a UDP packet so the payload starts at
800 * 14+20+8 = 42 bytes.
801 */
802 #ifdef __linux__
803 #define uh_sport source
804 #define uh_dport dest
805 #define uh_ulen len
806 #define uh_sum check
807 #endif /* linux */
808
809 static uint16_t
810 new_ip_sum(uint16_t ip_sum, uint32_t oaddr, uint32_t naddr)
811 {
812 ip_sum = cksum_add(ip_sum, ~oaddr >> 16);
813 ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff);
814 ip_sum = cksum_add(ip_sum, naddr >> 16);
815 ip_sum = cksum_add(ip_sum, naddr & 0xffff);
816 return ip_sum;
817 }
818
819 static uint16_t
820 new_udp_sum(uint16_t udp_sum, uint16_t oport, uint16_t nport)
821 {
822 udp_sum = cksum_add(udp_sum, ~oport);
823 udp_sum = cksum_add(udp_sum, nport);
824 return udp_sum;
825 }
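/*
 * The two helpers above update a checksum incrementally (one's
 * complement arithmetic): the old field is removed by adding its
 * complement and the new field is added, so only the bytes that
 * changed need to be revisited; update_ip() below folds the result
 * into ip_sum and uh_sum.
 */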
826
827
828 static void
829 update_ip(struct pkt *pkt, struct targ *t)
830 {
831 struct glob_arg *g = t->g;
832 struct ip ip;
833 struct udphdr udp;
834 uint32_t oaddr, naddr;
835 uint16_t oport, nport;
836 uint16_t ip_sum = 0, udp_sum = 0;
837
838 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
839 memcpy(&udp, &pkt->ipv4.udp, sizeof(udp));
840 do {
841 ip_sum = udp_sum = 0;
842 naddr = oaddr = ntohl(ip.ip_src.s_addr);
843 nport = oport = ntohs(udp.uh_sport);
844 if (g->options & OPT_RANDOM_SRC) {
845 ip.ip_src.s_addr = nrand48(t->seed);
846 udp.uh_sport = nrand48(t->seed);
847 naddr = ntohl(ip.ip_src.s_addr);
848 nport = ntohs(udp.uh_sport);
849 ip_sum = new_ip_sum(ip_sum, oaddr, naddr);
850 udp_sum = new_udp_sum(udp_sum, oport, nport);
851 } else {
852 if (oport < g->src_ip.port1) {
853 nport = oport + 1;
854 udp.uh_sport = htons(nport);
855 udp_sum = new_udp_sum(udp_sum, oport, nport);
856 break;
857 }
858 nport = g->src_ip.port0;
859 udp.uh_sport = htons(nport);
860 if (oaddr < g->src_ip.ipv4.end) {
861 naddr = oaddr + 1;
862 ip.ip_src.s_addr = htonl(naddr);
863 ip_sum = new_ip_sum(ip_sum, oaddr, naddr);
864 break;
865 }
866 naddr = g->src_ip.ipv4.start;
867 ip.ip_src.s_addr = htonl(naddr);
868 ip_sum = new_ip_sum(ip_sum, oaddr, naddr);
869 }
870
871 naddr = oaddr = ntohl(ip.ip_dst.s_addr);
872 nport = oport = ntohs(udp.uh_dport);
873 if (g->options & OPT_RANDOM_DST) {
874 ip.ip_dst.s_addr = nrand48(t->seed);
875 udp.uh_dport = nrand48(t->seed);
876 naddr = ntohl(ip.ip_dst.s_addr);
877 nport = ntohs(udp.uh_dport);
878 ip_sum = new_ip_sum(ip_sum, oaddr, naddr);
879 udp_sum = new_udp_sum(udp_sum, oport, nport);
880 } else {
881 if (oport < g->dst_ip.port1) {
882 nport = oport + 1;
883 udp.uh_dport = htons(nport);
884 udp_sum = new_udp_sum(udp_sum, oport, nport);
885 break;
886 }
887 nport = g->dst_ip.port0;
888 udp.uh_dport = htons(nport);
889 if (oaddr < g->dst_ip.ipv4.end) {
890 naddr = oaddr + 1;
891 ip.ip_dst.s_addr = htonl(naddr);
892 ip_sum = new_ip_sum(ip_sum, oaddr, naddr);
893 break;
894 }
895 naddr = g->dst_ip.ipv4.start;
896 ip.ip_dst.s_addr = htonl(naddr);
897 ip_sum = new_ip_sum(ip_sum, oaddr, naddr);
898 }
899 } while (0);
900 /* update checksums */
901 if (udp_sum != 0)
902 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(udp_sum));
903 if (ip_sum != 0) {
904 ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum));
905 udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(ip_sum));
906 }
907 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
908 memcpy(&pkt->ipv4.udp, &udp, sizeof(udp));
909 }
910
911 #ifndef s6_addr16
912 #define s6_addr16 __u6_addr.__u6_addr16
913 #endif
914 static void
915 update_ip6(struct pkt *pkt, struct targ *t)
916 {
917 struct glob_arg *g = t->g;
918 struct ip6_hdr ip6;
919 struct udphdr udp;
920 uint16_t udp_sum;
921 uint16_t oaddr, naddr;
922 uint16_t oport, nport;
923 uint8_t group;
924
925 memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6));
926 memcpy(&udp, &pkt->ipv6.udp, sizeof(udp));
927 do {
928 udp_sum = 0;
929 group = g->src_ip.ipv6.sgroup;
930 naddr = oaddr = ntohs(ip6.ip6_src.s6_addr16[group]);
931 nport = oport = ntohs(udp.uh_sport);
932 if (g->options & OPT_RANDOM_SRC) {
933 ip6.ip6_src.s6_addr16[group] = nrand48(t->seed);
934 udp.uh_sport = nrand48(t->seed);
935 naddr = ntohs(ip6.ip6_src.s6_addr16[group]);
936 nport = ntohs(udp.uh_sport);
937 break;
938 }
939 if (oport < g->src_ip.port1) {
940 nport = oport + 1;
941 udp.uh_sport = htons(nport);
942 break;
943 }
944 nport = g->src_ip.port0;
945 udp.uh_sport = htons(nport);
946 if (oaddr < ntohs(g->src_ip.ipv6.end.s6_addr16[group])) {
947 naddr = oaddr + 1;
948 ip6.ip6_src.s6_addr16[group] = htons(naddr);
949 break;
950 }
951 naddr = ntohs(g->src_ip.ipv6.start.s6_addr16[group]);
952 ip6.ip6_src.s6_addr16[group] = htons(naddr);
953
954 /* update checksums if needed */
955 if (oaddr != naddr)
956 udp_sum = cksum_add(~oaddr, naddr);
957 if (oport != nport)
958 udp_sum = cksum_add(udp_sum,
959 cksum_add(~oport, nport));
960
961 group = g->dst_ip.ipv6.egroup;
962 naddr = oaddr = ntohs(ip6.ip6_dst.s6_addr16[group]);
963 nport = oport = ntohs(udp.uh_dport);
964 if (g->options & OPT_RANDOM_DST) {
965 ip6.ip6_dst.s6_addr16[group] = nrand48(t->seed);
966 udp.uh_dport = nrand48(t->seed);
967 naddr = ntohs(ip6.ip6_dst.s6_addr16[group]);
968 nport = ntohs(udp.uh_dport);
969 break;
970 }
971 if (oport < g->dst_ip.port1) {
972 nport = oport + 1;
973 udp.uh_dport = htons(nport);
974 break;
975 }
976 nport = g->dst_ip.port0;
977 udp.uh_dport = htons(nport);
978 if (oaddr < ntohs(g->dst_ip.ipv6.end.s6_addr16[group])) {
979 naddr = oaddr + 1;
980 ip6.ip6_dst.s6_addr16[group] = htons(naddr);
981 break;
982 }
983 naddr = ntohs(g->dst_ip.ipv6.start.s6_addr16[group]);
984 ip6.ip6_dst.s6_addr16[group] = htons(naddr);
985 } while (0);
986 /* update checksums */
987 if (oaddr != naddr)
988 udp_sum = cksum_add(udp_sum,
989 cksum_add(~oaddr, naddr));
990 if (oport != nport)
991 udp_sum = cksum_add(udp_sum,
992 cksum_add(~oport, nport));
993 if (udp_sum != 0)
994 udp.uh_sum = ~cksum_add(~udp.uh_sum, udp_sum);
995 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
996 memcpy(&pkt->ipv6.udp, &udp, sizeof(udp));
997 }
998
999 static void
1000 update_addresses(struct pkt *pkt, struct targ *t)
1001 {
1002
1003 if (t->g->af == AF_INET)
1004 update_ip(pkt, t);
1005 else
1006 update_ip6(pkt, t);
1007 }
1008
1009 static void
1010 update_ip_size(struct pkt *pkt, int size)
1011 {
1012 struct ip ip;
1013 struct udphdr udp;
1014 uint16_t oiplen, niplen;
1015 uint16_t nudplen;
1016 uint16_t ip_sum = 0;
1017
1018 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
1019 memcpy(&udp, &pkt->ipv4.udp, sizeof(udp));
1020
1021 oiplen = ntohs(ip.ip_len);
1022 niplen = size - sizeof(struct ether_header);
1023 ip.ip_len = htons(niplen);
1024 nudplen = niplen - sizeof(struct ip);
1025 udp.uh_ulen = htons(nudplen);
1026 ip_sum = new_udp_sum(ip_sum, oiplen, niplen);
1027
1028 /* update checksums */
1029 if (ip_sum != 0)
1030 ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum));
1031
1032 udp.uh_sum = 0;
1033 /* Magic: taken from sbin/dhclient/packet.c */
1034 udp.uh_sum = wrapsum(
1035 checksum(&udp, sizeof(udp), /* udp header */
1036 checksum(pkt->ipv4.body, /* udp payload */
1037 nudplen - sizeof(udp),
1038 checksum(&ip.ip_src, /* pseudo header */
1039 2 * sizeof(ip.ip_src),
1040 IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen)))));
1041
1042 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
1043 memcpy(&pkt->ipv4.udp, &udp, sizeof(udp));
1044 }
1045
1046 static void
1047 update_ip6_size(struct pkt *pkt, int size)
1048 {
1049 struct ip6_hdr ip6;
1050 struct udphdr udp;
1051 uint16_t niplen, nudplen;
1052 uint32_t csum;
1053
1054 memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6));
1055 memcpy(&udp, &pkt->ipv6.udp, sizeof(udp));
1056
1057 nudplen = niplen = size - sizeof(struct ether_header) - sizeof(ip6);
1058 ip6.ip6_plen = htons(niplen);
1059 udp.uh_ulen = htons(nudplen);
1060
1061 /* Save part of pseudo header checksum into csum */
1062 udp.uh_sum = 0;
1063 csum = IPPROTO_UDP << 24;
1064 csum = checksum(&csum, sizeof(csum), nudplen);
1065 udp.uh_sum = wrapsum(
1066 checksum(&udp, sizeof(udp), /* udp header */
1067 checksum(pkt->ipv6.body, /* udp payload */
1068 nudplen - sizeof(udp),
1069 checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */
1070 2 * sizeof(pkt->ipv6.ip.ip6_src), csum))));
1071
1072 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
1073 memcpy(&pkt->ipv6.udp, &udp, sizeof(udp));
1074 }
1075
1076 static void
1077 update_size(struct pkt *pkt, struct targ *t, int size)
1078 {
1079 if (t->g->options & OPT_UPDATE_CSUM) {
1080 if (t->g->af == AF_INET)
1081 update_ip_size(pkt, size);
1082 else
1083 update_ip6_size(pkt, size);
1084 }
1085 }
1086
1087 /*
1088 * initialize one packet and prepare for the next one.
1089 * The copy could be done better instead of repeating it each time.
1090 */
1091 static void
1092 initialize_packet(struct targ *targ)
1093 {
1094 struct pkt *pkt = &targ->pkt;
1095 struct ether_header *eh;
1096 struct ip6_hdr ip6;
1097 struct ip ip;
1098 struct udphdr udp;
1099 void *udp_ptr;
1100 uint16_t paylen;
1101 uint32_t csum = 0;
1102 const char *payload = targ->g->options & OPT_INDIRECT ?
1103 indirect_payload : default_payload;
1104 int i, l0 = strlen(payload);
1105
1106 #ifndef NO_PCAP
1107 char errbuf[PCAP_ERRBUF_SIZE];
1108 pcap_t *file;
1109 struct pcap_pkthdr *header;
1110 const unsigned char *packet;
1111
1112 /* Read a packet from a PCAP file if asked. */
1113 if (targ->g->packet_file != NULL) {
1114 if ((file = pcap_open_offline(targ->g->packet_file,
1115 errbuf)) == NULL)
1116 D("failed to open pcap file %s",
1117 targ->g->packet_file);
1118 if (pcap_next_ex(file, &header, &packet) < 0)
1119 D("failed to read packet from %s",
1120 targ->g->packet_file);
1121 if ((targ->frame = malloc(header->caplen)) == NULL)
1122 D("out of memory");
1123 bcopy(packet, (unsigned char *)targ->frame, header->caplen);
1124 targ->g->pkt_size = header->caplen;
1125 pcap_close(file);
1126 return;
1127 }
1128 #endif
1129
1130 paylen = targ->g->pkt_size - sizeof(*eh) -
1131 (targ->g->af == AF_INET ? sizeof(ip): sizeof(ip6));
1132
1133 /* create a nice NUL-terminated string */
1134 for (i = 0; i < paylen; i += l0) {
1135 if (l0 > paylen - i)
1136 l0 = paylen - i; // last round
1137 bcopy(payload, PKT(pkt, body, targ->g->af) + i, l0);
1138 }
1139 PKT(pkt, body, targ->g->af)[i - 1] = '\0';
1140
1141 /* prepare the headers */
1142 eh = &pkt->eh;
1143 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
1144 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
1145
1146 if (targ->g->af == AF_INET) {
1147 eh->ether_type = htons(ETHERTYPE_IP);
1148 memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
1149 udp_ptr = &pkt->ipv4.udp;
1150 ip.ip_v = IPVERSION;
1151 ip.ip_hl = sizeof(ip) >> 2;
1152 ip.ip_id = 0;
1153 ip.ip_tos = IPTOS_LOWDELAY;
1154 ip.ip_len = htons(targ->g->pkt_size - sizeof(*eh));
1155 ip.ip_id = 0;
1156 ip.ip_off = htons(IP_DF); /* Don't fragment */
1157 ip.ip_ttl = IPDEFTTL;
1158 ip.ip_p = IPPROTO_UDP;
1159 ip.ip_dst.s_addr = htonl(targ->g->dst_ip.ipv4.start);
1160 ip.ip_src.s_addr = htonl(targ->g->src_ip.ipv4.start);
1161 ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0));
1162 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
1163 } else {
1164 eh->ether_type = htons(ETHERTYPE_IPV6);
1165 memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6));
1166 udp_ptr = &pkt->ipv6.udp;
1167 ip6.ip6_flow = 0;
1168 ip6.ip6_plen = htons(paylen);
1169 ip6.ip6_vfc = IPV6_VERSION;
1170 ip6.ip6_nxt = IPPROTO_UDP;
1171 ip6.ip6_hlim = IPV6_DEFHLIM;
1172 ip6.ip6_src = targ->g->src_ip.ipv6.start;
1173 ip6.ip6_dst = targ->g->dst_ip.ipv6.start;
1174 }
1175 memcpy(&udp, udp_ptr, sizeof(udp));
1176
1177 udp.uh_sport = htons(targ->g->src_ip.port0);
1178 udp.uh_dport = htons(targ->g->dst_ip.port0);
1179 udp.uh_ulen = htons(paylen);
1180 if (targ->g->af == AF_INET) {
1181 /* Magic: taken from sbin/dhclient/packet.c */
1182 udp.uh_sum = wrapsum(
1183 checksum(&udp, sizeof(udp), /* udp header */
1184 checksum(pkt->ipv4.body, /* udp payload */
1185 paylen - sizeof(udp),
1186 checksum(&pkt->ipv4.ip.ip_src, /* pseudo header */
1187 2 * sizeof(pkt->ipv4.ip.ip_src),
1188 IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen)))));
1189 memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
1190 } else {
1191 /* Save part of pseudo header checksum into csum */
1192 csum = IPPROTO_UDP << 24;
1193 csum = checksum(&csum, sizeof(csum), paylen);
1194 udp.uh_sum = wrapsum(
1195 checksum(udp_ptr, sizeof(udp), /* udp header */
1196 checksum(pkt->ipv6.body, /* udp payload */
1197 paylen - sizeof(udp),
1198 checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */
1199 2 * sizeof(pkt->ipv6.ip.ip6_src), csum))));
1200 memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
1201 }
1202 memcpy(udp_ptr, &udp, sizeof(udp));
1203
1204 bzero(&pkt->vh, sizeof(pkt->vh));
1205 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0);
1206 }
1207
1208 static void
1209 get_vnet_hdr_len(struct glob_arg *g)
1210 {
1211 struct nmreq_header hdr;
1212 struct nmreq_port_hdr ph;
1213 int err;
1214
1215 hdr = g->nmd->hdr; /* copy name and version */
1216 hdr.nr_reqtype = NETMAP_REQ_PORT_HDR_GET;
1217 hdr.nr_options = 0;
1218 memset(&ph, 0, sizeof(ph));
1219 hdr.nr_body = (uintptr_t)&ph;
1220 err = ioctl(g->main_fd, NIOCCTRL, &hdr);
1221 if (err) {
1222 D("Unable to get virtio-net header length");
1223 return;
1224 }
1225
1226 g->virt_header = ph.nr_hdr_len;
1227 if (g->virt_header) {
1228 D("Port requires virtio-net header, length = %d",
1229 g->virt_header);
1230 }
1231 }
1232
1233 static void
1234 set_vnet_hdr_len(struct glob_arg *g)
1235 {
1236 int err, l = g->virt_header;
1237 struct nmreq_header hdr;
1238 struct nmreq_port_hdr ph;
1239
1240 if (l == 0)
1241 return;
1242
1243 hdr = g->nmd->hdr; /* copy name and version */
1244 hdr.nr_reqtype = NETMAP_REQ_PORT_HDR_SET;
1245 hdr.nr_options = 0;
1246 memset(&ph, 0, sizeof(ph));
1247 hdr.nr_body = (uintptr_t)&ph;
1248 err = ioctl(g->main_fd, NIOCCTRL, &hdr);
1249 if (err) {
1250 D("Unable to set virtio-net header length %d", l);
1251 }
1252 }
1253
1254 /*
1255 * create and enqueue a batch of packets on a ring.
1256 * On the last one set NS_REPORT to tell the driver to generate
1257 * an interrupt when done.
1258 */
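/*
 * Returns the number of packets actually queued; this can be smaller
 * than count when the ring runs out of free slots.
 */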
1259 static int
1260 send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
1261 int size, struct targ *t, u_int count, int options)
1262 {
1263 u_int n, sent, head = ring->head;
1264 u_int frags = t->frags;
1265 u_int frag_size = t->frag_size;
1266 struct netmap_slot *slot = &ring->slot[head];
1267
1268 n = nm_ring_space(ring);
1269 #if 0
1270 if (options & (OPT_COPY | OPT_PREFETCH) ) {
1271 for (sent = 0; sent < count; sent++) {
1272 struct netmap_slot *slot = &ring->slot[head];
1273 char *p = NETMAP_BUF(ring, slot->buf_idx);
1274
1275 __builtin_prefetch(p);
1276 head = nm_ring_next(ring, head);
1277 }
1278 head = ring->head;
1279 }
1280 #endif
1281 for (sent = 0; sent < count && n >= frags; sent++, n--) {
1282 char *p;
1283 int buf_changed;
1284 u_int tosend = size;
1285
1286 slot = &ring->slot[head];
1287 p = NETMAP_BUF(ring, slot->buf_idx);
1288 buf_changed = slot->flags & NS_BUF_CHANGED;
1289
1290 slot->flags = 0;
1291 if (options & OPT_RUBBISH) {
1292 /* do nothing */
1293 } else if (options & OPT_INDIRECT) {
1294 slot->flags |= NS_INDIRECT;
1295 slot->ptr = (uint64_t)((uintptr_t)frame);
1296 } else if (frags > 1) {
1297 u_int i;
1298 const char *f = frame;
1299 char *fp = p;
1300 for (i = 0; i < frags - 1; i++) {
1301 memcpy(fp, f, frag_size);
1302 slot->len = frag_size;
1303 slot->flags = NS_MOREFRAG;
1304 if (options & OPT_DUMP)
1305 dump_payload(fp, frag_size, ring, head);
1306 tosend -= frag_size;
1307 f += frag_size;
1308 head = nm_ring_next(ring, head);
1309 slot = &ring->slot[head];
1310 fp = NETMAP_BUF(ring, slot->buf_idx);
1311 }
1312 n -= (frags - 1);
1313 p = fp;
1314 slot->flags = 0;
1315 memcpy(p, f, tosend);
1316 update_addresses(pkt, t);
1317 } else if ((options & (OPT_COPY | OPT_MEMCPY)) || buf_changed) {
1318 if (options & OPT_COPY)
1319 nm_pkt_copy(frame, p, size);
1320 else
1321 memcpy(p, frame, size);
1322 update_addresses(pkt, t);
1323 } else if (options & OPT_PREFETCH) {
1324 __builtin_prefetch(p);
1325 }
1326 slot->len = tosend;
1327 if (options & OPT_DUMP)
1328 dump_payload(p, tosend, ring, head);
1329 head = nm_ring_next(ring, head);
1330 }
1331 if (sent) {
1332 slot->flags |= NS_REPORT;
1333 ring->head = ring->cur = head;
1334 }
1335 if (sent < count) {
1336 /* tell netmap that we need more slots */
1337 ring->cur = ring->tail;
1338 }
1339
1340 return (sent);
1341 }
1342
1343 /*
1344 * Index of the highest bit set
1345 */
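/*
 * Example (illustrative): msb64(1000) is 9, since bit 9 (512) is the
 * highest bit set; ping_body() uses this to select a power-of-two
 * latency bucket.
 */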
1346 static uint32_t
1347 msb64(uint64_t x)
1348 {
1349 uint64_t m = 1ULL << 63;
1350 int i;
1351
1352 for (i = 63; i >= 0; i--, m >>=1)
1353 if (m & x)
1354 return i;
1355 return 0;
1356 }
1357
1358 /*
1359 * wait until ts, busy-waiting, but sleeping while more than 1 ms remains.
1360 * Return wakeup time.
1361 */
1362 static struct timespec
1363 wait_time(struct timespec ts)
1364 {
1365 for (;;) {
1366 struct timespec w, cur;
1367 clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
1368 w = timespec_sub(ts, cur);
1369 if (w.tv_sec < 0)
1370 return cur;
1371 else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
1372 poll(NULL, 0, 1);
1373 }
1374 }
1375
1376 /*
1377 * Send a packet, and wait for a response.
1378 * The payload (after the UDP header, at offset 42) has a 4-byte sequence
1379 * number followed by a struct tstamp (sec/nsec).
1380 */
1381
1382 static void *
1383 ping_body(void *data)
1384 {
1385 struct targ *targ = (struct targ *) data;
1386 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
1387 struct netmap_if *nifp = targ->nmd->nifp;
1388 int i, m;
1389 void *frame;
1390 int size;
1391 struct timespec ts, now, last_print;
1392 struct timespec nexttime = {0, 0}; /* silence compiler */
1393 uint64_t sent = 0, n = targ->g->npackets;
1394 uint64_t count = 0, t_cur, t_min = ~0, av = 0;
1395 uint64_t g_min = ~0, g_av = 0;
1396 uint64_t buckets[64]; /* bins for delays, ns */
1397 int rate_limit = targ->g->tx_rate, tosend = 0;
1398
1399 frame = (char*)&targ->pkt + sizeof(targ->pkt.vh) - targ->g->virt_header;
1400 size = targ->g->pkt_size + targ->g->virt_header;
1401
1402
1403 if (targ->g->nthreads > 1) {
1404 D("can only ping with 1 thread");
1405 return NULL;
1406 }
1407
1408 if (targ->g->af == AF_INET6) {
1409 D("Warning: ping-pong with IPv6 not supported");
1410 }
1411
1412 bzero(&buckets, sizeof(buckets));
1413 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
1414 now = last_print;
1415 if (rate_limit) {
1416 targ->tic = timespec_add(now, (struct timespec){2,0});
1417 targ->tic.tv_nsec = 0;
1418 wait_time(targ->tic);
1419 nexttime = targ->tic;
1420 }
1421 while (!targ->cancel && (n == 0 || sent < n)) {
1422 struct netmap_ring *ring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
1423 struct netmap_slot *slot;
1424 char *p;
1425 int rv;
1426 uint64_t limit, event = 0;
1427
1428 if (rate_limit && tosend <= 0) {
1429 tosend = targ->g->burst;
1430 nexttime = timespec_add(nexttime, targ->g->tx_period);
1431 wait_time(nexttime);
1432 }
1433
1434 limit = rate_limit ? tosend : targ->g->burst;
1435 if (n > 0 && n - sent < limit)
1436 limit = n - sent;
1437 for (m = 0; (unsigned)m < limit; m++) {
1438 slot = &ring->slot[ring->head];
1439 slot->len = size;
1440 p = NETMAP_BUF(ring, slot->buf_idx);
1441
1442 if (nm_ring_empty(ring)) {
1443 D("-- ouch, cannot send");
1444 break;
1445 } else {
1446 struct tstamp *tp;
1447 nm_pkt_copy(frame, p, size);
1448 clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
1449 bcopy(&sent, p+42, sizeof(sent));
1450 tp = (struct tstamp *)(p+46);
1451 tp->sec = (uint32_t)ts.tv_sec;
1452 tp->nsec = (uint32_t)ts.tv_nsec;
1453 sent++;
1454 ring->head = ring->cur = nm_ring_next(ring, ring->head);
1455 }
1456 }
1457 if (m > 0)
1458 event++;
1459 targ->ctr.pkts = sent;
1460 targ->ctr.bytes = sent*size;
1461 targ->ctr.events = event;
1462 if (rate_limit)
1463 tosend -= m;
1464 #ifdef BUSYWAIT
1465 rv = ioctl(pfd.fd, NIOCTXSYNC, NULL);
1466 if (rv < 0) {
1467 D("TXSYNC error on queue %d: %s", targ->me,
1468 strerror(errno));
1469 }
1470 again:
1471 ioctl(pfd.fd, NIOCRXSYNC, NULL);
1472 #else
1473 /* should use a parameter to decide how often to send */
1474 if ( (rv = poll(&pfd, 1, 3000)) <= 0) {
1475 D("poll error on queue %d: %s", targ->me,
1476 (rv ? strerror(errno) : "timeout"));
1477 continue;
1478 }
1479 #endif /* BUSYWAIT */
1480 /* see what we got back */
1481 #ifdef BUSYWAIT
1482 int rx = 0;
1483 #endif
1484 for (i = targ->nmd->first_rx_ring;
1485 i <= targ->nmd->last_rx_ring; i++) {
1486 ring = NETMAP_RXRING(nifp, i);
1487 while (!nm_ring_empty(ring)) {
1488 uint32_t seq;
1489 struct tstamp *tp;
1490 int pos;
1491
1492 slot = &ring->slot[ring->head];
1493 p = NETMAP_BUF(ring, slot->buf_idx);
1494
1495 clock_gettime(CLOCK_REALTIME_PRECISE, &now);
1496 bcopy(p+42, &seq, sizeof(seq));
1497 tp = (struct tstamp *)(p+46);
1498 ts.tv_sec = (time_t)tp->sec;
1499 ts.tv_nsec = (long)tp->nsec;
1500 ts.tv_sec = now.tv_sec - ts.tv_sec;
1501 ts.tv_nsec = now.tv_nsec - ts.tv_nsec;
1502 if (ts.tv_nsec < 0) {
1503 ts.tv_nsec += 1000000000;
1504 ts.tv_sec--;
1505 }
1506 if (0) D("seq %d/%llu delta %d.%09d", seq,
1507 (unsigned long long)sent,
1508 (int)ts.tv_sec, (int)ts.tv_nsec);
1509 t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec;
1510 if (t_cur < t_min)
1511 t_min = t_cur;
1512 count ++;
1513 av += t_cur;
1514 pos = msb64(t_cur);
1515 buckets[pos]++;
1516 /* now store it in a bucket */
1517 ring->head = ring->cur = nm_ring_next(ring, ring->head);
1518 #ifdef BUSYWAIT
1519 rx++;
1520 #endif
1521 }
1522 }
1523 //D("tx %d rx %d", sent, rx);
1524 //usleep(100000);
1525 ts.tv_sec = now.tv_sec - last_print.tv_sec;
1526 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec;
1527 if (ts.tv_nsec < 0) {
1528 ts.tv_nsec += 1000000000;
1529 ts.tv_sec--;
1530 }
1531 if (ts.tv_sec >= 1) {
1532 D("count %d RTT: min %d av %d ns",
1533 (int)count, (int)t_min, (int)(av/count));
1534 int k, j, kmin, off;
1535 char buf[512];
1536
1537 for (kmin = 0; kmin < 64; kmin ++)
1538 if (buckets[kmin])
1539 break;
1540 for (k = 63; k >= kmin; k--)
1541 if (buckets[k])
1542 break;
1543 buf[0] = '\0';
1544 off = 0;
1545 for (j = kmin; j <= k; j++) {
1546 off += sprintf(buf + off, " %5d", (int)buckets[j]);
1547 }
1548 D("k: %d .. %d\n\t%s", 1<<kmin, 1<<k, buf);
1549 bzero(&buckets, sizeof(buckets));
1550 count = 0;
1551 g_av += av;
1552 av = 0;
1553 if (t_min < g_min)
1554 g_min = t_min;
1555 t_min = ~0;
1556 last_print = now;
1557 }
1558 #ifdef BUSYWAIT
1559 if (rx < m && ts.tv_sec <= 3 && !targ->cancel)
1560 goto again;
1561 #endif /* BUSYWAIT */
1562 }
1563
1564 if (sent > 0) {
1565 D("RTT over %llu packets: min %d av %d ns",
1566 (long long unsigned)sent, (int)g_min,
1567 (int)((double)g_av/sent));
1568 }
1569 targ->completed = 1;
1570
1571 /* reset the ``used`` flag. */
1572 targ->used = 0;
1573
1574 return NULL;
1575 }
1576
1577
1578 /*
1579 * reply to ping requests
1580 */
1581 static void *
1582 pong_body(void *data)
1583 {
1584 struct targ *targ = (struct targ *) data;
1585 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
1586 struct netmap_if *nifp = targ->nmd->nifp;
1587 struct netmap_ring *txring, *rxring;
1588 int i;
1589 uint64_t sent = 0, n = targ->g->npackets;
1590
1591 if (targ->g->nthreads > 1) {
1592 D("can only reply ping with 1 thread");
1593 return NULL;
1594 }
1595 if (n > 0)
1596 D("understood ponger %llu but don't know how to do it",
1597 (unsigned long long)n);
1598
1599 if (targ->g->af == AF_INET6) {
1600 D("Warning: ping-pong with IPv6 not supported");
1601 }
1602
1603 while (!targ->cancel && (n == 0 || sent < n)) {
1604 uint32_t txhead, txavail;
1605 //#define BUSYWAIT
1606 #ifdef BUSYWAIT
1607 ioctl(pfd.fd, NIOCRXSYNC, NULL);
1608 #else
1609 int rv;
1610 if ( (rv = poll(&pfd, 1, 1000)) <= 0) {
1611 D("poll error on queue %d: %s", targ->me,
1612 rv ? strerror(errno) : "timeout");
1613 continue;
1614 }
1615 #endif
1616 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
1617 txhead = txring->head;
1618 txavail = nm_ring_space(txring);
1619 /* see what we got back */
1620 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
1621 rxring = NETMAP_RXRING(nifp, i);
1622 while (!nm_ring_empty(rxring)) {
1623 uint16_t *spkt, *dpkt;
1624 uint32_t head = rxring->head;
1625 struct netmap_slot *slot = &rxring->slot[head];
1626 char *src, *dst;
1627 src = NETMAP_BUF(rxring, slot->buf_idx);
1628 //D("got pkt %p of size %d", src, slot->len);
1629 rxring->head = rxring->cur = nm_ring_next(rxring, head);
1630 if (txavail == 0)
1631 continue;
1632 dst = NETMAP_BUF(txring,
1633 txring->slot[txhead].buf_idx);
1634 /* copy... */
1635 dpkt = (uint16_t *)dst;
1636 spkt = (uint16_t *)src;
1637 nm_pkt_copy(src, dst, slot->len);
1638 /* swap source and destination MAC */
1639 dpkt[0] = spkt[3];
1640 dpkt[1] = spkt[4];
1641 dpkt[2] = spkt[5];
1642 dpkt[3] = spkt[0];
1643 dpkt[4] = spkt[1];
1644 dpkt[5] = spkt[2];
1645 /* swap source and destination IPv4 */
1646 if (spkt[6] == htons(ETHERTYPE_IP)) {
1647 dpkt[13] = spkt[15];
1648 dpkt[14] = spkt[16];
1649 dpkt[15] = spkt[13];
1650 dpkt[16] = spkt[14];
1651 }
1652 txring->slot[txhead].len = slot->len;
1653 //dump_payload(dst, slot->len, txring, txhead);
1654 txhead = nm_ring_next(txring, txhead);
1655 txavail--;
1656 sent++;
1657 }
1658 }
1659 txring->head = txring->cur = txhead;
1660 targ->ctr.pkts = sent;
1661 #ifdef BUSYWAIT
1662 ioctl(pfd.fd, NIOCTXSYNC, NULL);
1663 #endif
1664 }
1665
1666 targ->completed = 1;
1667
1668 /* reset the ``used`` flag. */
1669 targ->used = 0;
1670
1671 return NULL;
1672 }
1673
1674
1675 static void *
1676 sender_body(void *data)
1677 {
1678 struct targ *targ = (struct targ *) data;
1679 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
1680 struct netmap_if *nifp;
1681 struct netmap_ring *txring = NULL;
1682 int i;
1683 uint64_t n = targ->g->npackets / targ->g->nthreads;
1684 uint64_t sent = 0;
1685 uint64_t event = 0;
1686 int options = targ->g->options;
1687 struct timespec nexttime = { 0, 0}; // XXX silence compiler
1688 int rate_limit = targ->g->tx_rate;
1689 struct pkt *pkt = &targ->pkt;
1690 void *frame;
1691 int size;
1692
1693 if (targ->frame == NULL) {
1694 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header;
1695 size = targ->g->pkt_size + targ->g->virt_header;
1696 } else {
1697 frame = targ->frame;
1698 size = targ->g->pkt_size;
1699 }
1700
1701 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
1702 if (setaffinity(targ->thread, targ->affinity))
1703 goto quit;
1704
1705 /* main loop.*/
1706 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
1707 if (rate_limit) {
1708 targ->tic = timespec_add(targ->tic, (struct timespec){2,0});
1709 targ->tic.tv_nsec = 0;
1710 wait_time(targ->tic);
1711 nexttime = targ->tic;
1712 }
1713 if (targ->g->dev_type == DEV_TAP) {
1714 D("writing to file desc %d", targ->g->main_fd);
1715
1716 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
1717 if (write(targ->g->main_fd, frame, size) != -1)
1718 sent++;
1719 update_addresses(pkt, targ);
1720 if (i > 10000) {
1721 targ->ctr.pkts = sent;
1722 targ->ctr.bytes = sent*size;
1723 targ->ctr.events = sent;
1724 i = 0;
1725 }
1726 }
1727 #ifndef NO_PCAP
1728 } else if (targ->g->dev_type == DEV_PCAP) {
1729 pcap_t *p = targ->g->p;
1730
1731 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
1732 if (pcap_inject(p, frame, size) != -1)
1733 sent++;
1734 update_addresses(pkt, targ);
1735 if (i > 10000) {
1736 targ->ctr.pkts = sent;
1737 targ->ctr.bytes = sent*size;
1738 targ->ctr.events = sent;
1739 i = 0;
1740 }
1741 }
1742 #endif /* NO_PCAP */
1743 } else {
1744 int tosend = 0;
1745 u_int bufsz, frag_size = targ->g->frag_size;
1746
1747 nifp = targ->nmd->nifp;
1748 txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
1749 bufsz = txring->nr_buf_size;
1750 if (bufsz < frag_size)
1751 frag_size = bufsz;
1752 targ->frag_size = targ->g->pkt_size / targ->frags;
1753 if (targ->frag_size > frag_size) {
1754 targ->frags = targ->g->pkt_size / frag_size;
1755 targ->frag_size = frag_size;
1756 if (targ->g->pkt_size % frag_size != 0)
1757 targ->frags++;
1758 }
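/* e.g. (illustrative, assuming frags started at 1): pkt_size 3000 with
 * a 2048-byte fragment limit ends up as frags = 2, frag_size = 2048
 * after the adjustment above. */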
1759 D("frags %u frag_size %u", targ->frags, targ->frag_size);
1760
1761 /* mark all slots of all rings as changed so the initial copy will be done */
1762 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
1763 uint32_t j;
1764 struct netmap_slot *slot;
1765
1766 txring = NETMAP_TXRING(nifp, i);
1767 for (j = 0; j < txring->num_slots; j++) {
1768 slot = &txring->slot[j];
1769 slot->flags = NS_BUF_CHANGED;
1770 }
1771 }
1772
1773 while (!targ->cancel && (n == 0 || sent < n)) {
1774 int rv;
1775
1776 if (rate_limit && tosend <= 0) {
1777 tosend = targ->g->burst;
1778 nexttime = timespec_add(nexttime, targ->g->tx_period);
1779 wait_time(nexttime);
1780 }
1781
1782 /*
1783 * wait for available room in the send queue(s)
1784 */
1785 #ifdef BUSYWAIT
1786 (void)rv;
1787 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
1788 D("ioctl error on queue %d: %s", targ->me,
1789 strerror(errno));
1790 goto quit;
1791 }
1792 #else /* !BUSYWAIT */
1793 if ( (rv = poll(&pfd, 1, 2000)) <= 0) {
1794 if (targ->cancel)
1795 break;
1796 D("poll error on queue %d: %s", targ->me,
1797 rv ? strerror(errno) : "timeout");
1798 // goto quit;
1799 }
1800 if (pfd.revents & POLLERR) {
1801 D("poll error on %d ring %d-%d", pfd.fd,
1802 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
1803 goto quit;
1804 }
1805 #endif /* !BUSYWAIT */
1806 /*
1807 * scan our queues and send on those with room
1808 */
1809 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
1810 int m;
1811 uint64_t limit = rate_limit ? tosend : targ->g->burst;
1812
1813 if (n > 0 && n == sent)
1814 break;
1815
1816 if (n > 0 && n - sent < limit)
1817 limit = n - sent;
1818 txring = NETMAP_TXRING(nifp, i);
1819 if (nm_ring_empty(txring))
1820 continue;
1821
1822 if (targ->g->pkt_min_size > 0) {
1823 size = nrand48(targ->seed) %
1824 (targ->g->pkt_size - targ->g->pkt_min_size) +
1825 targ->g->pkt_min_size;
1826 update_size(pkt, targ, size);
1827 }
1828 m = send_packets(txring, pkt, frame, size, targ,
1829 limit, options);
1830 ND("limit %lu tail %d m %d",
1831 limit, txring->tail, m);
1832 sent += m;
1833 if (m > 0) //XXX-ste: can m be 0?
1834 event++;
1835 targ->ctr.pkts = sent;
1836 targ->ctr.bytes += m*size;
1837 targ->ctr.events = event;
1838 if (rate_limit) {
1839 tosend -= m;
1840 if (tosend <= 0)
1841 break;
1842 }
1843 }
1844 }
1845 /* flush any remaining packets */
1846 if (txring != NULL) {
1847 D("flush tail %d head %d on thread %p",
1848 txring->tail, txring->head,
1849 (void *)pthread_self());
1850 ioctl(pfd.fd, NIOCTXSYNC, NULL);
1851 }
1852
1853 /* final part: wait for all the TX queues to be empty. */
1854 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
1855 txring = NETMAP_TXRING(nifp, i);
1856 while (!targ->cancel && nm_tx_pending(txring)) {
1857 RD(5, "pending tx tail %d head %d on ring %d",
1858 txring->tail, txring->head, i);
1859 ioctl(pfd.fd, NIOCTXSYNC, NULL);
1860 usleep(1); /* wait 1 tick */
1861 }
1862 }
1863 } /* end DEV_NETMAP */
1864
1865 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
1866 targ->completed = 1;
1867 targ->ctr.pkts = sent;
1868 targ->ctr.bytes = sent*size;
1869 targ->ctr.events = event;
1870 quit:
1871 /* reset the ``used`` flag. */
1872 targ->used = 0;
1873
1874 return (NULL);
1875 }
1876
1877
1878 #ifndef NO_PCAP
1879 static void
1880 receive_pcap(u_char *user, const struct pcap_pkthdr * h,
1881 const u_char * bytes)
1882 {
1883 struct my_ctrs *ctr = (struct my_ctrs *)user;
1884 (void)bytes; /* UNUSED */
1885 ctr->bytes += h->len;
1886 ctr->pkts++;
1887 }
1888 #endif /* !NO_PCAP */
1889
1890
1891 static int
1892 receive_packets(struct netmap_ring *ring, u_int limit, int dump, uint64_t *bytes)
1893 {
1894 u_int head, rx, n;
1895 uint64_t b = 0;
1896 u_int complete = 0;
1897
1898 if (bytes == NULL)
1899 bytes = &b;
1900
1901 head = ring->head;
1902 n = nm_ring_space(ring);
1903 if (n < limit)
1904 limit = n;
1905 for (rx = 0; rx < limit; rx++) {
1906 struct netmap_slot *slot = &ring->slot[head];
1907 char *p = NETMAP_BUF(ring, slot->buf_idx);
1908
1909 *bytes += slot->len;
1910 if (dump)
1911 dump_payload(p, slot->len, ring, head);
1912 if (!(slot->flags & NS_MOREFRAG))
1913 complete++;
1914
1915 head = nm_ring_next(ring, head);
1916 }
1917 ring->head = ring->cur = head;
1918
1919 return (complete);
1920 }
1921
1922 static void *
1923 receiver_body(void *data)
1924 {
1925 struct targ *targ = (struct targ *) data;
1926 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
1927 struct netmap_if *nifp;
1928 struct netmap_ring *rxring;
1929 int i;
1930 struct my_ctrs cur;
1931 uint64_t n = targ->g->npackets / targ->g->nthreads;
1932
1933 memset(&cur, 0, sizeof(cur));
1934
1935 if (setaffinity(targ->thread, targ->affinity))
1936 goto quit;
1937
1938 D("reading from %s fd %d main_fd %d",
1939 targ->g->ifname, targ->fd, targ->g->main_fd);
1940 /* unbounded wait for the first packet. */
1941 for (;!targ->cancel;) {
1942 i = poll(&pfd, 1, 1000);
1943 if (i > 0 && !(pfd.revents & POLLERR))
1944 break;
1945 if (i < 0) {
1946 D("poll() error: %s", strerror(errno));
1947 goto quit;
1948 }
1949 if (pfd.revents & POLLERR) {
1950 D("fd error");
1951 goto quit;
1952 }
1953 RD(1, "waiting for initial packets, poll returns %d %d",
1954 i, pfd.revents);
1955 }
1956 /* main loop, exit after 1s silence */
1957 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
1958 if (targ->g->dev_type == DEV_TAP) {
1959 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) {
1960 char buf[MAX_BODYSIZE];
1961 /* XXX should we poll ? */
1962 i = read(targ->g->main_fd, buf, sizeof(buf));
1963 if (i > 0) {
1964 targ->ctr.pkts++;
1965 targ->ctr.bytes += i;
1966 targ->ctr.events++;
1967 }
1968 }
1969 #ifndef NO_PCAP
1970 } else if (targ->g->dev_type == DEV_PCAP) {
1971 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) {
1972 /* XXX should we poll ? */
1973 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap,
1974 (u_char *)&targ->ctr);
1975 targ->ctr.events++;
1976 }
1977 #endif /* !NO_PCAP */
1978 } else {
1979 int dump = targ->g->options & OPT_DUMP;
1980
1981 nifp = targ->nmd->nifp;
1982 while (!targ->cancel && (n == 0 || targ->ctr.pkts < n)) {
1983 /* Once we start receiving packets, wait at most 1 second
1984 before quitting. */
1985 #ifdef BUSYWAIT
1986 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) {
1987 D("ioctl error on queue %d: %s", targ->me,
1988 strerror(errno));
1989 goto quit;
1990 }
1991 #else /* !BUSYWAIT */
1992 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
1993 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
1994 targ->toc.tv_sec -= 1; /* Subtract timeout time. */
1995 goto out;
1996 }
1997
1998 if (pfd.revents & POLLERR) {
1999 D("poll err");
2000 goto quit;
2001 }
2002 #endif /* !BUSYWAIT */
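			/*
			 * Visit all RX rings; track the total free space seen
			 * across them so that the minimum over time (min_space)
			 * can be reported as a measure of how close we get to
			 * filling the rings.
			 */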
2003 uint64_t cur_space = 0;
2004 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) {
2005 int m;
2006
2007 rxring = NETMAP_RXRING(nifp, i);
2008 /* compute free space in the ring */
2009 m = rxring->head + rxring->num_slots - rxring->tail;
2010 if (m >= (int) rxring->num_slots)
2011 m -= rxring->num_slots;
2012 cur_space += m;
2013 if (nm_ring_empty(rxring))
2014 continue;
2015
2016 m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes);
2017 cur.pkts += m;
2018 if (m > 0)
2019 cur.events++;
2020 }
2021 cur.min_space = targ->ctr.min_space;
2022 if (cur_space < cur.min_space)
2023 cur.min_space = cur_space;
2024 targ->ctr = cur;
2025 }
2026 }
2027
2028 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
2029
2030 #if !defined(BUSYWAIT)
2031 out:
2032 #endif
2033 targ->completed = 1;
2034 targ->ctr = cur;
2035
2036 quit:
2037 /* reset the ``used`` flag. */
2038 targ->used = 0;
2039
2040 return (NULL);
2041 }
2042
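/*
 * Body of the txseq sender: single-threaded transmitter that stamps a
 * 32-bit sequence number into the UDP payload of every packet (fixing up
 * the UDP checksum incrementally) so that the rxseq receiver can detect
 * losses and reordering. Optionally rate-limited via -R.
 */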
2043 static void *
2044 txseq_body(void *data)
2045 {
2046 struct targ *targ = (struct targ *) data;
2047 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
2048 struct netmap_ring *ring;
2049 int64_t sent = 0;
2050 uint64_t event = 0;
2051 int options = targ->g->options | OPT_COPY;
2052 struct timespec nexttime = {0, 0};
2053 int rate_limit = targ->g->tx_rate;
2054 struct pkt *pkt = &targ->pkt;
2055 int frags = targ->g->frags;
2056 uint32_t sequence = 0;
2057 int budget = 0;
2058 void *frame;
2059 int size;
2060
2061 if (targ->g->nthreads > 1) {
2062 D("can only txseq ping with 1 thread");
2063 return NULL;
2064 }
2065
2066 if (targ->g->npackets > 0) {
2067 D("Ignoring -n argument");
2068 }
2069
2070 frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header;
2071 size = targ->g->pkt_size + targ->g->virt_header;
2072
2073 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
2074 if (setaffinity(targ->thread, targ->affinity))
2075 goto quit;
2076
2077 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
2078 if (rate_limit) {
2079 targ->tic = timespec_add(targ->tic, (struct timespec){2,0});
2080 targ->tic.tv_nsec = 0;
2081 wait_time(targ->tic);
2082 nexttime = targ->tic;
2083 }
2084
2085 /* Only use the first queue. */
2086 ring = NETMAP_TXRING(targ->nmd->nifp, targ->nmd->first_tx_ring);
2087
2088 while (!targ->cancel) {
2089 int64_t limit;
2090 unsigned int space;
2091 unsigned int head;
2092 int fcnt;
2093 uint16_t sum = 0;
2094 int rv;
2095
2096 if (!rate_limit) {
2097 budget = targ->g->burst;
2098
2099 } else if (budget <= 0) {
2100 budget = targ->g->burst;
2101 nexttime = timespec_add(nexttime, targ->g->tx_period);
2102 wait_time(nexttime);
2103 }
2104
2105 /* wait for available room in the send queue */
2106 #ifdef BUSYWAIT
2107 (void)rv;
2108 if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
2109 D("ioctl error on queue %d: %s", targ->me,
2110 strerror(errno));
2111 goto quit;
2112 }
2113 #else /* !BUSYWAIT */
2114 if ( (rv = poll(&pfd, 1, 2000)) <= 0) {
2115 if (targ->cancel)
2116 break;
2117 D("poll error on queue %d: %s", targ->me,
2118 rv ? strerror(errno) : "timeout");
2119 // goto quit;
2120 }
2121 if (pfd.revents & POLLERR) {
2122 D("poll error on %d ring %d-%d", pfd.fd,
2123 targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
2124 goto quit;
2125 }
2126 #endif /* !BUSYWAIT */
2127
2128 /* If no room poll() again. */
2129 space = nm_ring_space(ring);
2130 if (!space) {
2131 continue;
2132 }
2133
2134 limit = budget;
2135
2136 if (space < limit) {
2137 limit = space;
2138 }
2139
2140 		/* Cut off ``limit`` to make sure it is a multiple of ``frags``. */
2141 if (frags > 1) {
2142 limit = (limit / frags) * frags;
2143 }
2144
2145 limit = sent + limit; /* Convert to absolute. */
2146
2147 for (fcnt = frags, head = ring->head;
2148 sent < limit; sent++, sequence++) {
2149 struct netmap_slot *slot = &ring->slot[head];
2150 char *p = NETMAP_BUF(ring, slot->buf_idx);
2151 uint16_t *w = (uint16_t *)PKT(pkt, body, targ->g->af), t;
2152
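			/*
			 * Patch the sequence number into the payload and update
			 * the UDP checksum incrementally via cksum_add(): in
			 * one's-complement arithmetic,
			 * new_sum = ~(~old_sum + ~old_word + new_word)
			 * for each 16-bit word that changes.
			 */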
2153 memcpy(&sum, targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, sizeof(sum));
2154
2155 slot->flags = 0;
2156 t = *w;
2157 PKT(pkt, body, targ->g->af)[0] = sequence >> 24;
2158 PKT(pkt, body, targ->g->af)[1] = (sequence >> 16) & 0xff;
2159 sum = ~cksum_add(~sum, cksum_add(~t, *w));
2160 t = *++w;
2161 PKT(pkt, body, targ->g->af)[2] = (sequence >> 8) & 0xff;
2162 PKT(pkt, body, targ->g->af)[3] = sequence & 0xff;
2163 sum = ~cksum_add(~sum, cksum_add(~t, *w));
2164 memcpy(targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, &sum, sizeof(sum));
2165 nm_pkt_copy(frame, p, size);
2166 if (fcnt == frags) {
2167 update_addresses(pkt, targ);
2168 }
2169
2170 if (options & OPT_DUMP) {
2171 dump_payload(p, size, ring, head);
2172 }
2173
2174 slot->len = size;
2175
2176 if (--fcnt > 0) {
2177 slot->flags |= NS_MOREFRAG;
2178 } else {
2179 fcnt = frags;
2180 }
2181
2182 if (sent == limit - 1) {
2183 /* Make sure we don't push an incomplete
2184 * packet. */
2185 assert(!(slot->flags & NS_MOREFRAG));
2186 slot->flags |= NS_REPORT;
2187 }
2188
2189 head = nm_ring_next(ring, head);
2190 if (rate_limit) {
2191 budget--;
2192 }
2193 }
2194
2195 ring->cur = ring->head = head;
2196
2197 event ++;
2198 targ->ctr.pkts = sent;
2199 targ->ctr.bytes = sent * size;
2200 targ->ctr.events = event;
2201 }
2202
2203 /* flush any remaining packets */
2204 D("flush tail %d head %d on thread %p",
2205 ring->tail, ring->head,
2206 (void *)pthread_self());
2207 ioctl(pfd.fd, NIOCTXSYNC, NULL);
2208
2209 	/* final part: wait for the TX queues to become empty. */
2210 while (!targ->cancel && nm_tx_pending(ring)) {
2211 RD(5, "pending tx tail %d head %d on ring %d",
2212 ring->tail, ring->head, targ->nmd->first_tx_ring);
2213 ioctl(pfd.fd, NIOCTXSYNC, NULL);
2214 usleep(1); /* wait 1 tick */
2215 }
2216
2217 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
2218 targ->completed = 1;
2219 targ->ctr.pkts = sent;
2220 targ->ctr.bytes = sent * size;
2221 targ->ctr.events = event;
2222 quit:
2223 /* reset the ``used`` flag. */
2224 targ->used = 0;
2225
2226 return (NULL);
2227 }
2228
2229
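/*
 * Format the (len, flags) pairs of ``nfrags`` consecutive slots starting at
 * ``head`` into strbuf, for diagnostic messages about multi-slot
 * (NS_MOREFRAG) packets.
 */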
2230 static char *
2231 multi_slot_to_string(struct netmap_ring *ring, unsigned int head,
2232 unsigned int nfrags, char *strbuf, size_t strbuflen)
2233 {
2234 unsigned int f;
2235 char *ret = strbuf;
2236
2237 for (f = 0; f < nfrags; f++) {
2238 struct netmap_slot *slot = &ring->slot[head];
2239 int m = snprintf(strbuf, strbuflen, "|%u,%x|", slot->len,
2240 slot->flags);
2241 if (m >= (int)strbuflen) {
2242 break;
2243 }
2244 strbuf += m;
2245 strbuflen -= m;
2246
2247 head = nm_ring_next(ring, head);
2248 }
2249
2250 return ret;
2251 }
2252
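/*
 * Body of the rxseq receiver: reads packets from the RX rings and checks,
 * per ring, that the 32-bit sequence numbers stamped by txseq are
 * consecutive, reporting gaps and out-of-order packets; it also checks
 * that the number of fragments per packet stays constant.
 */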
2253 static void *
2254 rxseq_body(void *data)
2255 {
2256 struct targ *targ = (struct targ *) data;
2257 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
2258 int dump = targ->g->options & OPT_DUMP;
2259 struct netmap_ring *ring;
2260 unsigned int frags_exp = 1;
2261 struct my_ctrs cur;
2262 unsigned int frags = 0;
2263 int first_packet = 1;
2264 int first_slot = 1;
2265 int i, j, af, nrings;
2266 uint32_t seq, *seq_exp = NULL;
2267
2268 memset(&cur, 0, sizeof(cur));
2269
2270 if (setaffinity(targ->thread, targ->affinity))
2271 goto quit;
2272
2273 nrings = targ->nmd->last_rx_ring - targ->nmd->first_rx_ring + 1;
2274 seq_exp = calloc(nrings, sizeof(uint32_t));
2275 if (seq_exp == NULL) {
2276 D("failed to allocate seq array");
2277 goto quit;
2278 }
2279
2280 D("reading from %s fd %d main_fd %d",
2281 targ->g->ifname, targ->fd, targ->g->main_fd);
2282 /* unbounded wait for the first packet. */
2283 for (;!targ->cancel;) {
2284 i = poll(&pfd, 1, 1000);
2285 if (i > 0 && !(pfd.revents & POLLERR))
2286 break;
2287 RD(1, "waiting for initial packets, poll returns %d %d",
2288 i, pfd.revents);
2289 }
2290
2291 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
2292
2293
2294 while (!targ->cancel) {
2295 unsigned int head;
2296 int limit;
2297
2298 #ifdef BUSYWAIT
2299 if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) {
2300 D("ioctl error on queue %d: %s", targ->me,
2301 strerror(errno));
2302 goto quit;
2303 }
2304 #else /* !BUSYWAIT */
2305 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
2306 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
2307 targ->toc.tv_sec -= 1; /* Subtract timeout time. */
2308 goto out;
2309 }
2310
2311 if (pfd.revents & POLLERR) {
2312 D("poll err");
2313 goto quit;
2314 }
2315 #endif /* !BUSYWAIT */
2316
2317 for (j = targ->nmd->first_rx_ring; j <= targ->nmd->last_rx_ring; j++) {
2318 ring = NETMAP_RXRING(targ->nmd->nifp, j);
2319 if (nm_ring_empty(ring))
2320 continue;
2321
2322 limit = nm_ring_space(ring);
2323 if (limit > targ->g->burst)
2324 limit = targ->g->burst;
2325
2326 #if 0
2327 /* Enable this if
2328 * 1) we remove the early-return optimization from
2329 * the netmap poll implementation, or
2330 * 2) pipes get NS_MOREFRAG support.
2331 * With the current netmap implementation, an experiment like
2332 * pkt-gen -i vale:1{1 -f txseq -F 9
2333 * pkt-gen -i vale:1}1 -f rxseq
2334 * would get stuck as soon as we find nm_ring_space(ring) < 9,
2335 * since here limit is rounded to 0 and
2336 * pipe rxsync is not called anymore by the poll() of this loop.
2337 */
2338 if (frags_exp > 1) {
2339 int o = limit;
2340 /* Cut off to the closest smaller multiple. */
2341 limit = (limit / frags_exp) * frags_exp;
2342 RD(2, "LIMIT %d --> %d", o, limit);
2343 }
2344 #endif
2345
2346 for (head = ring->head, i = 0; i < limit; i++) {
2347 struct netmap_slot *slot = &ring->slot[head];
2348 char *p = NETMAP_BUF(ring, slot->buf_idx);
2349 int len = slot->len;
2350 struct pkt *pkt;
2351
2352 if (dump) {
2353 dump_payload(p, slot->len, ring, head);
2354 }
2355
2356 frags++;
2357 if (!(slot->flags & NS_MOREFRAG)) {
2358 if (first_packet) {
2359 first_packet = 0;
2360 } else if (frags != frags_exp) {
2361 char prbuf[512];
2362 RD(1, "Received packets with %u frags, "
2363 "expected %u, '%s'", frags, frags_exp,
2364 multi_slot_to_string(ring, head-frags+1,
2365 frags,
2366 prbuf, sizeof(prbuf)));
2367 }
2368 first_packet = 0;
2369 frags_exp = frags;
2370 frags = 0;
2371 }
2372
2373 p -= sizeof(pkt->vh) - targ->g->virt_header;
2374 len += sizeof(pkt->vh) - targ->g->virt_header;
2375 pkt = (struct pkt *)p;
2376 if (ntohs(pkt->eh.ether_type) == ETHERTYPE_IP)
2377 af = AF_INET;
2378 else
2379 af = AF_INET6;
2380
2381 if ((char *)pkt + len < ((char *)PKT(pkt, body, af)) +
2382 sizeof(seq)) {
2383 RD(1, "%s: packet too small (len=%u)", __func__,
2384 slot->len);
2385 } else {
2386 seq = (PKT(pkt, body, af)[0] << 24) |
2387 (PKT(pkt, body, af)[1] << 16) |
2388 (PKT(pkt, body, af)[2] << 8) |
2389 PKT(pkt, body, af)[3];
2390 if (first_slot) {
2391 /* Grab the first one, whatever it
2392 is. */
2393 seq_exp[j] = seq;
2394 first_slot = 0;
2395 } else if (seq != seq_exp[j]) {
2396 uint32_t delta = seq - seq_exp[j];
2397
2398 if (delta < (0xFFFFFFFF >> 1)) {
2399 RD(2, "Sequence GAP: exp %u found %u",
2400 seq_exp[j], seq);
2401 } else {
2402 RD(2, "Sequence OUT OF ORDER: "
2403 "exp %u found %u", seq_exp[j], seq);
2404 }
2405 seq_exp[j] = seq;
2406 }
2407 seq_exp[j]++;
2408 }
2409
2410 cur.bytes += slot->len;
2411 head = nm_ring_next(ring, head);
2412 cur.pkts++;
2413 }
2414
2415 ring->cur = ring->head = head;
2416
2417 cur.events++;
2418 targ->ctr = cur;
2419 }
2420 }
2421 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
2422
2423 #ifndef BUSYWAIT
2424 out:
2425 #endif /* !BUSYWAIT */
2426 targ->completed = 1;
2427 targ->ctr = cur;
2428
2429 quit:
2430 if (seq_exp != NULL)
2431 free(seq_exp);
2432 /* reset the ``used`` flag. */
2433 targ->used = 0;
2434
2435 return (NULL);
2436 }
2437
2438
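/*
 * Print the final summary for a sender or receiver run: packets, bytes,
 * events, average packet size, packet rate, bandwidth (with and without
 * framing overhead) and average batch size.
 */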
2439 static void
2440 tx_output(struct glob_arg *g, struct my_ctrs *cur, double delta, const char *msg)
2441 {
2442 double bw, raw_bw, pps, abs;
2443 char b1[40], b2[80], b3[80];
2444 int size;
2445
2446 if (cur->pkts == 0) {
2447 printf("%s nothing.\n", msg);
2448 return;
2449 }
2450
2451 size = (int)(cur->bytes / cur->pkts);
2452
2453 printf("%s %llu packets %llu bytes %llu events %d bytes each in %.2f seconds.\n",
2454 msg,
2455 (unsigned long long)cur->pkts,
2456 (unsigned long long)cur->bytes,
2457 (unsigned long long)cur->events, size, delta);
2458 if (delta == 0)
2459 delta = 1e-6;
2460 if (size < 60) /* correct for min packet size */
2461 size = 60;
2462 pps = cur->pkts / delta;
2463 bw = (8.0 * cur->bytes) / delta;
2464 raw_bw = (8.0 * cur->bytes + cur->pkts * g->framing) / delta;
2465 abs = cur->pkts / (double)(cur->events);
2466
2467 printf("Speed: %spps Bandwidth: %sbps (raw %sbps). Average batch: %.2f pkts\n",
2468 norm(b1, pps, normalize), norm(b2, bw, normalize), norm(b3, raw_bw, normalize), abs);
2469 }
2470
2471 static void
2472 usage(int errcode)
2473 {
2474 /* This usage is generated from the pkt-gen man page:
2475 * $ man pkt-gen > x
2476 	 * and pasted here, adding the string terminators and newlines with simple
2477 	 * regular expressions. */
2478 const char *cmd = "pkt-gen";
2479 fprintf(stderr,
2480 "Usage:\n"
2481 "%s arguments\n"
2482 " -h Show program usage and exit.\n"
2483 "\n"
2484 " -i interface\n"
2485 " Name of the network interface that pkt-gen operates on. It can be a system network interface\n"
2486 " (e.g., em0), the name of a vale(4) port (e.g., valeSSS:PPP), the name of a netmap pipe or\n"
2487 " monitor, or any valid netmap port name accepted by the nm_open library function, as docu-\n"
2488 " mented in netmap(4) (NIOCREGIF section).\n"
2489 "\n"
2490 " -f function\n"
2491 " The function to be executed by pkt-gen. Specify tx for transmission, rx for reception, ping\n"
2492 " for client-side ping-pong operation, and pong for server-side ping-pong operation.\n"
2493 "\n"
2494 " -n count\n"
2495 " Number of iterations of the pkt-gen function (with 0 meaning infinite). In case of tx or rx,\n"
2496 " count is the number of packets to receive or transmit. In case of ping or pong, count is the\n"
2497 " number of ping-pong transactions.\n"
2498 "\n"
2499 " -l pkt_size\n"
2500 " Packet size in bytes excluding CRC. If passed a second time, use random sizes larger or\n"
2501 " equal than the second one and lower than the first one.\n"
2502 "\n"
2503 " -b burst_size\n"
2504 " Transmit or receive up to burst_size packets at a time.\n"
2505 "\n"
2506 " -4 Use IPv4 addresses.\n"
2507 "\n"
2508 " -6 Use IPv6 addresses.\n"
2509 "\n"
2510 " -d dst_ip[:port[-dst_ip:port]]\n"
2511 " Destination IPv4/IPv6 address and port, single or range.\n"
2512 "\n"
2513 " -s src_ip[:port[-src_ip:port]]\n"
2514 " Source IPv4/IPv6 address and port, single or range.\n"
2515 "\n"
2516 " -D dst_mac\n"
2517 " Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).\n"
2518 "\n"
2519 " -S src_mac\n"
2520 " Source MAC address in colon notation.\n"
2521 "\n"
2522 " -a cpu_id\n"
2523 " Pin the first thread of pkt-gen to a particular CPU using pthread_setaffinity_np(3). If more\n"
2524 " threads are used, they are pinned to the subsequent CPUs, one per thread.\n"
2525 "\n"
2526 " -c cpus\n"
2527 " Maximum number of CPUs to use (0 means to use all the available ones).\n"
2528 "\n"
2529 " -p threads\n"
2530 " Number of threads to use. By default, only a single thread is used to handle all the netmap\n"
2531 " rings. If threads is larger than one, each thread handles a single TX ring (in tx mode), a\n"
2532 " single RX ring (in rx mode), or a TX/RX ring pair. The number of threads must be less than or\n"
2533 " equal to the number of TX (or RX) rings available in the device specified by interface.\n"
2534 "\n"
2535 " -T report_ms\n"
2536 " Number of milliseconds between reports.\n"
2537 "\n"
2538 " -w wait_for_link_time\n"
2539 " Number of seconds to wait before starting the pkt-gen function, useful to make sure that the\n"
2540 " network link is up. A network device driver may take some time to enter netmap mode, or to\n"
2541 " create a new transmit/receive ring pair when netmap(4) requests one.\n"
2542 "\n"
2543 " -R rate\n"
2544 " Packet transmission rate. Not setting the packet transmission rate tells pkt-gen to transmit\n"
2545 " packets as quickly as possible. On servers from 2010 onward netmap(4) is able to com-\n"
2546 " pletely use all of the bandwidth of a 10 or 40Gbps link, so this option should be used unless\n"
2547 " your intention is to saturate the link.\n"
2548 "\n"
2549 " -X Dump payload of each packet transmitted or received.\n"
2550 "\n"
2551 " -H len Add empty virtio-net-header with size 'len'. Valid sizes are 0, 10 and 12. This option is\n"
2552 " only used with Virtual Machine technologies that use virtio as a network interface.\n"
2553 "\n"
2554 " -P file\n"
2555 " Load the packet to be transmitted from a pcap file rather than constructing it within\n"
2556 " pkt-gen.\n"
2557 "\n"
2558 " -z Use random IPv4/IPv6 src address/port.\n"
2559 "\n"
2560 " -Z Use random IPv4/IPv6 dst address/port.\n"
2561 "\n"
2562 " -N Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).\n"
2563 "\n"
2564 " -F num_frags\n"
2565 " Send multi-slot packets, each one with num_frags fragments. A multi-slot packet is repre-\n"
2566 " sented by two or more consecutive netmap slots with the NS_MOREFRAG flag set (except for the\n"
2567 " last slot). This is useful to transmit or receive packets larger than the netmap buffer\n"
2568 " size.\n"
2569 "\n"
2570 " -M frag_size\n"
2571 " In multi-slot mode, frag_size specifies the size of each fragment, if smaller than the packet\n"
2572 " length divided by num_frags.\n"
2573 "\n"
2574 " -I Use indirect buffers. It is only valid for transmitting on VALE ports, and it is implemented\n"
2575 " by setting the NS_INDIRECT flag in the netmap slots.\n"
2576 "\n"
2577 " -W Exit immediately if all the RX rings are empty the first time they are examined.\n"
2578 "\n"
2579 " -v Increase the verbosity level.\n"
2580 "\n"
2581 " -r In tx mode, do not initialize packets, but send whatever the content of the uninitialized\n"
2582 " netmap buffers is (rubbish mode).\n"
2583 "\n"
2584 " -A Compute mean and standard deviation (over a sliding window) for the transmit or receive rate.\n"
2585 "\n"
2586 " -B Take Ethernet framing and CRC into account when computing the average bps. This adds 4 bytes\n"
2587 " of CRC and 20 bytes of framing to each packet.\n"
2588 "\n"
2589 " -C tx_slots[,rx_slots[,tx_rings[,rx_rings]]]\n"
2590 " Configuration in terms of number of rings and slots to be used when opening the netmap port.\n"
2591 " Such configuration has an effect on software ports created on the fly, such as VALE ports and\n"
2592 " netmap pipes. The configuration may consist of 1 to 4 numbers separated by commas: tx_slots,\n"
2593 " rx_slots, tx_rings, rx_rings. Missing numbers or zeroes stand for default values. As an\n"
2594 " additional convenience, if exactly one number is specified, then this is assigned to both\n"
2595 " tx_slots and rx_slots. If there is no fourth number, then the third one is assigned to both\n"
2596 " tx_rings and rx_rings.\n"
2597 "\n"
2598 " -o options data generation options (parsed using atoi)\n"
2599 " OPT_PREFETCH 1\n"
2600 " OPT_ACCESS 2\n"
2601 " OPT_COPY 4\n"
2602 " OPT_MEMCPY 8\n"
2603 " OPT_TS 16 (add a timestamp)\n"
2604 " OPT_INDIRECT 32 (use indirect buffers)\n"
2605 " OPT_DUMP 64 (dump rx/tx traffic)\n"
2606 " OPT_RUBBISH 256\n"
2607 " (send whatever the buffers contain)\n"
2608 " OPT_RANDOM_SRC 512\n"
2609 " OPT_RANDOM_DST 1024\n"
2610 " OPT_PPS_STATS 2048\n"
2611 " OPT_UPDATE_CSUM 4096\n"
2612 "",
2613 cmd);
2614 exit(errcode);
2615 }
2616
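/*
 * Create the worker threads. The first thread reuses the descriptor opened
 * by main(); the others clone it and bind to one ring each (switching to
 * the host rings when NIC+SW mode is requested). Each thread is optionally
 * pinned to a CPU and gets a pre-built packet.
 */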
2617 static int
2618 start_threads(struct glob_arg *g) {
2619 int i;
2620
2621 targs = calloc(g->nthreads, sizeof(*targs));
2622 struct targ *t;
2623 /*
2624 * Now create the desired number of threads, each one
2625 * using a single descriptor.
2626 */
2627 for (i = 0; i < g->nthreads; i++) {
2628 uint64_t seed = (uint64_t)time(0) | ((uint64_t)time(0) << 32);
2629 t = &targs[i];
2630
2631 bzero(t, sizeof(*t));
2632 t->fd = -1; /* default, with pcap */
2633 t->g = g;
2634 memcpy(t->seed, &seed, sizeof(t->seed));
2635
2636 if (g->dev_type == DEV_NETMAP) {
2637 int m = -1;
2638
2639 /*
2640 * if the user wants both HW and SW rings, we need to
2641 * know when to switch from NR_REG_ONE_NIC to NR_REG_ONE_SW
2642 */
2643 if (g->orig_mode == NR_REG_NIC_SW) {
2644 m = (g->td_type == TD_TYPE_RECEIVER ?
2645 g->nmd->reg.nr_rx_rings :
2646 g->nmd->reg.nr_tx_rings);
2647 }
2648
2649 if (i > 0) {
2650 int j;
2651 /* the first thread uses the fd opened by the main
2652 * thread, the other threads re-open /dev/netmap
2653 */
2654 t->nmd = nmport_clone(g->nmd);
2655 if (t->nmd == NULL)
2656 return -1;
2657
2658 j = i;
2659 if (m > 0 && j >= m) {
2660 /* switch to the software rings */
2661 t->nmd->reg.nr_mode = NR_REG_ONE_SW;
2662 j -= m;
2663 }
2664 t->nmd->reg.nr_ringid = j & NETMAP_RING_MASK;
2665 /* Only touch one of the rings (rx is already ok) */
2666 if (g->td_type == TD_TYPE_RECEIVER)
2667 t->nmd->reg.nr_flags |= NETMAP_NO_TX_POLL;
2668
2669 /* register interface. Override ifname and ringid etc. */
2670 if (nmport_open_desc(t->nmd) < 0) {
2671 nmport_undo_prepare(t->nmd);
2672 t->nmd = NULL;
2673 return -1;
2674 }
2675 } else {
2676 t->nmd = g->nmd;
2677 }
2678 t->fd = t->nmd->fd;
2679 t->frags = g->frags;
2680 } else {
2681 targs[i].fd = g->main_fd;
2682 }
2683 t->used = 1;
2684 t->me = i;
2685 if (g->affinity >= 0) {
2686 t->affinity = (g->affinity + i) % g->cpus;
2687 } else {
2688 t->affinity = -1;
2689 }
2690 /* default, init packets */
2691 initialize_packet(t);
2692 }
2693 /* Wait for PHY reset. */
2694 D("Wait %d secs for phy reset", g->wait_link);
2695 sleep(g->wait_link);
2696 D("Ready...");
2697
2698 for (i = 0; i < g->nthreads; i++) {
2699 t = &targs[i];
2700 if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) {
2701 D("Unable to create thread %d: %s", i, strerror(errno));
2702 t->used = 0;
2703 }
2704 }
2705 return 0;
2706 }
2707
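/*
 * Reporting loop run by the main thread: periodically aggregate the
 * per-thread counters and print the instantaneous rates, then join the
 * workers, close their descriptors and print the final summary.
 */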
2708 static void
2709 main_thread(struct glob_arg *g)
2710 {
2711 int i;
2712
2713 struct my_ctrs prev, cur;
2714 double delta_t;
2715 struct timeval tic, toc;
2716
2717 prev.pkts = prev.bytes = prev.events = 0;
2718 gettimeofday(&prev.t, NULL);
2719 for (;;) {
2720 char b1[40], b2[40], b3[40], b4[100];
2721 uint64_t pps, usec;
2722 struct my_ctrs x;
2723 double abs;
2724 int done = 0;
2725
2726 usec = wait_for_next_report(&prev.t, &cur.t,
2727 g->report_interval);
2728
2729 cur.pkts = cur.bytes = cur.events = 0;
2730 cur.min_space = 0;
2731 if (usec < 10000) /* too short to be meaningful */
2732 continue;
2733 /* accumulate counts for all threads */
2734 for (i = 0; i < g->nthreads; i++) {
2735 cur.pkts += targs[i].ctr.pkts;
2736 cur.bytes += targs[i].ctr.bytes;
2737 cur.events += targs[i].ctr.events;
2738 cur.min_space += targs[i].ctr.min_space;
2739 targs[i].ctr.min_space = 99999;
2740 if (targs[i].used == 0)
2741 done++;
2742 }
2743 x.pkts = cur.pkts - prev.pkts;
2744 x.bytes = cur.bytes - prev.bytes;
2745 x.events = cur.events - prev.events;
2746 pps = (x.pkts*1000000 + usec/2) / usec;
2747 abs = (x.events > 0) ? (x.pkts / (double) x.events) : 0;
2748
2749 if (!(g->options & OPT_PPS_STATS)) {
2750 strcpy(b4, "");
2751 } else {
2752 /* Compute some pps stats using a sliding window. */
2753 double ppsavg = 0.0, ppsdev = 0.0;
2754 int nsamples = 0;
2755
2756 g->win[g->win_idx] = pps;
2757 g->win_idx = (g->win_idx + 1) % STATS_WIN;
2758
2759 for (i = 0; i < STATS_WIN; i++) {
2760 ppsavg += g->win[i];
2761 if (g->win[i]) {
2762 nsamples ++;
2763 }
2764 }
2765 ppsavg /= nsamples;
2766
2767 for (i = 0; i < STATS_WIN; i++) {
2768 if (g->win[i] == 0) {
2769 continue;
2770 }
2771 ppsdev += (g->win[i] - ppsavg) * (g->win[i] - ppsavg);
2772 }
2773 ppsdev /= nsamples;
2774 ppsdev = sqrt(ppsdev);
2775
2776 snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]",
2777 norm(b1, ppsavg, normalize), norm(b2, ppsdev, normalize));
2778 }
2779
2780 D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space",
2781 norm(b1, pps, normalize), b4,
2782 norm(b2, (double)x.pkts, normalize),
2783 norm(b3, 1000000*((double)x.bytes*8+(double)x.pkts*g->framing)/usec, normalize),
2784 (unsigned long long)usec,
2785 abs, (int)cur.min_space);
2786 prev = cur;
2787
2788 if (done == g->nthreads)
2789 break;
2790 }
2791
2792 timerclear(&tic);
2793 timerclear(&toc);
2794 cur.pkts = cur.bytes = cur.events = 0;
2795 /* final round */
2796 for (i = 0; i < g->nthreads; i++) {
2797 struct timespec t_tic, t_toc;
2798 /*
2799 * Join active threads, unregister interfaces and close
2800 * file descriptors.
2801 */
2802 if (targs[i].used)
2803 pthread_join(targs[i].thread, NULL); /* blocking */
2804 if (g->dev_type == DEV_NETMAP) {
2805 nmport_close(targs[i].nmd);
2806 targs[i].nmd = NULL;
2807 } else {
2808 close(targs[i].fd);
2809 }
2810
2811 if (targs[i].completed == 0)
2812 D("ouch, thread %d exited with error", i);
2813
2814 /*
2815 * Collect threads output and extract information about
2816 * how long it took to send all the packets.
2817 */
2818 cur.pkts += targs[i].ctr.pkts;
2819 cur.bytes += targs[i].ctr.bytes;
2820 cur.events += targs[i].ctr.events;
2821 /* collect the largest start (tic) and end (toc) times,
2822 * XXX maybe we should do the earliest tic, or do a weighted
2823 * average ?
2824 */
2825 t_tic = timeval2spec(&tic);
2826 t_toc = timeval2spec(&toc);
2827 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic))
2828 tic = timespec2val(&targs[i].tic);
2829 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc))
2830 toc = timespec2val(&targs[i].toc);
2831 }
2832
2833 /* print output. */
2834 timersub(&toc, &tic, &toc);
2835 delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
2836 if (g->td_type == TD_TYPE_SENDER)
2837 tx_output(g, &cur, delta_t, "Sent");
2838 else if (g->td_type == TD_TYPE_RECEIVER)
2839 tx_output(g, &cur, delta_t, "Received");
2840 }
2841
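/*
 * Dispatch table for the -f option: maps a function name to its thread
 * type, thread body and default burst size. For instance (interface names
 * are just examples):
 *   pkt-gen -i netmap:em0 -f tx    # transmit as fast as possible
 *   pkt-gen -i netmap:em0 -f rx    # receive and report rates
 */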
2842 struct td_desc {
2843 int ty;
2844 const char *key;
2845 void *f;
2846 int default_burst;
2847 };
2848
2849 static struct td_desc func[] = {
2850 { TD_TYPE_RECEIVER, "rx", receiver_body, 512}, /* default */
2851 { TD_TYPE_SENDER, "tx", sender_body, 512 },
2852 { TD_TYPE_OTHER, "ping", ping_body, 1 },
2853 { TD_TYPE_OTHER, "pong", pong_body, 1 },
2854 { TD_TYPE_SENDER, "txseq", txseq_body, 512 },
2855 { TD_TYPE_RECEIVER, "rxseq", rxseq_body, 512 },
2856 { 0, NULL, NULL, 0 }
2857 };
2858
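/*
 * Open (and, on Linux, create via TUNSETIFF) the tap device named in
 * ``dev`` and return its file descriptor; on Linux the kernel-assigned
 * name is written back into ``dev``.
 */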
2859 static int
2860 tap_alloc(char *dev)
2861 {
2862 struct ifreq ifr;
2863 int fd, err;
2864 const char *clonedev = TAP_CLONEDEV;
2865
2866 (void)err;
2867 (void)dev;
2868 	/* Argument taken by the function:
2869 	 *
2870 	 * char *dev: the name of an interface (or '\0'). MUST have enough
2871 	 * space to hold the interface name if '\0' is passed.
2872 	 * The interface flags are hard-coded (IFF_TAP | IFF_NO_PI on Linux).
2873 	 */
2874
2875 #ifdef __FreeBSD__
2876 if (dev[3]) { /* tapSomething */
2877 static char buf[128];
2878 snprintf(buf, sizeof(buf), "/dev/%s", dev);
2879 clonedev = buf;
2880 }
2881 #endif
2882 /* open the device */
2883 if( (fd = open(clonedev, O_RDWR)) < 0 ) {
2884 return fd;
2885 }
2886 D("%s open successful", clonedev);
2887
2888 /* preparation of the struct ifr, of type "struct ifreq" */
2889 memset(&ifr, 0, sizeof(ifr));
2890
2891 #ifdef linux
2892 ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
2893
2894 if (*dev) {
2895 /* if a device name was specified, put it in the structure; otherwise,
2896 * the kernel will try to allocate the "next" device of the
2897 * specified type */
2898 size_t len = strlen(dev);
2899 		if (len >= IFNAMSIZ) {	/* leave room for the NUL terminator */
2900 D("%s too long", dev);
2901 return -1;
2902 }
2903 memcpy(ifr.ifr_name, dev, len);
2904 }
2905
2906 /* try to create the device */
2907 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
2908 D("failed to do a TUNSETIFF: %s", strerror(errno));
2909 close(fd);
2910 return err;
2911 }
2912
2913 /* if the operation was successful, write back the name of the
2914 * interface to the variable "dev", so the caller can know
2915 * it. Note that the caller MUST reserve space in *dev (see calling
2916 * code below) */
2917 strcpy(dev, ifr.ifr_name);
2918 D("new name is %s", dev);
2919 #endif /* linux */
2920
2921 /* this is the special file descriptor that the caller will use to talk
2922 * with the virtual interface */
2923 return fd;
2924 }
2925
2926 int
2927 main(int arc, char **argv)
2928 {
2929 int i;
2930 struct sigaction sa;
2931 sigset_t ss;
2932
2933 struct glob_arg g;
2934
2935 int ch;
2936 int devqueues = 1; /* how many device queues */
2937 int wait_link_arg = 0;
2938
2939 int pkt_size_done = 0;
2940
2941 struct td_desc *fn = func;
2942
2943 bzero(&g, sizeof(g));
2944
2945 g.main_fd = -1;
2946 g.td_body = fn->f;
2947 g.td_type = fn->ty;
2948 g.report_interval = 1000; /* report interval */
2949 g.affinity = -1;
2950 /* ip addresses can also be a range x.x.x.x-x.x.x.y */
2951 g.af = AF_INET; /* default */
2952 g.src_ip.name = "10.0.0.1";
2953 g.dst_ip.name = "10.1.0.1";
2954 g.dst_mac.name = "ff:ff:ff:ff:ff:ff";
2955 g.src_mac.name = NULL;
2956 g.pkt_size = 60;
2957 g.pkt_min_size = 0;
2958 g.nthreads = 1;
2959 g.cpus = 1; /* default */
2960 g.forever = 1;
2961 g.tx_rate = 0;
2962 g.frags = 1;
2963 g.frag_size = (u_int)-1; /* use the netmap buffer size by default */
2964 g.nmr_config = "";
2965 g.virt_header = 0;
2966 g.wait_link = 2; /* wait 2 seconds for physical ports */
2967
2968 while ((ch = getopt(arc, argv, "46a:f:F:Nn:i:Il:d:s:D:S:b:c:o:p:"
2969 "T:w:WvR:XC:H:rP:zZAhBM:")) != -1) {
2970
2971 switch(ch) {
2972 default:
2973 D("bad option %c %s", ch, optarg);
2974 usage(-1);
2975 break;
2976
2977 case 'h':
2978 usage(0);
2979 break;
2980
2981 case '4':
2982 g.af = AF_INET;
2983 break;
2984
2985 case '6':
2986 g.af = AF_INET6;
2987 break;
2988
2989 case 'N':
2990 normalize = 0;
2991 break;
2992
2993 case 'n':
2994 g.npackets = strtoull(optarg, NULL, 10);
2995 break;
2996
2997 case 'F':
2998 i = atoi(optarg);
2999 if (i < 1 || i > 63) {
3000 D("invalid frags %d [1..63], ignore", i);
3001 break;
3002 }
3003 g.frags = i;
3004 break;
3005
3006 case 'M':
3007 g.frag_size = atoi(optarg);
3008 break;
3009
3010 case 'f':
3011 for (fn = func; fn->key; fn++) {
3012 if (!strcmp(fn->key, optarg))
3013 break;
3014 }
3015 if (fn->key) {
3016 g.td_body = fn->f;
3017 g.td_type = fn->ty;
3018 } else {
3019 D("unrecognised function %s", optarg);
3020 }
3021 break;
3022
3023 case 'o': /* data generation options */
3024 g.options |= atoi(optarg);
3025 break;
3026
3027 case 'a': /* force affinity */
3028 g.affinity = atoi(optarg);
3029 break;
3030
3031 case 'i': /* interface */
3032 /* a prefix of tap: netmap: or pcap: forces the mode.
3033 * otherwise we guess
3034 */
3035 D("interface is %s", optarg);
3036 if (strlen(optarg) > MAX_IFNAMELEN - 8) {
3037 D("ifname too long %s", optarg);
3038 break;
3039 }
3040 strcpy(g.ifname, optarg);
3041 if (!strcmp(optarg, "null")) {
3042 g.dev_type = DEV_NETMAP;
3043 g.dummy_send = 1;
3044 } else if (!strncmp(optarg, "tap:", 4)) {
3045 g.dev_type = DEV_TAP;
3046 strcpy(g.ifname, optarg + 4);
3047 } else if (!strncmp(optarg, "pcap:", 5)) {
3048 g.dev_type = DEV_PCAP;
3049 strcpy(g.ifname, optarg + 5);
3050 } else if (!strncmp(optarg, "netmap:", 7) ||
3051 !strncmp(optarg, "vale", 4)) {
3052 g.dev_type = DEV_NETMAP;
3053 } else if (!strncmp(optarg, "tap", 3)) {
3054 g.dev_type = DEV_TAP;
3055 } else { /* prepend netmap: */
3056 g.dev_type = DEV_NETMAP;
3057 sprintf(g.ifname, "netmap:%s", optarg);
3058 }
3059 break;
3060
3061 case 'I':
3062 g.options |= OPT_INDIRECT; /* use indirect buffers */
3063 break;
3064
3065 case 'l': /* pkt_size */
3066 if (pkt_size_done) {
3067 g.pkt_min_size = atoi(optarg);
3068 } else {
3069 g.pkt_size = atoi(optarg);
3070 pkt_size_done = 1;
3071 }
3072 break;
3073
3074 case 'd':
3075 g.dst_ip.name = optarg;
3076 break;
3077
3078 case 's':
3079 g.src_ip.name = optarg;
3080 break;
3081
3082 case 'T': /* report interval */
3083 g.report_interval = atoi(optarg);
3084 break;
3085
3086 case 'w':
3087 g.wait_link = atoi(optarg);
3088 wait_link_arg = 1;
3089 break;
3090
3091 case 'W':
3092 g.forever = 0; /* exit RX with no traffic */
3093 break;
3094
3095 case 'b': /* burst */
3096 g.burst = atoi(optarg);
3097 break;
3098 case 'c':
3099 g.cpus = atoi(optarg);
3100 break;
3101 case 'p':
3102 g.nthreads = atoi(optarg);
3103 break;
3104
3105 case 'D': /* destination mac */
3106 g.dst_mac.name = optarg;
3107 break;
3108
3109 case 'S': /* source mac */
3110 g.src_mac.name = optarg;
3111 break;
3112 case 'v':
3113 verbose++;
3114 break;
3115 case 'R':
3116 g.tx_rate = atoi(optarg);
3117 break;
3118 case 'X':
3119 g.options |= OPT_DUMP;
3120 break;
3121 case 'C':
3122 D("WARNING: the 'C' option is deprecated, use the '+conf:' libnetmap option instead");
3123 g.nmr_config = strdup(optarg);
3124 break;
3125 case 'H':
3126 g.virt_header = atoi(optarg);
3127 break;
3128 case 'P':
3129 g.packet_file = strdup(optarg);
3130 break;
3131 case 'r':
3132 g.options |= OPT_RUBBISH;
3133 break;
3134 case 'z':
3135 g.options |= OPT_RANDOM_SRC;
3136 break;
3137 case 'Z':
3138 g.options |= OPT_RANDOM_DST;
3139 break;
3140 case 'A':
3141 g.options |= OPT_PPS_STATS;
3142 break;
3143 case 'B':
3144 			/* raw packets have 4 bytes CRC + 20 bytes framing */
3145 // XXX maybe add an option to pass the IFG
3146 g.framing = 24 * 8;
3147 break;
3148 }
3149 }
3150
3151 	if (strlen(g.ifname) == 0) {
3152 D("missing ifname");
3153 usage(-1);
3154 }
3155
3156 if (g.burst == 0) {
3157 g.burst = fn->default_burst;
3158 D("using default burst size: %d", g.burst);
3159 }
3160
3161 g.system_cpus = i = system_ncpus();
3162 if (g.cpus < 0 || g.cpus > i) {
3163 D("%d cpus is too high, have only %d cpus", g.cpus, i);
3164 usage(-1);
3165 }
3166 D("running on %d cpus (have %d)", g.cpus, i);
3167 if (g.cpus == 0)
3168 g.cpus = i;
3169
3170 if (!wait_link_arg && !strncmp(g.ifname, "vale", 4)) {
3171 g.wait_link = 0;
3172 }
3173
3174 if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) {
3175 D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE);
3176 usage(-1);
3177 }
3178
3179 if (g.pkt_min_size > 0 && (g.pkt_min_size < 16 || g.pkt_min_size > g.pkt_size)) {
3180 D("bad pktminsize %d [16..%d]\n", g.pkt_min_size, g.pkt_size);
3181 usage(-1);
3182 }
3183
3184 if (g.src_mac.name == NULL) {
3185 static char mybuf[20] = "00:00:00:00:00:00";
3186 /* retrieve source mac address. */
3187 if (source_hwaddr(g.ifname, mybuf) == -1) {
3188 D("Unable to retrieve source mac");
3189 // continue, fail later
3190 }
3191 g.src_mac.name = mybuf;
3192 }
3193 /* extract address ranges */
3194 if (extract_mac_range(&g.src_mac) || extract_mac_range(&g.dst_mac))
3195 usage(-1);
3196 g.options |= extract_ip_range(&g.src_ip, g.af);
3197 g.options |= extract_ip_range(&g.dst_ip, g.af);
3198
3199 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1
3200 && g.virt_header != VIRT_HDR_2) {
3201 D("bad virtio-net-header length");
3202 usage(-1);
3203 }
3204
3205 if (g.dev_type == DEV_TAP) {
3206 D("want to use tap %s", g.ifname);
3207 g.main_fd = tap_alloc(g.ifname);
3208 if (g.main_fd < 0) {
3209 D("cannot open tap %s", g.ifname);
3210 usage(-1);
3211 }
3212 #ifndef NO_PCAP
3213 } else if (g.dev_type == DEV_PCAP) {
3214 char pcap_errbuf[PCAP_ERRBUF_SIZE];
3215
3216 pcap_errbuf[0] = '\0'; // init the buffer
3217 g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf);
3218 if (g.p == NULL) {
3219 D("cannot open pcap on %s", g.ifname);
3220 usage(-1);
3221 }
3222 g.main_fd = pcap_fileno(g.p);
3223 D("using pcap on %s fileno %d", g.ifname, g.main_fd);
3224 #endif /* !NO_PCAP */
3225 } else if (g.dummy_send) { /* but DEV_NETMAP */
3226 D("using a dummy send routine");
3227 } else {
3228 g.nmd = nmport_prepare(g.ifname);
3229 if (g.nmd == NULL)
3230 goto out;
3231
3232 parse_nmr_config(g.nmr_config, &g.nmd->reg);
3233
3234 g.nmd->reg.nr_flags |= NR_ACCEPT_VNET_HDR;
3235
3236 /*
3237 		 * Open the netmap device using nmport_open_desc().
3238 		 * Putting the interface in netmap mode detaches it from the host
3239 		 * protocol stack and may cause a reset of the card,
3240 * which in turn may take some time for the PHY to
3241 * reconfigure. We do the open here to have time to reset.
3242 */
3243 g.orig_mode = g.nmd->reg.nr_mode;
3244 if (g.nthreads > 1) {
3245 switch (g.orig_mode) {
3246 case NR_REG_ALL_NIC:
3247 case NR_REG_NIC_SW:
3248 g.nmd->reg.nr_mode = NR_REG_ONE_NIC;
3249 break;
3250 case NR_REG_SW:
3251 g.nmd->reg.nr_mode = NR_REG_ONE_SW;
3252 break;
3253 default:
3254 break;
3255 }
3256 g.nmd->reg.nr_ringid = 0;
3257 }
3258 if (nmport_open_desc(g.nmd) < 0)
3259 goto out;
3260 g.main_fd = g.nmd->fd;
3261 ND("mapped %luKB at %p", (unsigned long)(g.nmd->req.nr_memsize>>10),
3262 g.nmd->mem);
3263
3264 if (g.virt_header) {
3265 /* Set the virtio-net header length, since the user asked
3266 * for it explicitly. */
3267 set_vnet_hdr_len(&g);
3268 } else {
3269 /* Check whether the netmap port we opened requires us to send
3270 * and receive frames with virtio-net header. */
3271 get_vnet_hdr_len(&g);
3272 }
3273
3274 /* get num of queues in tx or rx */
3275 if (g.td_type == TD_TYPE_SENDER)
3276 devqueues = g.nmd->reg.nr_tx_rings + g.nmd->reg.nr_host_tx_rings;
3277 else
3278 devqueues = g.nmd->reg.nr_rx_rings + g.nmd->reg.nr_host_rx_rings;
3279
3280 /* validate provided nthreads. */
3281 if (g.nthreads < 1 || g.nthreads > devqueues) {
3282 D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
3283 // continue, fail later
3284 }
3285
3286 if (g.td_type == TD_TYPE_SENDER) {
3287 int mtu = get_if_mtu(&g);
3288
3289 if (mtu > 0 && g.pkt_size > mtu) {
3290 D("pkt_size (%d) must be <= mtu (%d)",
3291 g.pkt_size, mtu);
3292 return -1;
3293 }
3294 }
3295
3296 if (verbose) {
3297 struct netmap_if *nifp = g.nmd->nifp;
3298 struct nmreq_register *req = &g.nmd->reg;
3299
3300 D("nifp at offset %"PRIu64" ntxqs %d nrxqs %d memid %d",
3301 req->nr_offset, req->nr_tx_rings, req->nr_rx_rings,
3302 req->nr_mem_id);
3303 for (i = 0; i < req->nr_tx_rings + req->nr_host_tx_rings; i++) {
3304 struct netmap_ring *ring = NETMAP_TXRING(nifp, i);
3305 D(" TX%d at offset %p slots %d", i,
3306 (void *)((char *)ring - (char *)nifp), ring->num_slots);
3307 }
3308 for (i = 0; i < req->nr_rx_rings + req->nr_host_rx_rings; i++) {
3309 struct netmap_ring *ring = NETMAP_RXRING(nifp, i);
3310 D(" RX%d at offset %p slots %d", i,
3311 (void *)((char *)ring - (char *)nifp), ring->num_slots);
3312 }
3313 }
3314
3315 /* Print some debug information. */
3316 fprintf(stdout,
3317 "%s %s: %d queues, %d threads and %d cpus.\n",
3318 (g.td_type == TD_TYPE_SENDER) ? "Sending on" :
3319 ((g.td_type == TD_TYPE_RECEIVER) ? "Receiving from" :
3320 "Working on"),
3321 g.ifname,
3322 devqueues,
3323 g.nthreads,
3324 g.cpus);
3325 if (g.td_type == TD_TYPE_SENDER) {
3326 fprintf(stdout, "%s -> %s (%s -> %s)\n",
3327 g.src_ip.name, g.dst_ip.name,
3328 g.src_mac.name, g.dst_mac.name);
3329 }
3330
3331 out:
3332 /* Exit if something went wrong. */
3333 if (g.main_fd < 0) {
3334 D("aborting");
3335 usage(-1);
3336 }
3337 }
3338
3339
3340 if (g.options) {
3341 D("--- SPECIAL OPTIONS:%s%s%s%s%s%s\n",
3342 g.options & OPT_PREFETCH ? " prefetch" : "",
3343 g.options & OPT_ACCESS ? " access" : "",
3344 g.options & OPT_MEMCPY ? " memcpy" : "",
3345 g.options & OPT_INDIRECT ? " indirect" : "",
3346 g.options & OPT_COPY ? " copy" : "",
3347 g.options & OPT_RUBBISH ? " rubbish " : "");
3348 }
3349
3350 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0;
3351 if (g.tx_rate > 0) {
3352 /* try to have at least something every second,
3353 		 * reducing the burst size to roughly 1/300 s worth of data
3354 		 * (but no less than one packet)
3355 */
3356 uint64_t x;
3357 int lim = (g.tx_rate)/300;
3358 if (g.burst > lim)
3359 g.burst = lim;
3360 if (g.burst == 0)
3361 g.burst = 1;
3362 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate;
3363 g.tx_period.tv_nsec = x;
3364 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
3365 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
3366 }
3367 if (g.td_type == TD_TYPE_SENDER)
3368 D("Sending %d packets every %jd.%09ld s",
3369 g.burst, (intmax_t)g.tx_period.tv_sec, g.tx_period.tv_nsec);
3370 /* Install ^C handler. */
3371 global_nthreads = g.nthreads;
3372 sigemptyset(&ss);
3373 sigaddset(&ss, SIGINT);
3374 /* block SIGINT now, so that all created threads will inherit the mask */
3375 if (pthread_sigmask(SIG_BLOCK, &ss, NULL) < 0) {
3376 D("failed to block SIGINT: %s", strerror(errno));
3377 }
3378 if (start_threads(&g) < 0)
3379 return 1;
3380 /* Install the handler and re-enable SIGINT for the main thread */
3381 memset(&sa, 0, sizeof(sa));
3382 sa.sa_handler = sigint_h;
3383 if (sigaction(SIGINT, &sa, NULL) < 0) {
3384 D("failed to install ^C handler: %s", strerror(errno));
3385 }
3386
3387 if (pthread_sigmask(SIG_UNBLOCK, &ss, NULL) < 0) {
3388 D("failed to re-enable SIGINT: %s", strerror(errno));
3389 }
3390 main_thread(&g);
3391 free(targs);
3392 return 0;
3393 }
3394
3395 /* end of file */
3396