// SPDX-License-Identifier: GPL-2.0
/* tools/virtio/vhost_net_test.c */
#define _GNU_SOURCE
#include <getopt.h>
#include <limits.h>
#include <string.h>
#include <poll.h>
#include <sys/eventfd.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <stdbool.h>
#include <linux/vhost.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <linux/in.h>
#include <linux/if_packet.h>
#include <linux/virtio_net.h>
#include <netinet/ether.h>

#define HDR_LEN		sizeof(struct virtio_net_hdr_mrg_rxbuf)
#define TEST_BUF_LEN	256
#define TEST_PTYPE	ETH_P_LOOPBACK
#define DESC_NUM	256

/* Used by implementation of kmalloc() in tools/virtio/linux/kernel.h */
void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;

struct vq_info {
	int kick;
	int call;
	int idx;
	long started;
	long completed;
	struct pollfd fds;
	void *ring;
	/* copy used for control */
	struct vring vring;
	struct virtqueue *vq;
};

struct vdev_info {
	struct virtio_device vdev;
	int control;
	struct vq_info vqs[2];
	int nvqs;
	void *buf;
	size_t buf_size;
	char *test_buf;
	char *res_buf;
	struct vhost_memory *mem;
	int sock;
	int ifindex;
	unsigned char mac[ETHER_ADDR_LEN];
};

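/*
 * Create a TAP interface that prepends a virtio-net header of HDR_LEN
 * bytes to each frame, and save its MAC address in dev->mac for
 * building the test frames.
 */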
static int tun_alloc(struct vdev_info *dev, char *tun_name)
{
	struct ifreq ifr;
	int len = HDR_LEN;
	int fd, e;

	fd = open("/dev/net/tun", O_RDWR);
	if (fd < 0) {
		perror("Cannot open /dev/net/tun");
		return fd;
	}

	memset(&ifr, 0, sizeof(ifr));

	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	strncpy(ifr.ifr_name, tun_name, IFNAMSIZ);

	e = ioctl(fd, TUNSETIFF, &ifr);
	if (e < 0) {
		perror("ioctl[TUNSETIFF]");
		close(fd);
		return e;
	}

	e = ioctl(fd, TUNSETVNETHDRSZ, &len);
	if (e < 0) {
		perror("ioctl[TUNSETVNETHDRSZ]");
		close(fd);
		return e;
	}

	e = ioctl(fd, SIOCGIFHWADDR, &ifr);
	if (e < 0) {
		perror("ioctl[SIOCGIFHWADDR]");
		close(fd);
		return e;
	}

	memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
	return fd;
}

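/*
 * Open a raw AF_PACKET socket for TEST_PTYPE frames, look up the TAP
 * device's ifindex and bring the interface up.
 */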
static void vdev_create_socket(struct vdev_info *dev, char *tun_name)
{
	struct ifreq ifr;

	dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE));
	assert(dev->sock != -1);

	strncpy(ifr.ifr_name, tun_name, IFNAMSIZ);
	assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0);

	dev->ifindex = ifr.ifr_ifindex;

	/* Set the flags that bring the device up */
	assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0);
	ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
	assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0);
}

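/*
 * Inject the prebuilt test frame (payload only, without the virtio-net
 * header) into the TAP device through the packet socket, so vhost-net
 * can deliver it to the RX virtqueue.
 */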
static void vdev_send_packet(struct vdev_info *dev)
{
	char *sendbuf = dev->test_buf + HDR_LEN;
	struct sockaddr_ll saddrll = {0};
	int sockfd = dev->sock;
	int ret;

	saddrll.sll_family = PF_PACKET;
	saddrll.sll_ifindex = dev->ifindex;
	saddrll.sll_halen = ETH_ALEN;
	saddrll.sll_protocol = htons(TEST_PTYPE);

	ret = sendto(sockfd, sendbuf, TEST_BUF_LEN, 0,
		     (struct sockaddr *)&saddrll,
		     sizeof(struct sockaddr_ll));
	assert(ret >= 0);
}

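/* Virtqueue kick callback: notify vhost by writing to the kick eventfd. */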
static bool vq_notify(struct virtqueue *vq)
{
	struct vq_info *info = vq->priv;
	unsigned long long v = 1;
	int r;

	r = write(info->kick, &v, sizeof(v));
	assert(r == sizeof(v));

	return true;
}

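/*
 * Program one virtqueue into vhost: ring size, base index, the
 * userspace addresses of the descriptor/avail/used rings, and the
 * kick eventfd.
 */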
static void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
{
	struct vhost_vring_addr addr = {
		.index = info->idx,
		.desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
		.avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
		.used_user_addr = (uint64_t)(unsigned long)info->vring.used,
	};
	struct vhost_vring_state state = { .index = info->idx };
	struct vhost_vring_file file = { .index = info->idx };
	int r;

	state.num = info->vring.num;
	r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
	assert(r >= 0);

	state.num = 0;
	r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
	assert(r >= 0);

	r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
	assert(r >= 0);

	file.fd = info->kick;
	r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
	assert(r >= 0);
}

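/* (Re)initialize the ring memory and create a fresh virtqueue on top of it. */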
static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev)
{
	if (info->vq)
		vring_del_virtqueue(info->vq);

	memset(info->ring, 0, vring_size(num, 4096));
	vring_init(&info->vring, num, info->ring, 4096);
	info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false,
				       info->ring, vq_notify, NULL, "test");
	assert(info->vq);
	info->vq->priv = info;
}

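/*
 * Set up virtqueue 'idx': allocate page-aligned ring memory, create the
 * kick eventfd, configure the ring in vhost and attach the TAP fd as
 * the queue's backend.
 */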
static void vq_info_add(struct vdev_info *dev, int idx, int num, int fd)
{
	struct vhost_vring_file backend = { .index = idx, .fd = fd };
	struct vq_info *info = &dev->vqs[idx];
	int r;

	info->idx = idx;
	info->kick = eventfd(0, EFD_NONBLOCK);
	r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
	assert(r >= 0);
	vq_reset(info, num, &dev->vdev);
	vhost_vq_setup(dev, info);

	r = ioctl(dev->control, VHOST_NET_SET_BACKEND, &backend);
	assert(!r);
}

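/*
 * Initialize the test device: build the test Ethernet frame (a
 * TEST_PTYPE frame addressed to the TAP device's own MAC with a
 * counting byte pattern as payload), open /dev/vhost-net, register the
 * test buffer as a single memory region so "guest physical" and
 * userspace addresses are identical, and set the negotiated features.
 */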
static void vdev_info_init(struct vdev_info *dev, unsigned long long features)
{
	struct ether_header *eh;
	int i, r;

	dev->vdev.features = features;
	INIT_LIST_HEAD(&dev->vdev.vqs);
	spin_lock_init(&dev->vdev.vqs_list_lock);

	dev->buf_size = (HDR_LEN + TEST_BUF_LEN) * 2;
	dev->buf = malloc(dev->buf_size);
	assert(dev->buf);
	dev->test_buf = dev->buf;
	dev->res_buf = dev->test_buf + HDR_LEN + TEST_BUF_LEN;

	memset(dev->test_buf, 0, HDR_LEN + TEST_BUF_LEN);
	eh = (struct ether_header *)(dev->test_buf + HDR_LEN);
	eh->ether_type = htons(TEST_PTYPE);
	memcpy(eh->ether_dhost, dev->mac, ETHER_ADDR_LEN);
	memcpy(eh->ether_shost, dev->mac, ETHER_ADDR_LEN);

	for (i = sizeof(*eh); i < TEST_BUF_LEN; i++)
		dev->test_buf[i + HDR_LEN] = (char)i;

	dev->control = open("/dev/vhost-net", O_RDWR);
	assert(dev->control >= 0);

	r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
	assert(r >= 0);

	dev->mem = malloc(offsetof(struct vhost_memory, regions) +
			  sizeof(dev->mem->regions[0]));
	assert(dev->mem);
	memset(dev->mem, 0, offsetof(struct vhost_memory, regions) +
	       sizeof(dev->mem->regions[0]));
	dev->mem->nregions = 1;
	dev->mem->regions[0].guest_phys_addr = (long)dev->buf;
	dev->mem->regions[0].userspace_addr = (long)dev->buf;
	dev->mem->regions[0].memory_size = dev->buf_size;

	r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
	assert(r >= 0);

	r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
	assert(r >= 0);

	dev->nvqs = 2;
}

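/* Poll vq->fds for up to 100ms and, if it became readable, drain the event. */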
static void wait_for_interrupt(struct vq_info *vq)
{
	unsigned long long val;

	poll(&vq->fds, 1, 100);

	if (vq->fds.revents & POLLIN)
		read(vq->fds.fd, &val, sizeof(val));
}

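/*
 * Check that the payload bytes after the Ethernet header match the
 * counting pattern written by vdev_info_init().
 */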
static void verify_res_buf(char *res_buf)
{
	int i;

	for (i = ETHER_HDR_LEN; i < TEST_BUF_LEN; i++)
		assert(res_buf[i] == (char)i);
}

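/*
 * TX path test: repeatedly queue the test frame on the TX virtqueue,
 * kick vhost, and verify that every completed buffer also arrives on
 * the raw packet socket.
 */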
static void run_tx_test(struct vdev_info *dev, struct vq_info *vq,
			bool delayed, int bufs)
{
	long long spurious = 0;
	struct scatterlist sl;
	unsigned int len;
	int r;

	for (;;) {
		long started_before = vq->started;
		long completed_before = vq->completed;

		virtqueue_disable_cb(vq->vq);
		do {
			while (vq->started < bufs &&
			       (vq->started - vq->completed) < 1) {
				sg_init_one(&sl, dev->test_buf, HDR_LEN + TEST_BUF_LEN);
				r = virtqueue_add_outbuf(vq->vq, &sl, 1,
							 dev->test_buf + vq->started,
							 GFP_ATOMIC);
				if (unlikely(r != 0))
					break;

				++vq->started;

				if (unlikely(!virtqueue_kick(vq->vq))) {
					r = -1;
					break;
				}
			}

			if (vq->started >= bufs)
				r = -1;

			/* Flush out completed bufs if any */
			while (virtqueue_get_buf(vq->vq, &len)) {
				int n;

				n = recvfrom(dev->sock, dev->res_buf, TEST_BUF_LEN, 0, NULL, NULL);
				assert(n == TEST_BUF_LEN);
				verify_res_buf(dev->res_buf);

				++vq->completed;
				r = 0;
			}
		} while (r == 0);

		if (vq->completed == completed_before && vq->started == started_before)
			++spurious;

		assert(vq->completed <= bufs);
		assert(vq->started <= bufs);
		if (vq->completed == bufs)
			break;

		if (delayed) {
			if (virtqueue_enable_cb_delayed(vq->vq))
				wait_for_interrupt(vq);
		} else {
			if (virtqueue_enable_cb(vq->vq))
				wait_for_interrupt(vq);
		}
	}
	printf("TX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n",
	       spurious, vq->started, vq->completed);
}

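/*
 * RX path test: post receive buffers on the RX virtqueue, inject
 * frames through the packet socket with vdev_send_packet(), and verify
 * the payload of each buffer that vhost-net completes.
 */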
static void run_rx_test(struct vdev_info *dev, struct vq_info *vq,
			bool delayed, int bufs)
{
	long long spurious = 0;
	struct scatterlist sl;
	unsigned int len;
	int r;

	for (;;) {
		long started_before = vq->started;
		long completed_before = vq->completed;

		do {
			while (vq->started < bufs &&
			       (vq->started - vq->completed) < 1) {
				sg_init_one(&sl, dev->res_buf, HDR_LEN + TEST_BUF_LEN);

				r = virtqueue_add_inbuf(vq->vq, &sl, 1,
							dev->res_buf + vq->started,
							GFP_ATOMIC);
				if (unlikely(r != 0))
					break;

				++vq->started;

				vdev_send_packet(dev);

				if (unlikely(!virtqueue_kick(vq->vq))) {
					r = -1;
					break;
				}
			}

			if (vq->started >= bufs)
				r = -1;

			/* Flush out completed bufs if any */
			while (virtqueue_get_buf(vq->vq, &len)) {
				struct ether_header *eh;

				eh = (struct ether_header *)(dev->res_buf + HDR_LEN);

				/* tun netdev is up and running, only handle the
				 * TEST_PTYPE packet.
				 */
				if (eh->ether_type == htons(TEST_PTYPE)) {
					assert(len == TEST_BUF_LEN + HDR_LEN);
					verify_res_buf(dev->res_buf + HDR_LEN);
				}

				++vq->completed;
				r = 0;
			}
		} while (r == 0);

		if (vq->completed == completed_before && vq->started == started_before)
			++spurious;

		assert(vq->completed <= bufs);
		assert(vq->started <= bufs);
		if (vq->completed == bufs)
			break;
	}

	printf("RX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n",
	       spurious, vq->started, vq->completed);
}

static const char optstring[] = "h";
static const struct option longopts[] = {
	{
		.name = "help",
		.val = 'h',
	},
	{
		.name = "event-idx",
		.val = 'E',
	},
	{
		.name = "no-event-idx",
		.val = 'e',
	},
	{
		.name = "indirect",
		.val = 'I',
	},
	{
		.name = "no-indirect",
		.val = 'i',
	},
	{
		.name = "virtio-1",
		.val = '1',
	},
	{
		.name = "no-virtio-1",
		.val = '0',
	},
	{
		.name = "delayed-interrupt",
		.val = 'D',
	},
	{
		.name = "no-delayed-interrupt",
		.val = 'd',
	},
	{
		.name = "buf-num",
		.val = 'n',
		.has_arg = required_argument,
	},
	{
		.name = "batch",
		.val = 'b',
		.has_arg = required_argument,
	},
	{
	}
};

static void help(int status)
{
	fprintf(stderr, "Usage: vhost_net_test [--help]"
		" [--no-indirect]"
		" [--no-event-idx]"
		" [--no-virtio-1]"
		" [--delayed-interrupt]"
		" [--buf-num]"
		"\n");

	exit(status);
}

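/*
 * Example invocation (illustrative; assumes the binary built by the
 * tools/virtio Makefile is named vhost_net_test and runs as root, since
 * it opens /dev/net/tun and /dev/vhost-net):
 *
 *   ./vhost_net_test --buf-num 4096 --delayed-interrupt
 *
 * With no arguments, both the RX and TX tests run with the default
 * 0x100000 buffers and all three feature bits enabled.
 */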
int main(int argc, char **argv)
{
	unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
		(1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
	char tun_name[IFNAMSIZ];
	long nbufs = 0x100000;
	struct vdev_info dev;
	bool delayed = false;
	int o, fd;

	for (;;) {
		o = getopt_long(argc, argv, optstring, longopts, NULL);
		switch (o) {
		case -1:
			goto done;
		case '?':
			help(2);
		case 'e':
			features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
			break;
		case 'h':
			help(0);
		case 'i':
			features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
			break;
		case '0':
			features &= ~(1ULL << VIRTIO_F_VERSION_1);
			break;
		case 'D':
			delayed = true;
			break;
		case 'n':
			nbufs = strtol(optarg, NULL, 10);
			assert(nbufs > 0);
			break;
		default:
			assert(0);
			break;
		}
	}

done:
	memset(&dev, 0, sizeof(dev));
	snprintf(tun_name, IFNAMSIZ, "tun_%d", getpid());

	fd = tun_alloc(&dev, tun_name);
	assert(fd >= 0);

	vdev_info_init(&dev, features);
	vq_info_add(&dev, 0, DESC_NUM, fd);
	vq_info_add(&dev, 1, DESC_NUM, fd);
	vdev_create_socket(&dev, tun_name);

	run_rx_test(&dev, &dev.vqs[0], delayed, nbufs);
	run_tx_test(&dev, &dev.vqs[1], delayed, nbufs);

	return 0;
}