xref: /freebsd/usr.sbin/bhyve/pci_virtio_net.c (revision 036d2e814bf0f5d88ffb4b24c159320894541757)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/linker_set.h>
36 #include <sys/select.h>
37 #include <sys/uio.h>
38 #include <sys/ioctl.h>
39 #include <net/ethernet.h>
40 #include <net/if.h> /* IFNAMSIZ */
41 
42 #include <err.h>
43 #include <errno.h>
44 #include <fcntl.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <stdint.h>
48 #include <string.h>
49 #include <strings.h>
50 #include <unistd.h>
51 #include <assert.h>
52 #include <pthread.h>
53 #include <pthread_np.h>
54 
55 #include "bhyverun.h"
56 #include "pci_emul.h"
57 #include "mevent.h"
58 #include "virtio.h"
59 #include "net_utils.h"
60 #include "net_backends.h"
61 
62 #define VTNET_RINGSZ	1024
63 
64 #define VTNET_MAXSEGS	256
65 
66 #define VTNET_S_HOSTCAPS      \
67   ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
68     VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
69 
/*
 * PCI config-space "registers": the guest-visible device configuration
 * image. Reads are served from this struct by pci_vtnet_cfgread(); the
 * driver may rewrite the MAC bytes through pci_vtnet_cfgwrite().
 */
struct virtio_net_config {
	uint8_t  mac[6];	/* MAC address; driver-writable */
	uint16_t status;	/* non-zero ("link up") when the backend opened */
} __packed;
77 
78 /*
79  * Queue definitions.
80  */
81 #define VTNET_RXQ	0
82 #define VTNET_TXQ	1
83 #define VTNET_CTLQ	2	/* NB: not yet supported */
84 
85 #define VTNET_MAXQ	3
86 
/*
 * Debug printf
 *
 * Wrapped in do { } while (0) so each macro expands to a single
 * statement: the bare-"if" form of DPRINTF would capture a following
 * "else" (dangling-else) and could swallow statements when used
 * unbraced inside an if/else.
 */
static int pci_vtnet_debug;
#define DPRINTF(params) do {		\
	if (pci_vtnet_debug)		\
		printf params;		\
} while (0)
#define WPRINTF(params) do { printf params; } while (0)
93 
94 /*
95  * Per-device softc
96  */
/* Per-device softc: one instance per emulated virtio-net PCI function. */
struct pci_vtnet_softc {
	struct virtio_softc vsc_vs;	/* generic virtio device state */
	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];	/* RX and TX rings */
	pthread_mutex_t vsc_mtx;	/* lock installed as vsc_vs.vs_mtx */

	net_backend_t	*vsc_be;	/* network backend, NULL if none opened */

	int		vsc_rx_ready;	/* set on first RX kick from the guest */
	int		resetting;	/* protected by tx_mtx */

	uint64_t	vsc_features;	/* negotiated features */

	pthread_mutex_t	rx_mtx;		/* serializes RX path against reset */
	unsigned int	rx_vhdrlen;	/* virtio-net RX header length in use */
	int		rx_merge;	/* merged rx bufs in use */

	pthread_t 	tx_tid;		/* TX processing thread */
	pthread_mutex_t	tx_mtx;		/* protects tx_cond/tx_in_progress */
	pthread_cond_t	tx_cond;	/* wakes the TX thread on a kick */
	int		tx_in_progress;	/* TX thread busy (outside cond_wait) */

	struct virtio_net_config vsc_config;	/* guest-visible config regs */
	struct virtio_consts vsc_consts;	/* per-device copy; caps vary */
};
121 
122 static void pci_vtnet_reset(void *);
123 /* static void pci_vtnet_notify(void *, struct vqueue_info *); */
124 static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
125 static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
126 static void pci_vtnet_neg_features(void *, uint64_t);
127 
128 static struct virtio_consts vtnet_vi_consts = {
129 	"vtnet",		/* our name */
130 	VTNET_MAXQ - 1,		/* we currently support 2 virtqueues */
131 	sizeof(struct virtio_net_config), /* config reg size */
132 	pci_vtnet_reset,	/* reset */
133 	NULL,			/* device-wide qnotify -- not used */
134 	pci_vtnet_cfgread,	/* read PCI config */
135 	pci_vtnet_cfgwrite,	/* write PCI config */
136 	pci_vtnet_neg_features,	/* apply negotiated features */
137 	VTNET_S_HOSTCAPS,	/* our capabilities */
138 };
139 
/*
 * Device reset handler, invoked through vc_reset. Quiesces both the RX
 * path and the TX thread, then resets the generic virtio state.
 * Lock order here is rx_mtx before tx_mtx.
 */
static void
pci_vtnet_reset(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device reset requested !\n"));

	/* Acquire the RX lock to block RX processing. */
	pthread_mutex_lock(&sc->rx_mtx);

	/* Set sc->resetting and give a chance to the TX thread to stop. */
	pthread_mutex_lock(&sc->tx_mtx);
	sc->resetting = 1;
	while (sc->tx_in_progress) {
		/*
		 * Drop tx_mtx so the TX thread can finish its current batch,
		 * observe sc->resetting and go back to sleep; poll until it
		 * clears tx_in_progress.
		 */
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}

	/*
	 * Back to pre-negotiation RX defaults; pci_vtnet_neg_features()
	 * may shrink rx_vhdrlen again after the next negotiation.
	 */
	sc->vsc_rx_ready = 0;
	sc->rx_merge = 1;
	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);

	/*
	 * Now reset rings, MSI-X vectors, and negotiated capabilities.
	 * Do that with the TX lock held, since we need to reset
	 * sc->resetting.
	 */
	vi_reset_dev(&sc->vsc_vs);

	sc->resetting = 0;
	pthread_mutex_unlock(&sc->tx_mtx);
	pthread_mutex_unlock(&sc->rx_mtx);
}
174 
/*
 * RX path: drain packets from the backend into guest-posted RX buffers.
 * Called with sc->rx_mtx held (see pci_vtnet_rx_callback() and
 * pci_vtnet_reset()).
 */
static void
pci_vtnet_rx(struct pci_vtnet_softc *sc)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct vqueue_info *vq;
	int len, n;
	uint16_t idx;

	if (!sc->vsc_rx_ready) {
		/*
		 * The rx ring has not yet been set up.
		 * Drop the packet and try later.
		 */
		netbe_rx_discard(sc->vsc_be);
		return;
	}

	/*
	 * Check for available rx buffers
	 */
	vq = &sc->vsc_queues[VTNET_RXQ];
	if (!vq_has_descs(vq)) {
		/*
		 * No available rx buffers. Drop the packet and try later.
		 * Interrupt on empty, if that's negotiated.
		 */
		netbe_rx_discard(sc->vsc_be);
		vq_endchains(vq, /*used_all_avail=*/1);
		return;
	}

	do {
		/*
		 * Get descriptor chain.
		 */
		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
		assert(n >= 1 && n <= VTNET_MAXSEGS);

		/* Ask the backend to fill this chain with one packet. */
		len = netbe_recv(sc->vsc_be, iov, n);

		if (len <= 0) {
			/*
			 * No more packets (len == 0), or backend errored
			 * (err < 0). Return unused available buffers.
			 */
			vq_retchain(vq);
			/* Interrupt if needed/appropriate and stop. */
			vq_endchains(vq, /*used_all_avail=*/0);
			return;
		}

		/* Publish the info to the guest */
		vq_relchain(vq, idx, (uint32_t)len);
	} while (vq_has_descs(vq));

	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
	vq_endchains(vq, /*used_all_avail=*/1);
}
233 
234 /*
235  * Called when there is read activity on the backend file descriptor.
236  * Each buffer posted by the guest is assumed to be able to contain
237  * an entire ethernet frame + rx header.
238  */
239 static void
240 pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
241 {
242 	struct pci_vtnet_softc *sc = param;
243 
244 	pthread_mutex_lock(&sc->rx_mtx);
245 	pci_vtnet_rx(sc);
246 	pthread_mutex_unlock(&sc->rx_mtx);
247 
248 }
249 
250 /* Called on RX kick. */
251 static void
252 pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
253 {
254 	struct pci_vtnet_softc *sc = vsc;
255 
256 	/*
257 	 * A qnotify means that the rx process can now begin
258 	 */
259 	pthread_mutex_lock(&sc->rx_mtx);
260 	if (sc->vsc_rx_ready == 0) {
261 		sc->vsc_rx_ready = 1;
262 		vq_kick_disable(vq);
263 	}
264 	pthread_mutex_unlock(&sc->rx_mtx);
265 }
266 
267 /* TX virtqueue processing, called by the TX thread. */
268 static void
269 pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
270 {
271 	struct iovec iov[VTNET_MAXSEGS + 1];
272 	uint16_t idx;
273 	ssize_t len;
274 	int n;
275 
276 	/*
277 	 * Obtain chain of descriptors. The first descriptor also
278 	 * contains the virtio-net header.
279 	 */
280 	n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
281 	assert(n >= 1 && n <= VTNET_MAXSEGS);
282 
283 	len = netbe_send(sc->vsc_be, iov, n);
284 
285 	/* chain is processed, release it and set len */
286 	vq_relchain(vq, idx, len > 0 ? len : 0);
287 }
288 
289 /* Called on TX kick. */
290 static void
291 pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
292 {
293 	struct pci_vtnet_softc *sc = vsc;
294 
295 	/*
296 	 * Any ring entries to process?
297 	 */
298 	if (!vq_has_descs(vq))
299 		return;
300 
301 	/* Signal the tx thread for processing */
302 	pthread_mutex_lock(&sc->tx_mtx);
303 	vq_kick_disable(vq);
304 	if (sc->tx_in_progress == 0)
305 		pthread_cond_signal(&sc->tx_cond);
306 	pthread_mutex_unlock(&sc->tx_mtx);
307 }
308 
/*
 * Thread which will handle processing of TX desc
 */
static void *
pci_vtnet_tx_thread(void *param)
{
	struct pci_vtnet_softc *sc = param;
	struct vqueue_info *vq;
	int error;

	vq = &sc->vsc_queues[VTNET_TXQ];

	/*
	 * Let us wait till the tx queue pointers get initialised &
	 * first tx signaled
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
	assert(error == 0);

	for (;;) {
		/* note - tx mutex is locked here */
		while (sc->resetting || !vq_has_descs(vq)) {
			/*
			 * Re-enable guest kicks before sleeping, then
			 * re-check the ring: a descriptor posted between the
			 * emptiness check and the kick-enable would otherwise
			 * be missed (no kick was generated for it).
			 */
			vq_kick_enable(vq);
			if (!sc->resetting && vq_has_descs(vq))
				break;

			/* Publish idleness; pci_vtnet_reset() polls this. */
			sc->tx_in_progress = 0;
			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
			assert(error == 0);
		}
		/* Work pending: suppress kicks while we drain the ring. */
		vq_kick_disable(vq);
		sc->tx_in_progress = 1;
		pthread_mutex_unlock(&sc->tx_mtx);

		do {
			/*
			 * Run through entries, placing them into
			 * iovecs and sending when an end-of-packet
			 * is found
			 */
			pci_vtnet_proctx(sc, vq);
		} while (vq_has_descs(vq));

		/*
		 * Generate an interrupt if needed.
		 */
		vq_endchains(vq, /*used_all_avail=*/1);

		pthread_mutex_lock(&sc->tx_mtx);
	}
}
361 
#ifdef notyet
/* Control-queue kick handler; the control queue is not supported yet. */
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{

	DPRINTF(("vtnet: control qnotify!\n\r"));
}
#endif
370 
371 static int
372 pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
373 {
374 	struct pci_vtnet_softc *sc;
375 	char tname[MAXCOMLEN + 1];
376 	int mac_provided;
377 
378 	/*
379 	 * Allocate data structures for further virtio initializations.
380 	 * sc also contains a copy of vtnet_vi_consts, since capabilities
381 	 * change depending on the backend.
382 	 */
383 	sc = calloc(1, sizeof(struct pci_vtnet_softc));
384 
385 	sc->vsc_consts = vtnet_vi_consts;
386 	pthread_mutex_init(&sc->vsc_mtx, NULL);
387 
388 	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
389 	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
390 	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
391 	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
392 #ifdef notyet
393 	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
394         sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
395 #endif
396 
397 	/*
398 	 * Attempt to open the backend device and read the MAC address
399 	 * if specified.
400 	 */
401 	mac_provided = 0;
402 	if (opts != NULL) {
403 		char *devname;
404 		char *vtopts;
405 		int err;
406 
407 		devname = vtopts = strdup(opts);
408 		(void) strsep(&vtopts, ",");
409 
410 		if (vtopts != NULL) {
411 			err = net_parsemac(vtopts, sc->vsc_config.mac);
412 			if (err != 0) {
413 				free(devname);
414 				free(sc);
415 				return (err);
416 			}
417 			mac_provided = 1;
418 		}
419 
420 		err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback,
421 		          sc);
422 		free(devname);
423 		if (err) {
424 			free(sc);
425 			return (err);
426 		}
427 		sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be);
428 	}
429 
430 	if (!mac_provided) {
431 		net_genmac(pi, sc->vsc_config.mac);
432 	}
433 
434 	/* initialize config space */
435 	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
436 	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
437 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
438 	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
439 	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
440 
441 	/* Link is up if we managed to open backend device. */
442 	sc->vsc_config.status = (opts == NULL || sc->vsc_be);
443 
444 	vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
445 	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
446 
447 	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
448 	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) {
449 		free(sc);
450 		return (1);
451 	}
452 
453 	/* use BAR 0 to map config regs in IO space */
454 	vi_set_io_bar(&sc->vsc_vs, 0);
455 
456 	sc->resetting = 0;
457 
458 	sc->rx_merge = 1;
459 	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
460 	pthread_mutex_init(&sc->rx_mtx, NULL);
461 
462 	/*
463 	 * Initialize tx semaphore & spawn TX processing thread.
464 	 * As of now, only one thread for TX desc processing is
465 	 * spawned.
466 	 */
467 	sc->tx_in_progress = 0;
468 	pthread_mutex_init(&sc->tx_mtx, NULL);
469 	pthread_cond_init(&sc->tx_cond, NULL);
470 	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
471 	snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
472 	    pi->pi_func);
473 	pthread_set_name_np(sc->tx_tid, tname);
474 
475 	return (0);
476 }
477 
478 static int
479 pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
480 {
481 	struct pci_vtnet_softc *sc = vsc;
482 	void *ptr;
483 
484 	if (offset < (int)sizeof(sc->vsc_config.mac)) {
485 		assert(offset + size <= (int)sizeof(sc->vsc_config.mac));
486 		/*
487 		 * The driver is allowed to change the MAC address
488 		 */
489 		ptr = &sc->vsc_config.mac[offset];
490 		memcpy(ptr, &value, size);
491 	} else {
492 		/* silently ignore other writes */
493 		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
494 	}
495 
496 	return (0);
497 }
498 
499 static int
500 pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
501 {
502 	struct pci_vtnet_softc *sc = vsc;
503 	void *ptr;
504 
505 	ptr = (uint8_t *)&sc->vsc_config + offset;
506 	memcpy(retval, ptr, size);
507 	return (0);
508 }
509 
510 static void
511 pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
512 {
513 	struct pci_vtnet_softc *sc = vsc;
514 
515 	sc->vsc_features = negotiated_features;
516 
517 	if (!(negotiated_features & VIRTIO_NET_F_MRG_RXBUF)) {
518 		sc->rx_merge = 0;
519 		/* Without mergeable rx buffers, virtio-net header is 2
520 		 * bytes shorter than sizeof(struct virtio_net_rxhdr). */
521 		sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
522 	}
523 
524 	/* Tell the backend to enable some capabilities it has advertised. */
525 	netbe_set_cap(sc->vsc_be, negotiated_features, sc->rx_vhdrlen);
526 }
527 
/* Register the "virtio-net" emulation with the PCI emulation framework. */
static struct pci_devemu pci_de_vnet = {
	.pe_emu = 	"virtio-net",
	.pe_init =	pci_vtnet_init,
	.pe_barwrite =	vi_pci_write,	/* BAR accesses via generic virtio */
	.pe_barread =	vi_pci_read
};
PCI_EMUL_SET(pci_de_vnet);
535