1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * This file implements multiple network backends (tap, netmap, ...),
30 * to be used by network frontends such as virtio-net and e1000.
31 * The API to access the backend (e.g. send/receive packets, negotiate
32 * features) is exported by net_backends.h.
33 */
34
35 #include <sys/types.h>
36 #ifndef WITHOUT_CAPSICUM
37 #include <sys/capsicum.h>
38 #endif
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <sys/uio.h>
42
43 #include <net/if.h>
44 #ifdef __FreeBSD__
45 #include <net/if_tap.h>
46 #endif
47
48 #include <assert.h>
49 #ifndef WITHOUT_CAPSICUM
50 #include <capsicum_helpers.h>
51 #endif
52 #include <err.h>
53 #include <errno.h>
54 #include <fcntl.h>
55 #include <poll.h>
56 #include <pthread.h>
57 #include <pthread_np.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <stdint.h>
61 #include <string.h>
62 #include <unistd.h>
63 #include <sysexits.h>
64 #include <unistd.h>
65
66 #include "config.h"
67 #include "debug.h"
68 #include "iov.h"
69 #include "mevent.h"
70 #include "net_backends.h"
71 #include "net_backends_priv.h"
72 #include "pci_emul.h"
73
/*
 * Number of bytes needed for one backend instance: the structure that
 * `be` points to plus the private area whose size is recorded in its
 * priv_size field.  The argument is fully parenthesized so that any
 * pointer expression (e.g. `p + 1`) can be passed safely.
 */
#define	NET_BE_SIZE(be)		(sizeof(*(be)) + (be)->priv_size)
75
76 #ifdef __FreeBSD__
77 void
tap_cleanup(struct net_backend * be)78 tap_cleanup(struct net_backend *be)
79 {
80 struct tap_priv *priv = NET_BE_PRIV(be);
81
82 if (priv->mevp) {
83 mevent_delete(priv->mevp);
84 }
85 if (be->fd != -1) {
86 close(be->fd);
87 be->fd = -1;
88 }
89 }
90
91 static int
tap_init(struct net_backend * be,const char * devname,nvlist_t * nvl __unused,net_be_rxeof_t cb,void * param)92 tap_init(struct net_backend *be, const char *devname,
93 nvlist_t *nvl __unused, net_be_rxeof_t cb, void *param)
94 {
95 struct tap_priv *priv = NET_BE_PRIV(be);
96 char tbuf[80];
97 int opt = 1, up = IFF_UP;
98
99 #ifndef WITHOUT_CAPSICUM
100 cap_rights_t rights;
101 #endif
102
103 if (cb == NULL) {
104 EPRINTLN("TAP backend requires non-NULL callback");
105 return (-1);
106 }
107
108 strcpy(tbuf, "/dev/");
109 strlcat(tbuf, devname, sizeof(tbuf));
110
111 be->fd = open(tbuf, O_RDWR);
112 if (be->fd == -1) {
113 EPRINTLN("open of tap device %s failed", tbuf);
114 goto error;
115 }
116
117 /*
118 * Set non-blocking and register for read
119 * notifications with the event loop
120 */
121 if (ioctl(be->fd, FIONBIO, &opt) < 0) {
122 EPRINTLN("tap device O_NONBLOCK failed");
123 goto error;
124 }
125
126 if (ioctl(be->fd, VMIO_SIOCSIFFLAGS, up)) {
127 EPRINTLN("tap device link up failed");
128 goto error;
129 }
130
131 #ifndef WITHOUT_CAPSICUM
132 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
133 if (caph_rights_limit(be->fd, &rights) == -1)
134 errx(EX_OSERR, "Unable to apply rights for sandbox");
135 #endif
136
137 memset(priv->bbuf, 0, sizeof(priv->bbuf));
138 priv->bbuflen = 0;
139
140 priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
141 if (priv->mevp == NULL) {
142 EPRINTLN("Could not register event");
143 goto error;
144 }
145
146 return (0);
147
148 error:
149 tap_cleanup(be);
150 return (-1);
151 }
152
153 /*
154 * Called to send a buffer chain out to the tap device
155 */
156 ssize_t
tap_send(struct net_backend * be,const struct iovec * iov,int iovcnt)157 tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
158 {
159 return (writev(be->fd, iov, iovcnt));
160 }
161
162 ssize_t
tap_peek_recvlen(struct net_backend * be)163 tap_peek_recvlen(struct net_backend *be)
164 {
165 struct tap_priv *priv = NET_BE_PRIV(be);
166 ssize_t ret;
167
168 if (priv->bbuflen > 0) {
169 /*
170 * We already have a packet in the bounce buffer.
171 * Just return its length.
172 */
173 return priv->bbuflen;
174 }
175
176 /*
177 * Read the next packet (if any) into the bounce buffer, so
178 * that we get to know its length and we can return that
179 * to the caller.
180 */
181 ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
182 if (ret < 0 && errno == EWOULDBLOCK) {
183 return (0);
184 }
185
186 if (ret > 0)
187 priv->bbuflen = ret;
188
189 return (ret);
190 }
191
192 ssize_t
tap_recv(struct net_backend * be,const struct iovec * iov,int iovcnt)193 tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
194 {
195 struct tap_priv *priv = NET_BE_PRIV(be);
196 ssize_t ret;
197
198 if (priv->bbuflen > 0) {
199 /*
200 * A packet is available in the bounce buffer, so
201 * we read it from there.
202 */
203 ret = buf_to_iov(priv->bbuf, priv->bbuflen,
204 iov, iovcnt, 0);
205
206 /* Mark the bounce buffer as empty. */
207 priv->bbuflen = 0;
208
209 return (ret);
210 }
211
212 ret = readv(be->fd, iov, iovcnt);
213 if (ret < 0 && errno == EWOULDBLOCK) {
214 return (0);
215 }
216
217 return (ret);
218 }
219
220 void
tap_recv_enable(struct net_backend * be)221 tap_recv_enable(struct net_backend *be)
222 {
223 struct tap_priv *priv = NET_BE_PRIV(be);
224
225 mevent_enable(priv->mevp);
226 }
227
228 void
tap_recv_disable(struct net_backend * be)229 tap_recv_disable(struct net_backend *be)
230 {
231 struct tap_priv *priv = NET_BE_PRIV(be);
232
233 mevent_disable(priv->mevp);
234 }
235
/*
 * Report the capabilities of the tap backend; there are none for now,
 * so the result is always 0.
 */
uint64_t
tap_get_cap(struct net_backend *be __unused)
{
	const uint64_t no_caps = 0;

	return (no_caps);
}
242
/*
 * Accept a capability request only when it asks for nothing: any
 * non-zero feature mask or virtio-net header length is rejected.
 * Returns 0 on acceptance and -1 on rejection.
 */
int
tap_set_cap(struct net_backend *be __unused, uint64_t features,
    unsigned vnet_hdr_len)
{
	if (features != 0 || vnet_hdr_len != 0)
		return (-1);

	return (0);
}
250
251 static struct net_backend tap_backend = {
252 .prefix = "tap",
253 .priv_size = sizeof(struct tap_priv),
254 .init = tap_init,
255 .cleanup = tap_cleanup,
256 .send = tap_send,
257 .peek_recvlen = tap_peek_recvlen,
258 .recv = tap_recv,
259 .recv_enable = tap_recv_enable,
260 .recv_disable = tap_recv_disable,
261 .get_cap = tap_get_cap,
262 .set_cap = tap_set_cap,
263 };
264
265 /* A clone of the tap backend, with a different prefix. */
266 static struct net_backend vmnet_backend = {
267 .prefix = "vmnet",
268 .priv_size = sizeof(struct tap_priv),
269 .init = tap_init,
270 .cleanup = tap_cleanup,
271 .send = tap_send,
272 .peek_recvlen = tap_peek_recvlen,
273 .recv = tap_recv,
274 .recv_enable = tap_recv_enable,
275 .recv_disable = tap_recv_disable,
276 .get_cap = tap_get_cap,
277 .set_cap = tap_set_cap,
278 };
279
280 DATA_SET(net_backend_set, tap_backend);
281 DATA_SET(net_backend_set, vmnet_backend);
282 #endif /* __FreeBSD__ */
283
284 #ifdef __FreeBSD__
285 int
netbe_legacy_config(nvlist_t * nvl,const char * opts)286 netbe_legacy_config(nvlist_t *nvl, const char *opts)
287 {
288 char *backend, *cp;
289
290 if (opts == NULL)
291 return (0);
292
293 cp = strchr(opts, ',');
294 if (cp == NULL) {
295 set_config_value_node(nvl, "backend", opts);
296 return (0);
297 }
298 backend = strndup(opts, cp - opts);
299 set_config_value_node(nvl, "backend", backend);
300 free(backend);
301 return (pci_parse_legacy_config(nvl, cp + 1));
302 }
303 #else
304 int
netbe_legacy_config(nvlist_t * nvl,const char * opts)305 netbe_legacy_config(nvlist_t *nvl, const char *opts)
306 {
307 char *config, *name, *tofree, *value;
308
309 if (opts == NULL)
310 return (0);
311
312 /* Default to the 'dlpi' backend - can still be overridden by opts */
313 set_config_value_node(nvl, "backend", "dlpi");
314 set_config_value_node(nvl, "type", "dlpi");
315
316 config = tofree = strdup(opts);
317 if (config == NULL)
318 err(4, "netbe_legacy_config strdup()");
319 while ((name = strsep(&config, ",")) != NULL) {
320 value = strchr(name, '=');
321 if (value != NULL) {
322 *value++ = '\0';
323 set_config_value_node(nvl, name, value);
324 } else {
325 set_config_value_node(nvl, "vnic", name);
326 }
327 }
328 free(tofree);
329
330 return (0);
331 }
332 #endif
333
334 /*
335 * Initialize a backend and attach to the frontend.
336 * This is called during frontend initialization.
337 * @ret is a pointer to the backend to be initialized
338 * @devname is the backend-name as supplied on the command line,
339 * e.g. -s 2:0,frontend-name,backend-name[,other-args]
340 * @cb is the receive callback supplied by the frontend,
341 * and it is invoked in the event loop when a receive
342 * event is generated in the hypervisor,
343 * @param is a pointer to the frontend, and normally used as
344 * the argument for the callback.
345 */
346 int
netbe_init(struct net_backend ** ret,nvlist_t * nvl,net_be_rxeof_t cb,void * param)347 netbe_init(struct net_backend **ret, nvlist_t *nvl, net_be_rxeof_t cb,
348 void *param)
349 {
350 struct net_backend **pbe, *nbe, *tbe = NULL;
351 const char *value, *type;
352 char *devname;
353 int err;
354
355 value = get_config_value_node(nvl, "backend");
356 if (value == NULL) {
357 return (-1);
358 }
359 devname = strdup(value);
360
361 /*
362 * Use the type given by configuration if exists; otherwise
363 * use the prefix of the backend as the type.
364 */
365 type = get_config_value_node(nvl, "type");
366 if (type == NULL)
367 type = devname;
368
369 /*
370 * Find the network backend that matches the user-provided
371 * device name. net_backend_set is built using a linker set.
372 */
373 SET_FOREACH(pbe, net_backend_set) {
374 if (strncmp(type, (*pbe)->prefix,
375 strlen((*pbe)->prefix)) == 0) {
376 tbe = *pbe;
377 assert(tbe->init != NULL);
378 assert(tbe->cleanup != NULL);
379 assert(tbe->send != NULL);
380 assert(tbe->recv != NULL);
381 assert(tbe->get_cap != NULL);
382 assert(tbe->set_cap != NULL);
383 break;
384 }
385 }
386
387 *ret = NULL;
388 if (tbe == NULL) {
389 free(devname);
390 return (EINVAL);
391 }
392
393 nbe = calloc(1, NET_BE_SIZE(tbe));
394 *nbe = *tbe; /* copy the template */
395 nbe->fd = -1;
396 nbe->sc = param;
397 nbe->be_vnet_hdr_len = 0;
398 nbe->fe_vnet_hdr_len = 0;
399
400 /* Initialize the backend. */
401 err = nbe->init(nbe, devname, nvl, cb, param);
402 if (err) {
403 free(devname);
404 free(nbe);
405 return (err);
406 }
407
408 *ret = nbe;
409 free(devname);
410
411 return (0);
412 }
413
414 void
netbe_cleanup(struct net_backend * be)415 netbe_cleanup(struct net_backend *be)
416 {
417
418 if (be != NULL) {
419 be->cleanup(be);
420 free(be);
421 }
422 }
423
424 uint64_t
netbe_get_cap(struct net_backend * be)425 netbe_get_cap(struct net_backend *be)
426 {
427
428 assert(be != NULL);
429 return (be->get_cap(be));
430 }
431
432 int
netbe_set_cap(struct net_backend * be,uint64_t features,unsigned vnet_hdr_len)433 netbe_set_cap(struct net_backend *be, uint64_t features,
434 unsigned vnet_hdr_len)
435 {
436 int ret;
437
438 assert(be != NULL);
439
440 /* There are only three valid lengths, i.e., 0, 10 and 12. */
441 if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
442 && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
443 return (-1);
444
445 be->fe_vnet_hdr_len = vnet_hdr_len;
446
447 ret = be->set_cap(be, features, vnet_hdr_len);
448 assert(be->be_vnet_hdr_len == 0 ||
449 be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
450
451 return (ret);
452 }
453
454 ssize_t
netbe_send(struct net_backend * be,const struct iovec * iov,int iovcnt)455 netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
456 {
457
458 return (be->send(be, iov, iovcnt));
459 }
460
461 ssize_t
netbe_peek_recvlen(struct net_backend * be)462 netbe_peek_recvlen(struct net_backend *be)
463 {
464
465 return (be->peek_recvlen(be));
466 }
467
468 /*
469 * Try to read a packet from the backend, without blocking.
470 * If no packets are available, return 0. In case of success, return
471 * the length of the packet just read. Return -1 in case of errors.
472 */
473 ssize_t
netbe_recv(struct net_backend * be,const struct iovec * iov,int iovcnt)474 netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
475 {
476
477 return (be->recv(be, iov, iovcnt));
478 }
479
480 /*
481 * Read a packet from the backend and discard it.
482 * Returns the size of the discarded packet or zero if no packet was available.
483 * A negative error code is returned in case of read error.
484 */
485 ssize_t
netbe_rx_discard(struct net_backend * be)486 netbe_rx_discard(struct net_backend *be)
487 {
488 /*
489 * MP note: the dummybuf is only used to discard frames,
490 * so there is no need for it to be per-vtnet or locked.
491 * We only make it large enough for TSO-sized segment.
492 */
493 static uint8_t dummybuf[65536 + 64];
494 struct iovec iov;
495
496 #ifdef __FreeBSD__
497 iov.iov_base = dummybuf;
498 #else
499 iov.iov_base = (caddr_t)dummybuf;
500 #endif
501 iov.iov_len = sizeof(dummybuf);
502
503 return netbe_recv(be, &iov, 1);
504 }
505
506 void
netbe_rx_disable(struct net_backend * be)507 netbe_rx_disable(struct net_backend *be)
508 {
509
510 return be->recv_disable(be);
511 }
512
513 void
netbe_rx_enable(struct net_backend * be)514 netbe_rx_enable(struct net_backend *be)
515 {
516
517 return be->recv_enable(be);
518 }
519
520 size_t
netbe_get_vnet_hdr_len(struct net_backend * be)521 netbe_get_vnet_hdr_len(struct net_backend *be)
522 {
523
524 return (be->be_vnet_hdr_len);
525 }
526
527 #ifndef __FreeBSD__
528 int
netbe_get_mac(net_backend_t * be,void * buf,size_t * buflen)529 netbe_get_mac(net_backend_t *be, void *buf, size_t *buflen)
530 {
531 if (be->get_mac == NULL)
532 return (ENOTSUP);
533 return (be->get_mac(be, buf, buflen));
534 }
535 #endif
536