1 /*
2 * Copyright (C) 2013-2016 Universita` di Pisa
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27
28 /*
29 * This module implements the VALE switch for netmap
30
31 --- VALE SWITCH ---
32
33 NMG_LOCK() serializes all modifications to switches and ports.
34 A switch cannot be deleted until all ports are gone.
35
36 For each switch, an SX lock (RWlock on linux) protects
37 deletion of ports. When a port is added or deleted, the
38 lock is acquired in exclusive mode (after holding NMG_LOCK).
39 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
40 The lock is held throughout the entire forwarding cycle,
41 during which the thread may incur a page fault.
42 Hence it is important that sleepable shared locks are used.
43
44 On the rx ring, the per-port lock is grabbed initially to reserve
45 a number of slots in the ring, then the lock is released,
46 packets are copied from source to destination, and then
47 the lock is acquired again and the receive ring is updated.
48 (A similar thing is done on the tx ring for NIC and host stack
49 ports attached to the switch)
50
51 */
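/*
 * A minimal sketch of the forwarding-side locking pattern described
 * above (illustrative only: the actual forwarding loop lives in the
 * VALE code, netmap_vale.c, and uses the nm_bdg_fwd machinery; the
 * names dst_kring/lease below are placeholders, not symbols of this
 * module):
 *
 *	BDG_RLOCK(b);				// shared, sleepable
 *	// lookup destination port(s) via b->bdg_ops.lookup()
 *	mtx_lock(&dst_kring->q_lock);		// reserve slots on the rx ring
 *	// compute how many slots can be used, advance a lease
 *	mtx_unlock(&dst_kring->q_lock);
 *	// copy payloads source -> destination; this may page-fault
 *	mtx_lock(&dst_kring->q_lock);		// publish: update the ring
 *	// advance hwtail over the completed leases, then notify
 *	mtx_unlock(&dst_kring->q_lock);
 *	BDG_RUNLOCK(b);
 */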
52
53 /*
54 * OS-specific code that is used only within this file.
55 * Other OS-specific code that must be accessed by drivers
56 * is present in netmap_kern.h
57 */
58
59 #if defined(__FreeBSD__)
60 #include <sys/cdefs.h> /* prerequisite */
61 #include <sys/types.h>
62 #include <sys/errno.h>
63 #include <sys/param.h> /* defines used in kernel.h */
64 #include <sys/kernel.h> /* types used in module initialization */
65 #include <sys/conf.h> /* cdevsw struct, UID, GID */
66 #include <sys/sockio.h>
67 #include <sys/socketvar.h> /* struct socket */
68 #include <sys/malloc.h>
69 #include <sys/poll.h>
70 #include <sys/rwlock.h>
71 #include <sys/socket.h> /* sockaddrs */
72 #include <sys/selinfo.h>
73 #include <sys/sysctl.h>
74 #include <net/if.h>
75 #include <net/if_var.h>
76 #include <net/bpf.h> /* BIOCIMMEDIATE */
77 #include <machine/bus.h> /* bus_dmamap_* */
78 #include <sys/endian.h>
79 #include <sys/refcount.h>
80 #include <sys/smp.h>
81
82
83 #elif defined(linux)
84
85 #include "bsd_glue.h"
86
87 #elif defined(__APPLE__)
88
89 #warning OSX support is only partial
90 #include "osx_glue.h"
91
92 #elif defined(_WIN32)
93 #include "win_glue.h"
94
95 #else
96
97 #error Unsupported platform
98
99 #endif /* unsupported */
100
101 /*
102 * common headers
103 */
104
105 #include <net/netmap.h>
106 #include <dev/netmap/netmap_kern.h>
107 #include <dev/netmap/netmap_mem2.h>
108
109 #include <dev/netmap/netmap_bdg.h>
110
111 const char*
112 netmap_bdg_name(struct netmap_vp_adapter *vp)
113 {
114 struct nm_bridge *b = vp->na_bdg;
115 if (b == NULL)
116 return NULL;
117 return b->bdg_basename;
118 }
119
120
121 #ifndef CONFIG_NET_NS
122 /*
123 * XXX in principle nm_bridges could be created dynamically
124 * Right now we have a static array and deletions are protected
125 * by an exclusive lock.
126 */
127 struct nm_bridge *nm_bridges;
128 #endif /* !CONFIG_NET_NS */
129
130
131 static int
132 nm_is_id_char(const char c)
133 {
134 return (c >= 'a' && c <= 'z') ||
135 (c >= 'A' && c <= 'Z') ||
136 (c >= '0' && c <= '9') ||
137 (c == '_');
138 }
139
140 /* Validate the name of a bdg port and return the
141 * position of the ":" character. */
142 static int
143 nm_bdg_name_validate(const char *name, size_t prefixlen)
144 {
145 int colon_pos = -1;
146 int i;
147
148 if (!name || strlen(name) < prefixlen) {
149 return -1;
150 }
151
152 for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
153 if (name[i] == ':') {
154 colon_pos = i;
155 break;
156 } else if (!nm_is_id_char(name[i])) {
157 return -1;
158 }
159 }
160
161 if (strlen(name) - colon_pos > IFNAMSIZ) {
162 /* interface name too long */
163 return -1;
164 }
165
166 return colon_pos;
167 }
168
169 /*
170 * locate a bridge among the existing ones.
171 * MUST BE CALLED WITH NMG_LOCK()
172 *
173 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
174 * We assume that this is called with a name of at least NM_NAME chars.
175 */
176 struct nm_bridge *
177 nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
178 {
179 int i, namelen;
180 struct nm_bridge *b = NULL, *bridges;
181 u_int num_bridges;
182
183 NMG_LOCK_ASSERT();
184
185 netmap_bns_getbridges(&bridges, &num_bridges);
186
187 namelen = nm_bdg_name_validate(name,
188 (ops != NULL ? strlen(ops->name) : 0));
189 if (namelen < 0) {
190 nm_prerr("invalid bridge name %s", name ? name : "(null)");
191 return NULL;
192 }
193
194 /* lookup the name, remember empty slot if there is one */
195 for (i = 0; i < num_bridges; i++) {
196 struct nm_bridge *x = bridges + i;
197
198 if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
199 if (create && b == NULL)
200 b = x; /* record empty slot */
201 } else if (x->bdg_namelen != namelen) {
202 continue;
203 } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
204 nm_prdis("found '%.*s' at %d", namelen, name, i);
205 b = x;
206 break;
207 }
208 }
209 if (i == num_bridges && b) { /* name not found, can create entry */
210 /* initialize the bridge */
211 nm_prdis("create new bridge %s with ports %d", b->bdg_basename,
212 b->bdg_active_ports);
213 b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
214 if (b->ht == NULL) {
215 nm_prerr("failed to allocate hash table");
216 return NULL;
217 }
218 strncpy(b->bdg_basename, name, namelen);
219 b->bdg_namelen = namelen;
220 b->bdg_active_ports = 0;
221 for (i = 0; i < NM_BDG_MAXPORTS; i++)
222 b->bdg_port_index[i] = i;
223 /* set the default function */
224 b->bdg_ops = b->bdg_saved_ops = *ops;
225 b->private_data = b->ht;
226 b->bdg_flags = 0;
227 NM_BNS_GET(b);
228 }
229 return b;
230 }
231
232
233 int
234 netmap_bdg_free(struct nm_bridge *b)
235 {
236 if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
237 return EBUSY;
238 }
239
240 nm_prdis("marking bridge %s as free", b->bdg_basename);
241 nm_os_free(b->ht);
242 memset(&b->bdg_ops, 0, sizeof(b->bdg_ops));
243 memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops));
244 b->bdg_flags = 0;
245 NM_BNS_PUT(b);
246 return 0;
247 }
248
249 /* Called by external kernel modules (e.g., Openvswitch)
250 * to modify the private data previously given to regops().
251 * 'name' may be just the bridge's name (including ':' if it
252 * is not just NM_BDG_NAME).
253 * Called without NMG_LOCK.
254 */
255 int
256 netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
257 void *callback_data, void *auth_token)
258 {
259 void *private_data = NULL;
260 struct nm_bridge *b;
261 int error = 0;
262
263 NMG_LOCK();
264 b = nm_find_bridge(name, 0 /* don't create */, NULL);
265 if (!b) {
266 error = EINVAL;
267 goto unlock_update_priv;
268 }
269 if (!nm_bdg_valid_auth_token(b, auth_token)) {
270 error = EACCES;
271 goto unlock_update_priv;
272 }
273 BDG_WLOCK(b);
274 private_data = callback(b->private_data, callback_data, &error);
275 b->private_data = private_data;
276 BDG_WUNLOCK(b);
277
278 unlock_update_priv:
279 NMG_UNLOCK();
280 return error;
281 }
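/*
 * Usage sketch for the helper above, as seen from an external module.
 * The callback receives the current private data and returns the new
 * one (its prototype is inferred from the call site above);
 * 'my_update_cb', 'my_cookie' and 'token' are hypothetical names.
 *
 *	static void *
 *	my_update_cb(void *current_private, void *callback_data, int *error)
 *	{
 *		// inspect or replace the bridge private data
 *		*error = 0;
 *		return callback_data;
 *	}
 *
 *	error = netmap_bdg_update_private_data("vale0:", my_update_cb,
 *			my_cookie, token);
 */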
282
283
284
285 /* remove from bridge b the ports in slots hw and sw
286 * (sw can be -1 if not needed)
287 */
288 void
289 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
290 {
291 int s_hw = hw, s_sw = sw;
292 int i, lim = b->bdg_active_ports;
293 uint32_t *tmp = b->tmp_bdg_port_index;
294
295 /*
296 New algorithm:
297 make a copy of bdg_port_index;
298 lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
299 in the array of bdg_port_index, replacing them with
300 entries from the bottom of the array;
301 decrement bdg_active_ports;
302 acquire BDG_WLOCK() and copy back the array.
303 */
304
305 if (netmap_debug & NM_DEBUG_BDG)
306 nm_prinf("detach %d and %d (lim %d)", hw, sw, lim);
307 /* make a copy of the list of active ports, update it,
308 * and then copy back within BDG_WLOCK().
309 */
310 memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
311 for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
312 if (hw >= 0 && tmp[i] == hw) {
313 nm_prdis("detach hw %d at %d", hw, i);
314 lim--; /* point to last active port */
315 tmp[i] = tmp[lim]; /* swap with i */
316 tmp[lim] = hw; /* now this is inactive */
317 hw = -1;
318 } else if (sw >= 0 && tmp[i] == sw) {
319 nm_prdis("detach sw %d at %d", sw, i);
320 lim--;
321 tmp[i] = tmp[lim];
322 tmp[lim] = sw;
323 sw = -1;
324 } else {
325 i++;
326 }
327 }
328 if (hw >= 0 || sw >= 0) {
329 nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw);
330 }
331
332 BDG_WLOCK(b);
333 if (b->bdg_ops.dtor)
334 b->bdg_ops.dtor(b->bdg_ports[s_hw]);
335 b->bdg_ports[s_hw] = NULL;
336 if (s_sw >= 0) {
337 b->bdg_ports[s_sw] = NULL;
338 }
339 memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
340 b->bdg_active_ports = lim;
341 BDG_WUNLOCK(b);
342
343 nm_prdis("now %d active ports", lim);
344 netmap_bdg_free(b);
345 }
346
347
348 /* nm_bdg_ctl callback for VALE ports */
349 int
350 netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
351 {
352 struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
353 struct nm_bridge *b = vpna->na_bdg;
354
355 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
356 return 0; /* nothing to do */
357 }
358 if (b) {
359 netmap_set_all_rings(na, 0 /* disable */);
360 netmap_bdg_detach_common(b, vpna->bdg_port, -1);
361 vpna->na_bdg = NULL;
362 netmap_set_all_rings(na, 1 /* enable */);
363 }
364 /* the reference was taken just for the attach */
365 netmap_adapter_put(na);
366 return 0;
367 }
368
369 int
370 netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
371 struct nm_bridge *b)
372 {
373 return NM_NEED_BWRAP;
374 }
375
376 /* Try to get a reference to a netmap adapter attached to a VALE switch.
377 * If the adapter is found (or is created), this function returns 0, a
378 * non NULL pointer is returned into *na, and the caller holds a
379 * reference to the adapter.
380 * If an adapter is not found, then no reference is grabbed and the
381 * function returns an error code, or 0 if there is just a VALE prefix
382 * mismatch. Therefore the caller holds a reference when
383 * (*na != NULL && return == 0).
384 */
385 int
386 netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
387 struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
388 {
389 char *nr_name = hdr->nr_name;
390 const char *ifname;
391 if_t ifp = NULL;
392 int error = 0;
393 struct netmap_vp_adapter *vpna, *hostna = NULL;
394 struct nm_bridge *b;
395 uint32_t i, j;
396 uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
397 int needed;
398
399 *na = NULL; /* default return value */
400
401 /* first try to see if this is a bridge port. */
402 NMG_LOCK_ASSERT();
403 if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
404 return 0; /* no error, but no VALE prefix */
405 }
406
407 b = nm_find_bridge(nr_name, create, ops);
408 if (b == NULL) {
409 nm_prdis("no bridges available for '%s'", nr_name);
410 return (create ? ENOMEM : ENXIO);
411 }
412 if (strlen(nr_name) < b->bdg_namelen) /* impossible */
413 panic("x");
414
415 /* Now we are sure that name starts with the bridge's name,
416 * lookup the port in the bridge. We need to scan the entire
417 * list. It is not important to hold a WLOCK on the bridge
418 * during the search because NMG_LOCK already guarantees
419 * that there are no other possible writers.
420 */
421
422 /* lookup in the local list of ports */
423 for (j = 0; j < b->bdg_active_ports; j++) {
424 i = b->bdg_port_index[j];
425 vpna = b->bdg_ports[i];
426 nm_prdis("checking %s", vpna->up.name);
427 if (!strcmp(vpna->up.name, nr_name)) {
428 netmap_adapter_get(&vpna->up);
429 nm_prdis("found existing if %s refs %d", nr_name, vpna->up.na_refcount);
430 *na = &vpna->up;
431 return 0;
432 }
433 }
434 /* not found, should we create it? */
435 if (!create)
436 return ENXIO;
437 /* yes we should, see if we have space to attach entries */
438 needed = 2; /* in some cases we only need 1 */
439 if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
440 nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports);
441 return ENOMEM;
442 }
443 /* record the next two ports available, but do not allocate yet */
444 cand = b->bdg_port_index[b->bdg_active_ports];
445 cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
446 nm_prdis("+++ bridge %s port %s used %d avail %d %d",
447 b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
448
449 /*
450 * try to see if there is a matching NIC with this name
451 * (after the bridge's name)
452 */
453 ifname = nr_name + b->bdg_namelen + 1;
454 ifp = ifunit_ref(ifname);
455 if (!ifp) {
456 /* Create an ephemeral virtual port.
457 * This block contains all the ephemeral-specific logic.
458 */
459
460 if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
461 error = EINVAL;
462 goto out;
463 }
464
465 /* bdg_netmap_attach creates a struct netmap_adapter */
466 error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna);
467 if (error) {
468 if (netmap_debug & NM_DEBUG_BDG)
469 nm_prerr("error %d", error);
470 goto out;
471 }
472 /* shortcut - we can skip get_hw_na(),
473 * ownership check and nm_bdg_attach()
474 */
475
476 } else {
477 struct netmap_adapter *hw;
478
479 /* the vale:nic syntax is only valid for some commands */
480 switch (hdr->nr_reqtype) {
481 case NETMAP_REQ_VALE_ATTACH:
482 case NETMAP_REQ_VALE_DETACH:
483 case NETMAP_REQ_VALE_POLLING_ENABLE:
484 case NETMAP_REQ_VALE_POLLING_DISABLE:
485 break; /* ok */
486 default:
487 error = EINVAL;
488 goto out;
489 }
490
491 error = netmap_get_hw_na(ifp, nmd, &hw);
492 if (error || hw == NULL)
493 goto out;
494
495 /* host adapter might not be created */
496 error = hw->nm_bdg_attach(nr_name, hw, b);
497 if (error == NM_NEED_BWRAP) {
498 error = b->bdg_ops.bwrap_attach(nr_name, hw);
499 }
500 if (error)
501 goto out;
502 vpna = hw->na_vp;
503 hostna = hw->na_hostvp;
504 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
505 /* Check if we need to skip the host rings. */
506 struct nmreq_vale_attach *areq =
507 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
508 if (areq->reg.nr_mode != NR_REG_NIC_SW) {
509 hostna = NULL;
510 }
511 }
512 }
513
514 BDG_WLOCK(b);
515 vpna->bdg_port = cand;
516 nm_prdis("NIC %p to bridge port %d", vpna, cand);
517 /* bind the port to the bridge (virtual ports are not active) */
518 b->bdg_ports[cand] = vpna;
519 vpna->na_bdg = b;
520 b->bdg_active_ports++;
521 if (hostna != NULL) {
522 /* also bind the host stack to the bridge */
523 b->bdg_ports[cand2] = hostna;
524 hostna->bdg_port = cand2;
525 hostna->na_bdg = b;
526 b->bdg_active_ports++;
527 nm_prdis("host %p to bridge port %d", hostna, cand2);
528 }
529 nm_prdis("if %s refs %d", ifname, vpna->up.na_refcount);
530 BDG_WUNLOCK(b);
531 *na = &vpna->up;
532 netmap_adapter_get(*na);
533
534 out:
535 if (ifp)
536 if_rele(ifp);
537
538 return error;
539 }
540
541 /* Process NETMAP_REQ_VALE_ATTACH.
542 */
543 int
544 netmap_bdg_attach(struct nmreq_header *hdr, void *auth_token)
545 {
546 struct nmreq_vale_attach *req =
547 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
548 struct netmap_vp_adapter * vpna;
549 struct netmap_adapter *na = NULL;
550 struct netmap_mem_d *nmd = NULL;
551 struct nm_bridge *b = NULL;
552 int error;
553
554 NMG_LOCK();
555 /* permission check for modified bridges */
556 b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
557 if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
558 error = EACCES;
559 goto unlock_exit;
560 }
561
562 if (req->reg.nr_mem_id) {
563 nmd = netmap_mem_find(req->reg.nr_mem_id);
564 if (nmd == NULL) {
565 error = EINVAL;
566 goto unlock_exit;
567 }
568 }
569
570 /* check for existing one */
571 error = netmap_get_vale_na(hdr, &na, nmd, 0);
572 if (na) {
573 error = EBUSY;
574 goto unref_exit;
575 }
576 error = netmap_get_vale_na(hdr, &na,
577 nmd, 1 /* create if not exists */);
578 if (error) { /* no device */
579 goto unlock_exit;
580 }
581
582 if (na == NULL) { /* VALE prefix missing */
583 error = EINVAL;
584 goto unlock_exit;
585 }
586
587 if (NETMAP_OWNED_BY_ANY(na)) {
588 error = EBUSY;
589 goto unref_exit;
590 }
591
592 if (na->nm_bdg_ctl) {
593 /* nop for VALE ports. The bwrap needs to put the hwna
594 * in netmap mode (see netmap_bwrap_bdg_ctl)
595 */
596 error = na->nm_bdg_ctl(hdr, na);
597 if (error)
598 goto unref_exit;
599 nm_prdis("registered %s to netmap-mode", na->name);
600 }
601 vpna = (struct netmap_vp_adapter *)na;
602 req->port_index = vpna->bdg_port;
603
604 if (nmd)
605 netmap_mem_put(nmd);
606
607 NMG_UNLOCK();
608 return 0;
609
610 unref_exit:
611 netmap_adapter_put(na);
612 unlock_exit:
613 if (nmd)
614 netmap_mem_put(nmd);
615
616 NMG_UNLOCK();
617 return error;
618 }
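/*
 * For reference, a userspace sketch of the request that ends up in
 * netmap_bdg_attach() (assuming the standard nmreq_header/NIOCCTRL
 * control path; error handling omitted):
 *
 *	struct nmreq_header hdr;
 *	struct nmreq_vale_attach req;
 *
 *	memset(&hdr, 0, sizeof(hdr));
 *	memset(&req, 0, sizeof(req));
 *	hdr.nr_version = NETMAP_API;
 *	hdr.nr_reqtype = NETMAP_REQ_VALE_ATTACH;
 *	hdr.nr_body = (uintptr_t)&req;
 *	strlcpy(hdr.nr_name, "vale0:em1", sizeof(hdr.nr_name));
 *	req.reg.nr_mode = NR_REG_NIC_SW;	// also attach the host rings
 *	ioctl(fd, NIOCCTRL, &hdr);		// fd: an open /dev/netmap
 *	// on success, req.port_index holds the port number on the bridge
 */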
619
620
621 int
622 nm_is_bwrap(struct netmap_adapter *na)
623 {
624 return na->nm_register == netmap_bwrap_reg;
625 }
626
627 /* Process NETMAP_REQ_VALE_DETACH.
628 */
629 int
630 netmap_bdg_detach(struct nmreq_header *hdr, void *auth_token)
631 {
632 int error;
633
634 NMG_LOCK();
635 error = netmap_bdg_detach_locked(hdr, auth_token);
636 NMG_UNLOCK();
637 return error;
638 }
639
640 int
641 netmap_bdg_detach_locked(struct nmreq_header *hdr, void *auth_token)
642 {
643 struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
644 struct netmap_vp_adapter *vpna;
645 struct netmap_adapter *na;
646 struct nm_bridge *b = NULL;
647 int error;
648
649 /* permission check for modified bridges */
650 b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
651 if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
652 error = EACCES;
653 goto error_exit;
654 }
655
656 error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
657 if (error) { /* no device, or another bridge or user owns the device */
658 goto error_exit;
659 }
660
661 if (na == NULL) { /* VALE prefix missing */
662 error = EINVAL;
663 goto error_exit;
664 } else if (nm_is_bwrap(na) &&
665 ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
666 /* Don't detach a NIC with polling */
667 error = EBUSY;
668 goto unref_exit;
669 }
670
671 vpna = (struct netmap_vp_adapter *)na;
672 if (na->na_vp != vpna) {
673 /* trying to detach the first attach of a VALE persistent port
674 * that is attached to 2 bridges
675 */
676 error = EBUSY;
677 goto unref_exit;
678 }
679 nmreq_det->port_index = vpna->bdg_port;
680
681 if (na->nm_bdg_ctl) {
682 /* remove the port from bridge. The bwrap
683 * also needs to put the hwna in normal mode
684 */
685 error = na->nm_bdg_ctl(hdr, na);
686 }
687
688 unref_exit:
689 netmap_adapter_put(na);
690 error_exit:
691 return error;
692
693 }
694
695
696 struct nm_bdg_polling_state;
697 struct
698 nm_bdg_kthread {
699 struct nm_kctx *nmk;
700 u_int qfirst;
701 u_int qlast;
702 struct nm_bdg_polling_state *bps;
703 };
704
705 struct nm_bdg_polling_state {
706 bool configured;
707 bool stopped;
708 struct netmap_bwrap_adapter *bna;
709 uint32_t mode;
710 u_int qfirst;
711 u_int qlast;
712 u_int cpu_from;
713 u_int ncpus;
714 struct nm_bdg_kthread *kthreads;
715 };
716
717 static void
718 netmap_bwrap_polling(void *data)
719 {
720 struct nm_bdg_kthread *nbk = data;
721 struct netmap_bwrap_adapter *bna;
722 u_int qfirst, qlast, i;
723 struct netmap_kring **kring0, *kring;
724
725 if (!nbk)
726 return;
727 qfirst = nbk->qfirst;
728 qlast = nbk->qlast;
729 bna = nbk->bps->bna;
730 kring0 = NMR(bna->hwna, NR_RX);
731
732 for (i = qfirst; i < qlast; i++) {
733 kring = kring0[i];
734 kring->nm_notify(kring, 0);
735 }
736 }
737
738 static int
739 nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
740 {
741 struct nm_kctx_cfg kcfg;
742 int i, j;
743
744 bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
745 if (bps->kthreads == NULL)
746 return ENOMEM;
747
748 bzero(&kcfg, sizeof(kcfg));
749 kcfg.worker_fn = netmap_bwrap_polling;
750 for (i = 0; i < bps->ncpus; i++) {
751 struct nm_bdg_kthread *t = bps->kthreads + i;
752 int all = (bps->ncpus == 1 &&
753 bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
754 int affinity = bps->cpu_from + i;
755
756 t->bps = bps;
757 t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
758 t->qlast = all ? bps->qlast : t->qfirst + 1;
759 if (netmap_verbose)
760 nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
761 t->qlast);
762
763 kcfg.type = i;
764 kcfg.worker_private = t;
765 t->nmk = nm_os_kctx_create(&kcfg, NULL);
766 if (t->nmk == NULL) {
767 goto cleanup;
768 }
769 nm_os_kctx_worker_setaff(t->nmk, affinity);
770 }
771 return 0;
772
773 cleanup:
774 for (j = 0; j < i; j++) {
775 struct nm_bdg_kthread *t = bps->kthreads + j;
776 nm_os_kctx_destroy(t->nmk);
777 }
778 nm_os_free(bps->kthreads);
779 return EFAULT;
780 }
781
782 /* A variant of ptnetmap_start_kthreads() */
783 static int
784 nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
785 {
786 int error, i, j;
787
788 if (!bps) {
789 nm_prerr("polling is not configured");
790 return EFAULT;
791 }
792 bps->stopped = false;
793
794 for (i = 0; i < bps->ncpus; i++) {
795 struct nm_bdg_kthread *t = bps->kthreads + i;
796 error = nm_os_kctx_worker_start(t->nmk);
797 if (error) {
798 nm_prerr("error in nm_kthread_start(): %d", error);
799 goto cleanup;
800 }
801 }
802 return 0;
803
804 cleanup:
805 for (j = 0; j < i; j++) {
806 struct nm_bdg_kthread *t = bps->kthreads + j;
807 nm_os_kctx_worker_stop(t->nmk);
808 }
809 bps->stopped = true;
810 return error;
811 }
812
813 static void
814 nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
815 {
816 int i;
817
818 if (!bps)
819 return;
820
821 for (i = 0; i < bps->ncpus; i++) {
822 struct nm_bdg_kthread *t = bps->kthreads + i;
823 nm_os_kctx_worker_stop(t->nmk);
824 nm_os_kctx_destroy(t->nmk);
825 }
826 bps->stopped = true;
827 }
828
829 static int
830 get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
831 struct nm_bdg_polling_state *bps)
832 {
833 unsigned int avail_cpus, core_from;
834 unsigned int qfirst, qlast;
835 uint32_t i = req->nr_first_cpu_id;
836 uint32_t req_cpus = req->nr_num_polling_cpus;
837
838 avail_cpus = nm_os_ncpus();
839
840 if (req_cpus == 0) {
841 nm_prerr("req_cpus must be > 0");
842 return EINVAL;
843 } else if (req_cpus >= avail_cpus) {
844 nm_prerr("Cannot use all the CPUs in the system");
845 return EINVAL;
846 }
847
848 if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
849 /* Use a separate core for each ring. If nr_num_polling_cpus>1
850 * more consecutive rings are polled.
851 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
852 * ring 2 and 3 are polled by core 2 and 3, respectively. */
853 if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
854 nm_prerr("Rings %u-%u not in range (have %d rings)",
855 i, i + req_cpus, nma_get_nrings(na, NR_RX));
856 return EINVAL;
857 }
858 qfirst = i;
859 qlast = qfirst + req_cpus;
860 core_from = qfirst;
861
862 } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
863 /* Poll all the rings using a core specified by nr_first_cpu_id.
864 * The number of cores must be 1. */
865 if (req_cpus != 1) {
866 nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
867 "(was %d)", req_cpus);
868 return EINVAL;
869 }
870 qfirst = 0;
871 qlast = nma_get_nrings(na, NR_RX);
872 core_from = i;
873 } else {
874 nm_prerr("Invalid polling mode");
875 return EINVAL;
876 }
877
878 bps->mode = req->nr_mode;
879 bps->qfirst = qfirst;
880 bps->qlast = qlast;
881 bps->cpu_from = core_from;
882 bps->ncpus = req_cpus;
883 nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u",
884 req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
885 "MULTI" : "SINGLE",
886 qfirst, qlast, core_from, req_cpus);
887 return 0;
888 }
889
890 static int
891 nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
892 {
893 struct nm_bdg_polling_state *bps;
894 struct netmap_bwrap_adapter *bna;
895 int error;
896
897 bna = (struct netmap_bwrap_adapter *)na;
898 if (bna->na_polling_state) {
899 nm_prerr("ERROR adapter already in polling mode");
900 return EFAULT;
901 }
902
903 bps = nm_os_malloc(sizeof(*bps));
904 if (!bps)
905 return ENOMEM;
906 bps->configured = false;
907 bps->stopped = true;
908
909 if (get_polling_cfg(req, na, bps)) {
910 nm_os_free(bps);
911 return EINVAL;
912 }
913
914 if (nm_bdg_create_kthreads(bps)) {
915 nm_os_free(bps);
916 return EFAULT;
917 }
918
919 bps->configured = true;
920 bna->na_polling_state = bps;
921 bps->bna = bna;
922
923 /* disable interrupts if possible */
924 nma_intr_enable(bna->hwna, 0);
925 /* start kthread now */
926 error = nm_bdg_polling_start_kthreads(bps);
927 if (error) {
928 nm_prerr("ERROR nm_bdg_polling_start_kthread()");
929 nm_os_free(bps->kthreads);
930 nm_os_free(bps);
931 bna->na_polling_state = NULL;
932 nma_intr_enable(bna->hwna, 1);
933 }
934 return error;
935 }
936
937 static int
938 nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
939 {
940 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
941 struct nm_bdg_polling_state *bps;
942
943 if (!bna->na_polling_state) {
944 nm_prerr("ERROR adapter is not in polling mode");
945 return EFAULT;
946 }
947 bps = bna->na_polling_state;
948 nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
949 bps->configured = false;
950 nm_os_free(bps);
951 bna->na_polling_state = NULL;
952 /* re-enable interrupts */
953 nma_intr_enable(bna->hwna, 1);
954 return 0;
955 }
956
957 int
958 nm_bdg_polling(struct nmreq_header *hdr)
959 {
960 struct nmreq_vale_polling *req =
961 (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
962 struct netmap_adapter *na = NULL;
963 int error = 0;
964
965 NMG_LOCK();
966 error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
967 if (na && !error) {
968 if (!nm_is_bwrap(na)) {
969 error = EOPNOTSUPP;
970 } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
971 error = nm_bdg_ctl_polling_start(req, na);
972 if (!error)
973 netmap_adapter_get(na);
974 } else {
975 error = nm_bdg_ctl_polling_stop(na);
976 if (!error)
977 netmap_adapter_put(na);
978 }
979 netmap_adapter_put(na);
980 } else if (!na && !error) {
981 /* Not VALE port. */
982 error = EINVAL;
983 }
984 NMG_UNLOCK();
985
986 return error;
987 }
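/*
 * Request layout for the polling path (sketch mirroring the fields
 * consumed by get_polling_cfg(); the control plumbing is the same
 * nmreq_header/NIOCCTRL path used for attach/detach):
 *
 *	struct nmreq_vale_polling req;
 *
 *	req.nr_mode = NETMAP_POLLING_MODE_MULTI_CPU;	// or ..._SINGLE_CPU
 *	req.nr_first_cpu_id = 2;	// first core (and first ring, in MULTI mode)
 *	req.nr_num_polling_cpus = 2;	// rings 2 and 3 polled by cores 2 and 3
 *	// hdr.nr_reqtype = NETMAP_REQ_VALE_POLLING_ENABLE
 *	// hdr.nr_name = "vale0:em1" (a NIC already attached to the bridge)
 */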
988
989 /* Called by external kernel modules (e.g., Openvswitch)
990 * to set the configure/lookup/dtor functions of a VALE instance.
991 * Register callbacks to the given bridge. 'name' may be just the
992 * bridge's name (including ':' if it is not just NM_BDG_NAME).
993 *
994 * Called without NMG_LOCK.
995 */
996
997 int
998 netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
999 {
1000 struct nm_bridge *b;
1001 int error = 0;
1002
1003 NMG_LOCK();
1004 b = nm_find_bridge(name, 0 /* don't create */, NULL);
1005 if (!b) {
1006 error = ENXIO;
1007 goto unlock_regops;
1008 }
1009 if (!nm_bdg_valid_auth_token(b, auth_token)) {
1010 error = EACCES;
1011 goto unlock_regops;
1012 }
1013
1014 BDG_WLOCK(b);
1015 if (!bdg_ops) {
1016 /* resetting the bridge */
1017 bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
1018 b->bdg_ops = b->bdg_saved_ops;
1019 b->private_data = b->ht;
1020 } else {
1021 /* modifying the bridge */
1022 b->private_data = private_data;
1023 #define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m
1024 nm_bdg_override(lookup);
1025 nm_bdg_override(config);
1026 nm_bdg_override(dtor);
1027 nm_bdg_override(vp_create);
1028 nm_bdg_override(bwrap_attach);
1029 #undef nm_bdg_override
1030
1031 }
1032 BDG_WUNLOCK(b);
1033
1034 unlock_regops:
1035 NMG_UNLOCK();
1036 return error;
1037 }
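/*
 * Typical usage from an external module (sketch): override only the
 * lookup function of an existing bridge, then restore the defaults.
 * 'my_lookup' and 'my_state' are hypothetical; the lookup prototype
 * is the bdg_lookup_fn_t declared in netmap_bdg.h.
 *
 *	struct netmap_bdg_ops ops;
 *
 *	memset(&ops, 0, sizeof(ops));
 *	ops.lookup = my_lookup;		// only non-NULL fields are overridden
 *	error = netmap_bdg_regops("vale0:", &ops, my_state, token);
 *	...
 *	// a NULL ops pointer resets the bridge to the saved ops
 *	error = netmap_bdg_regops("vale0:", NULL, NULL, token);
 */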
1038
1039
1040 int
1041 netmap_bdg_config(struct nm_ifreq *nr)
1042 {
1043 struct nm_bridge *b;
1044 int error = EINVAL;
1045
1046 NMG_LOCK();
1047 b = nm_find_bridge(nr->nifr_name, 0, NULL);
1048 if (!b) {
1049 NMG_UNLOCK();
1050 return error;
1051 }
1052 NMG_UNLOCK();
1053 /* Don't call config() with NMG_LOCK() held */
1054 BDG_RLOCK(b);
1055 if (b->bdg_ops.config != NULL)
1056 error = b->bdg_ops.config(nr);
1057 BDG_RUNLOCK(b);
1058 return error;
1059 }
1060
1061
1062 /* nm_register callback for VALE ports */
1063 int
1064 netmap_vp_reg(struct netmap_adapter *na, int onoff)
1065 {
1066 struct netmap_vp_adapter *vpna =
1067 (struct netmap_vp_adapter*)na;
1068
1069 /* persistent ports may be put in netmap mode
1070 * before being attached to a bridge
1071 */
1072 if (vpna->na_bdg)
1073 BDG_WLOCK(vpna->na_bdg);
1074 if (onoff) {
1075 netmap_krings_mode_commit(na, onoff);
1076 if (na->active_fds == 0)
1077 na->na_flags |= NAF_NETMAP_ON;
1078 /* XXX on FreeBSD, persistent VALE ports should also
1079 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
1080 */
1081 } else {
1082 if (na->active_fds == 0)
1083 na->na_flags &= ~NAF_NETMAP_ON;
1084 netmap_krings_mode_commit(na, onoff);
1085 }
1086 if (vpna->na_bdg)
1087 BDG_WUNLOCK(vpna->na_bdg);
1088 return 0;
1089 }
1090
1091
1092 /* rxsync code used by VALE ports nm_rxsync callback and also
1093 * internally by the bwrap
1094 */
1095 static int
1096 netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
1097 {
1098 struct netmap_adapter *na = kring->na;
1099 struct netmap_ring *ring = kring->ring;
1100 u_int nm_i, lim = kring->nkr_num_slots - 1;
1101 u_int head = kring->rhead;
1102 int n;
1103
1104 if (head > lim) {
1105 nm_prerr("ouch dangerous reset!!!");
1106 n = netmap_ring_reinit(kring);
1107 goto done;
1108 }
1109
1110 /* First part, import newly received packets. */
1111 /* actually nothing to do here, they are already in the kring */
1112
1113 /* Second part, skip past packets that userspace has released. */
1114 nm_i = kring->nr_hwcur;
1115 if (nm_i != head) {
1116 /* consistency check, but nothing really important here */
1117 for (n = 0; likely(nm_i != head); n++) {
1118 struct netmap_slot *slot = &ring->slot[nm_i];
1119 void *addr = NMB(na, slot);
1120
1121 if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
1122 nm_prerr("bad buffer index %d, ignore ?",
1123 slot->buf_idx);
1124 }
1125 slot->flags &= ~NS_BUF_CHANGED;
1126 nm_i = nm_next(nm_i, lim);
1127 }
1128 kring->nr_hwcur = head;
1129 }
1130
1131 n = 0;
1132 done:
1133 return n;
1134 }
1135
1136 /*
1137 * nm_rxsync callback for VALE ports
1138 * user process reading from a VALE switch.
1139 * Already protected against concurrent calls from userspace,
1140 * but we must acquire the queue's lock to protect against
1141 * writers on the same queue.
1142 */
1143 int
1144 netmap_vp_rxsync(struct netmap_kring *kring, int flags)
1145 {
1146 int n;
1147
1148 mtx_lock(&kring->q_lock);
1149 n = netmap_vp_rxsync_locked(kring, flags);
1150 mtx_unlock(&kring->q_lock);
1151 return n;
1152 }
1153
1154 int
1155 netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
1156 struct netmap_bdg_ops *ops)
1157 {
1158 return ops->bwrap_attach(nr_name, hwna);
1159 }
1160
1161
1162 /* Bridge wrapper code (bwrap).
1163 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
1164 * VALE switch.
1165 * The main task is to swap the meaning of tx and rx rings to match the
1166 * expectations of the VALE switch code (see nm_bdg_flush).
1167 *
1168 * The bwrap works by interposing a netmap_bwrap_adapter between the
1169 * rest of the system and the hwna. The netmap_bwrap_adapter looks like
1170 * a netmap_vp_adapter to the rest of the system, but, internally, it
1171 * translates all callbacks to what the hwna expects.
1172 *
1173 * Note that we have to intercept callbacks coming from two sides:
1174 *
1175 * - callbacks coming from the netmap module are intercepted by
1176 * passing around the netmap_bwrap_adapter instead of the hwna
1177 *
1178 * - callbacks coming from outside of the netmap module only know
1179 * about the hwna. This, however, only happens in interrupt
1180 * handlers, where only the hwna->nm_notify callback is called.
1181 * What the bwrap does is to overwrite the hwna->nm_notify callback
1182 * with its own netmap_bwrap_intr_notify.
1183 * XXX This assumes that the hwna->nm_notify callback was the
1184 * standard netmap_notify(), as it is the case for nic adapters.
1185 * Any additional action performed by hwna->nm_notify will not be
1186 * performed by netmap_bwrap_intr_notify.
1187 *
1188 * Additionally, the bwrap can optionally attach the host rings pair
1189 * of the wrapped adapter to a different port of the switch.
1190 */
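/*
 * Concretely, after netmap_bwrap_krings_create_common() has run the
 * rings are cross-linked as follows (sketch of the resulting layout):
 *
 *	bwrap tx ring i  <-- same netmap_ring as -->  hwna rx ring i
 *	bwrap rx ring i  <-- same netmap_ring as -->  hwna tx ring i
 *
 * so a packet received by the NIC shows up on a bwrap tx ring, from
 * where the VALE forwarding code can push it into the switch.
 */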
1191
1192
1193 static void
1194 netmap_bwrap_dtor(struct netmap_adapter *na)
1195 {
1196 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1197 struct netmap_adapter *hwna = bna->hwna;
1198 struct nm_bridge *b = bna->up.na_bdg,
1199 *bh = bna->host.na_bdg;
1200
1201 if (bna->host.up.nm_mem)
1202 netmap_mem_put(bna->host.up.nm_mem);
1203
1204 if (b) {
1205 netmap_bdg_detach_common(b, bna->up.bdg_port,
1206 (bh ? bna->host.bdg_port : -1));
1207 }
1208
1209 nm_prdis("na %p", na);
1210 na->ifp = NULL;
1211 bna->host.up.ifp = NULL;
1212 hwna->na_vp = bna->saved_na_vp;
1213 hwna->na_hostvp = NULL;
1214 hwna->na_private = NULL;
1215 hwna->na_flags &= ~NAF_BUSY;
1216 netmap_adapter_put(hwna);
1217
1218 }
1219
1220
1221 /*
1222 * Intr callback for NICs connected to a bridge.
1223 * Simply ignore tx interrupts (maybe we could try to recover space ?)
1224 * and pass received packets from nic to the bridge.
1225 *
1226 * XXX TODO check locking: this is called from the interrupt
1227 * handler so we should make sure that the interface is not
1228 * disconnected while passing down an interrupt.
1229 *
1230 * Note, no user process can access this NIC or the host stack.
1231 * The only part of the ring that is significant are the slots,
1232 * and head/cur/tail are set from the kring as needed
1233 * (part as a receive ring, part as a transmit ring).
1234 *
1235 * callback that overwrites the hwna notify callback.
1236 * Packets come from the outside or from the host stack and are put on an
1237 * hwna rx ring.
1238 * The bridge wrapper then sends the packets through the bridge.
1239 */
1240 int
1241 netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
1242 {
1243 struct netmap_adapter *na = kring->na;
1244 struct netmap_bwrap_adapter *bna = na->na_private;
1245 struct netmap_kring *bkring;
1246 struct netmap_vp_adapter *vpna = &bna->up;
1247 u_int ring_nr = kring->ring_id;
1248 int ret = NM_IRQ_COMPLETED;
1249 int error;
1250
1251 if (netmap_debug & NM_DEBUG_RXINTR)
1252 nm_prinf("%s %s 0x%x", na->name, kring->name, flags);
1253
1254 bkring = vpna->up.tx_rings[ring_nr];
1255
1256 /* make sure the ring is not disabled */
1257 if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
1258 return EIO;
1259 }
1260
1261 if (netmap_debug & NM_DEBUG_RXINTR)
1262 nm_prinf("%s head %d cur %d tail %d", na->name,
1263 kring->rhead, kring->rcur, kring->rtail);
1264
1265 /* simulate a user wakeup on the rx ring
1266 * fetch packets that have arrived.
1267 */
1268 error = kring->nm_sync(kring, 0);
1269 if (error)
1270 goto put_out;
1271 if (kring->nr_hwcur == kring->nr_hwtail) {
1272 if (netmap_verbose)
1273 nm_prlim(1, "interrupt with no packets on %s",
1274 kring->name);
1275 goto put_out;
1276 }
1277
1278 /* new packets are kring->rcur to kring->nr_hwtail, and the bkring
1279 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
1280 * to push all packets out.
1281 */
1282 bkring->rhead = bkring->rcur = kring->nr_hwtail;
1283
1284 bkring->nm_sync(bkring, flags);
1285
1286 /* mark all buffers as released on this ring */
1287 kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
1288 /* another call to actually release the buffers */
1289 error = kring->nm_sync(kring, 0);
1290
1291 /* The second rxsync may have further advanced hwtail. If this happens,
1292 * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
1293 if (kring->rcur != kring->nr_hwtail) {
1294 ret = NM_IRQ_RESCHED;
1295 }
1296 put_out:
1297 nm_kr_put(kring);
1298
1299 return error ? error : ret;
1300 }
1301
1302
1303 /* nm_register callback for bwrap */
1304 int
1305 netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
1306 {
1307 struct netmap_bwrap_adapter *bna =
1308 (struct netmap_bwrap_adapter *)na;
1309 struct netmap_adapter *hwna = bna->hwna;
1310 struct netmap_vp_adapter *hostna = &bna->host;
1311 int error, i;
1312 enum txrx t;
1313
1314 nm_prdis("%s %s", na->name, onoff ? "on" : "off");
1315
1316 if (onoff) {
1317 /* netmap_do_regif has been called on the bwrap na.
1318 * We need to pass the information about the
1319 * memory allocator down to the hwna before
1320 * putting it in netmap mode
1321 */
1322 hwna->na_lut = na->na_lut;
1323
1324 if (hostna->na_bdg) {
1325 /* if the host rings have been attached to switch,
1326 * we need to copy the memory allocator information
1327 * in the hostna also
1328 */
1329 hostna->up.na_lut = na->na_lut;
1330 }
1331
1332 }
1333
1334 /* pass down the pending ring state information */
1335 for_rx_tx(t) {
1336 for (i = 0; i < netmap_all_rings(na, t); i++) {
1337 NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
1338 NMR(na, t)[i]->nr_pending_mode;
1339 }
1340 }
1341
1342 /* forward the request to the hwna */
1343 error = hwna->nm_register(hwna, onoff);
1344 if (error)
1345 return error;
1346
1347 /* copy up the current ring state information */
1348 for_rx_tx(t) {
1349 for (i = 0; i < netmap_all_rings(na, t); i++) {
1350 struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
1351 NMR(na, t)[i]->nr_mode = kring->nr_mode;
1352 }
1353 }
1354
1355 /* impersonate a netmap_vp_adapter */
1356 netmap_vp_reg(na, onoff);
1357 if (hostna->na_bdg)
1358 netmap_vp_reg(&hostna->up, onoff);
1359
1360 if (onoff) {
1361 u_int i;
1362 /* intercept the hwna nm_notify callback on the hw rings */
1363 for (i = 0; i < hwna->num_rx_rings; i++) {
1364 hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
1365 hwna->rx_rings[i]->nm_notify = bna->nm_intr_notify;
1366 }
1367 i = hwna->num_rx_rings; /* for safety */
1368 /* save the host ring notify unconditionally */
1369 for (; i < netmap_real_rings(hwna, NR_RX); i++) {
1370 hwna->rx_rings[i]->save_notify =
1371 hwna->rx_rings[i]->nm_notify;
1372 if (hostna->na_bdg) {
1373 /* also intercept the host ring notify */
1374 hwna->rx_rings[i]->nm_notify =
1375 netmap_bwrap_intr_notify;
1376 na->tx_rings[i]->nm_sync = na->nm_txsync;
1377 }
1378 }
1379 if (na->active_fds == 0)
1380 na->na_flags |= NAF_NETMAP_ON;
1381 } else {
1382 u_int i;
1383
1384 if (na->active_fds == 0)
1385 na->na_flags &= ~NAF_NETMAP_ON;
1386
1387 /* reset all notify callbacks (including host ring) */
1388 for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
1389 hwna->rx_rings[i]->nm_notify =
1390 hwna->rx_rings[i]->save_notify;
1391 hwna->rx_rings[i]->save_notify = NULL;
1392 }
1393 hwna->na_lut.lut = NULL;
1394 hwna->na_lut.plut = NULL;
1395 hwna->na_lut.objtotal = 0;
1396 hwna->na_lut.objsize = 0;
1397
1398 /* reset the number of host rings to default */
1399 for_rx_tx(t) {
1400 nma_set_host_nrings(hwna, t, 1);
1401 }
1402
1403 }
1404
1405 return 0;
1406 }
1407
1408 /* nm_config callback for bwrap */
1409 static int
1410 netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
1411 {
1412 struct netmap_bwrap_adapter *bna =
1413 (struct netmap_bwrap_adapter *)na;
1414 struct netmap_adapter *hwna = bna->hwna;
1415 int error;
1416
1417 /* cache the lut in the embedded host adapter */
1418 error = netmap_mem_get_lut(hwna->nm_mem, &bna->host.up.na_lut);
1419 if (error)
1420 return error;
1421
1422 /* Forward the request to the hwna. It may happen that nobody
1423 * registered hwna yet, so netmap_mem_get_lut() may have not
1424 * been called yet. */
1425 error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
1426 if (error)
1427 return error;
1428 netmap_update_config(hwna);
1429 /* swap the results and propagate */
1430 info->num_tx_rings = hwna->num_rx_rings;
1431 info->num_tx_descs = hwna->num_rx_desc;
1432 info->num_rx_rings = hwna->num_tx_rings;
1433 info->num_rx_descs = hwna->num_tx_desc;
1434 info->rx_buf_maxsize = hwna->rx_buf_maxsize;
1435
1436 if (na->na_flags & NAF_HOST_RINGS) {
1437 struct netmap_adapter *hostna = &bna->host.up;
1438 enum txrx t;
1439
1440 /* limit the number of host rings to that of hw */
1441 if (na->na_flags & NAF_HOST_ALL) {
1442 hostna->num_tx_rings = nma_get_nrings(hwna, NR_RX);
1443 hostna->num_rx_rings = nma_get_nrings(hwna, NR_TX);
1444 } else {
1445 nm_bound_var(&hostna->num_tx_rings, 1, 1,
1446 nma_get_nrings(hwna, NR_TX), NULL);
1447 nm_bound_var(&hostna->num_rx_rings, 1, 1,
1448 nma_get_nrings(hwna, NR_RX), NULL);
1449 }
1450 for_rx_tx(t) {
1451 enum txrx r = nm_txrx_swap(t);
1452 u_int nr = nma_get_nrings(hostna, t);
1453
1454 nma_set_host_nrings(na, t, nr);
1455 if (nma_get_host_nrings(hwna, t) < nr) {
1456 nma_set_host_nrings(hwna, t, nr);
1457 }
1458 nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
1459 }
1460 }
1461
1462 return 0;
1463 }
1464
1465 /* nm_bufcfg callback for bwrap */
1466 static int
1467 netmap_bwrap_bufcfg(struct netmap_kring *kring, uint64_t target)
1468 {
1469 struct netmap_adapter *na = kring->na;
1470 struct netmap_bwrap_adapter *bna =
1471 (struct netmap_bwrap_adapter *)na;
1472 struct netmap_adapter *hwna = bna->hwna;
1473 struct netmap_kring *hwkring;
1474 enum txrx r;
1475 int error;
1476
1477 /* we need the hw kring that corresponds to the bwrap one:
1478 * remember that rx and tx are swapped
1479 */
1480 r = nm_txrx_swap(kring->tx);
1481 hwkring = NMR(hwna, r)[kring->ring_id];
1482
1483 /* copy down the offset information, forward the request
1484 * and copy up the results
1485 */
1486 hwkring->offset_mask = kring->offset_mask;
1487 hwkring->offset_max = kring->offset_max;
1488 hwkring->offset_gap = kring->offset_gap;
1489
1490 error = hwkring->nm_bufcfg(hwkring, target);
1491 if (error)
1492 return error;
1493
1494 kring->hwbuf_len = hwkring->hwbuf_len;
1495 kring->buf_align = hwkring->buf_align;
1496
1497 return 0;
1498 }
1499
1500 /* nm_krings_create callback for bwrap */
1501 int
1502 netmap_bwrap_krings_create_common(struct netmap_adapter *na)
1503 {
1504 struct netmap_bwrap_adapter *bna =
1505 (struct netmap_bwrap_adapter *)na;
1506 struct netmap_adapter *hwna = bna->hwna;
1507 struct netmap_adapter *hostna = &bna->host.up;
1508 int i, error = 0;
1509 enum txrx t;
1510
1511 /* also create the hwna krings */
1512 error = hwna->nm_krings_create(hwna);
1513 if (error) {
1514 return error;
1515 }
1516
1517 /* increment the usage counter for all the hwna krings */
1518 for_rx_tx(t) {
1519 for (i = 0; i < netmap_all_rings(hwna, t); i++) {
1520 NMR(hwna, t)[i]->users++;
1521 /* this to prevent deletion of the rings through
1522 * our krings, instead of through the hwna ones */
1523 NMR(na, t)[i]->nr_kflags |= NKR_NEEDRING;
1524 }
1525 }
1526
1527 /* now create the actual rings */
1528 error = netmap_mem_rings_create(hwna);
1529 if (error) {
1530 goto err_dec_users;
1531 }
1532
1533 /* cross-link the netmap rings
1534 * The original number of rings comes from hwna,
1535 * rx rings on one side equals tx rings on the other.
1536 */
1537 for_rx_tx(t) {
1538 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
1539 for (i = 0; i < netmap_all_rings(hwna, r); i++) {
1540 NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
1541 NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
1542 }
1543 }
1544
1545 if (na->na_flags & NAF_HOST_RINGS) {
1546 /* the hostna rings are the host rings of the bwrap.
1547 * The corresponding krings must point back to the
1548 * hostna
1549 */
1550 hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
1551 hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
1552 for_rx_tx(t) {
1553 for (i = 0; i < nma_get_nrings(hostna, t); i++) {
1554 NMR(hostna, t)[i]->na = hostna;
1555 }
1556 }
1557 }
1558
1559 return 0;
1560
1561 err_dec_users:
1562 for_rx_tx(t) {
1563 for (i = 0; i < netmap_all_rings(hwna, t); i++) {
1564 NMR(hwna, t)[i]->users--;
1565 NMR(na, t)[i]->users--;
1566 }
1567 }
1568 hwna->nm_krings_delete(hwna);
1569 return error;
1570 }
1571
1572
1573 void
1574 netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
1575 {
1576 struct netmap_bwrap_adapter *bna =
1577 (struct netmap_bwrap_adapter *)na;
1578 struct netmap_adapter *hwna = bna->hwna;
1579 enum txrx t;
1580 int i;
1581
1582 nm_prdis("%s", na->name);
1583
1584 /* decrement the usage counter for all the hwna krings */
1585 for_rx_tx(t) {
1586 for (i = 0; i < netmap_all_rings(hwna, t); i++) {
1587 NMR(hwna, t)[i]->users--;
1588 NMR(na, t)[i]->users--;
1589 }
1590 }
1591
1592 /* delete any netmap rings that are no longer needed */
1593 netmap_mem_rings_delete(hwna);
1594 hwna->nm_krings_delete(hwna);
1595 }
1596
1597
1598 /* notify method for the bridge-->hwna direction */
1599 int
1600 netmap_bwrap_notify(struct netmap_kring *kring, int flags)
1601 {
1602 struct netmap_adapter *na = kring->na;
1603 struct netmap_bwrap_adapter *bna = na->na_private;
1604 struct netmap_adapter *hwna = bna->hwna;
1605 u_int ring_n = kring->ring_id;
1606 u_int lim = kring->nkr_num_slots - 1;
1607 struct netmap_kring *hw_kring;
1608 int error;
1609
1610 nm_prdis("%s: na %s hwna %s",
1611 (kring ? kring->name : "NULL!"),
1612 (na ? na->name : "NULL!"),
1613 (hwna ? hwna->name : "NULL!"));
1614 hw_kring = hwna->tx_rings[ring_n];
1615
1616 if (nm_kr_tryget(hw_kring, 0, NULL)) {
1617 return ENXIO;
1618 }
1619
1620 /* first step: simulate a user wakeup on the rx ring */
1621 netmap_vp_rxsync(kring, flags);
1622 nm_prdis("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
1623 na->name, ring_n,
1624 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
1625 kring->rhead, kring->rcur, kring->rtail,
1626 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
1627 /* second step: the new packets are sent on the tx ring
1628 * (which is actually the same ring)
1629 */
1630 hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
1631 error = hw_kring->nm_sync(hw_kring, flags);
1632 if (error)
1633 goto put_out;
1634
1635 /* third step: now we are back the rx ring */
1636 /* claim ownership on all hw owned bufs */
1637 kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
1638
1639 /* fourth step: the user goes to sleep again, causing another rxsync */
1640 netmap_vp_rxsync(kring, flags);
1641 nm_prdis("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
1642 na->name, ring_n,
1643 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
1644 kring->rhead, kring->rcur, kring->rtail,
1645 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
1646 put_out:
1647 nm_kr_put(hw_kring);
1648
1649 return error ? error : NM_IRQ_COMPLETED;
1650 }
1651
1652
1653 /* nm_bdg_ctl callback for the bwrap.
1654 * Called on bridge-attach and detach, as an effect of valectl -[ahd].
1655 * On attach, it needs to provide a fake netmap_priv_d structure and
1656 * perform a netmap_do_regif() on the bwrap. This will put both the
1657 * bwrap and the hwna in netmap mode, with the netmap rings shared
1658 * and cross-linked. Moreover, it will start intercepting interrupts
1659 * directed to hwna.
1660 */
1661 static int
1662 netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
1663 {
1664 struct netmap_priv_d *npriv;
1665 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1666 int error = 0;
1667
1668 if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
1669 struct nmreq_vale_attach *req =
1670 (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
1671 if (req->reg.nr_ringid != 0 ||
1672 (req->reg.nr_mode != NR_REG_ALL_NIC &&
1673 req->reg.nr_mode != NR_REG_NIC_SW)) {
1674 /* We only support attaching all the NIC rings
1675 * and/or the host stack. */
1676 return EINVAL;
1677 }
1678 if (NETMAP_OWNED_BY_ANY(na)) {
1679 return EBUSY;
1680 }
1681 if (bna->na_kpriv) {
1682 /* nothing to do */
1683 return 0;
1684 }
1685 npriv = netmap_priv_new();
1686 if (npriv == NULL)
1687 return ENOMEM;
1688 npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
1689 error = netmap_do_regif(npriv, na, hdr);
1690 if (error) {
1691 netmap_priv_delete(npriv);
1692 netmap_mem_restore(bna->hwna);
1693 return error;
1694 }
1695 bna->na_kpriv = npriv;
1696 na->na_flags |= NAF_BUSY;
1697 } else {
1698 if (na->active_fds == 0) /* not registered */
1699 return EINVAL;
1700 netmap_priv_delete(bna->na_kpriv);
1701 bna->na_kpriv = NULL;
1702 na->na_flags &= ~NAF_BUSY;
1703 netmap_mem_restore(bna->hwna);
1704 }
1705
1706 return error;
1707 }
1708
1709 /* attach a bridge wrapper to the 'real' device */
1710 int
1711 netmap_bwrap_attach_common(struct netmap_adapter *na,
1712 struct netmap_adapter *hwna)
1713 {
1714 struct netmap_bwrap_adapter *bna;
1715 struct netmap_adapter *hostna = NULL;
1716 int error = 0;
1717 enum txrx t;
1718
1719 /* make sure the NIC is not already in use */
1720 if (NETMAP_OWNED_BY_ANY(hwna)) {
1721 nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name);
1722 return EBUSY;
1723 }
1724
1725 bna = (struct netmap_bwrap_adapter *)na;
1726 /* make bwrap ifp point to the real ifp */
1727 na->ifp = hwna->ifp;
1728 if_ref(na->ifp);
1729 na->na_private = bna;
1730 /* fill the ring data for the bwrap adapter with rx/tx meanings
1731 * swapped. The real cross-linking will be done during register,
1732 * when all the krings will have been created.
1733 */
1734 for_rx_tx(t) {
1735 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
1736 nma_set_nrings(na, t, nma_get_nrings(hwna, r));
1737 nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
1738 }
1739 na->nm_dtor = netmap_bwrap_dtor;
1740 na->nm_config = netmap_bwrap_config;
1741 na->nm_bufcfg = netmap_bwrap_bufcfg;
1742 na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
1743 na->pdev = hwna->pdev;
1744 na->nm_mem = netmap_mem_get(hwna->nm_mem);
1745 na->virt_hdr_len = hwna->virt_hdr_len;
1746 na->rx_buf_maxsize = hwna->rx_buf_maxsize;
1747
1748 bna->hwna = hwna;
1749 netmap_adapter_get(hwna);
1750 hwna->na_private = bna; /* weak reference */
1751 bna->saved_na_vp = hwna->na_vp;
1752 hwna->na_vp = &bna->up;
1753 bna->up.up.na_vp = &(bna->up);
1754
1755 if (hwna->na_flags & NAF_HOST_RINGS) {
1756 if (hwna->na_flags & NAF_SW_ONLY)
1757 na->na_flags |= NAF_SW_ONLY;
1758 na->na_flags |= NAF_HOST_RINGS;
1759 hostna = &bna->host.up;
1760
1761 snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
1762 hostna->ifp = hwna->ifp;
1763 // hostna->nm_txsync = netmap_bwrap_host_txsync;
1764 // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1765 hostna->nm_mem = netmap_mem_get(na->nm_mem);
1766 hostna->na_private = bna;
1767 hostna->na_vp = &bna->up;
1768 na->na_hostvp = hwna->na_hostvp =
1769 hostna->na_hostvp = &bna->host;
1770 hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
1771 hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
1772 /* bwrap_config() will determine the number of host rings */
1773 }
1774 if (hwna->na_flags & NAF_MOREFRAG)
1775 na->na_flags |= NAF_MOREFRAG;
1776
1777 nm_prdis("%s<->%s txr %d txd %d rxr %d rxd %d",
1778 na->name, if_name(hwna->ifp),
1779 na->num_tx_rings, na->num_tx_desc,
1780 na->num_rx_rings, na->num_rx_desc);
1781
1782 error = netmap_attach_common(na);
1783 if (error) {
1784 goto err_put;
1785 }
1786 hwna->na_flags |= NAF_BUSY;
1787 return 0;
1788
1789 err_put:
1790 hwna->na_vp = hwna->na_hostvp = NULL;
1791 netmap_adapter_put(hwna);
1792 return error;
1793
1794 }
1795
1796 struct nm_bridge *
1797 netmap_init_bridges2(u_int n)
1798 {
1799 int i;
1800 struct nm_bridge *b;
1801
1802 b = nm_os_malloc(sizeof(struct nm_bridge) * n);
1803 if (b == NULL)
1804 return NULL;
1805 for (i = 0; i < n; i++)
1806 BDG_RWINIT(&b[i]);
1807 return b;
1808 }
1809
1810 void
1811 netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
1812 {
1813 int i;
1814
1815 if (b == NULL)
1816 return;
1817
1818 for (i = 0; i < n; i++)
1819 BDG_RWDESTROY(&b[i]);
1820 nm_os_free(b);
1821 }
1822
1823 int
1824 netmap_init_bridges(void)
1825 {
1826 #ifdef CONFIG_NET_NS
1827 return netmap_bns_register();
1828 #else
1829 nm_bridges = netmap_init_bridges2(vale_max_bridges);
1830 if (nm_bridges == NULL)
1831 return ENOMEM;
1832 return 0;
1833 #endif
1834 }
1835
1836 void
1837 netmap_uninit_bridges(void)
1838 {
1839 #ifdef CONFIG_NET_NS
1840 netmap_bns_unregister();
1841 #else
1842 netmap_uninit_bridges2(nm_bridges, vale_max_bridges);
1843 #endif
1844 }
1845