xref: /freebsd/sys/dev/netmap/netmap_pipe.c (revision 51015e6d0f570239b0c2088dc6cf2b018928375d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2014-2018 Giuseppe Lettieri
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *   2. Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in the
14  *      documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /* $FreeBSD$ */
30 
31 #if defined(__FreeBSD__)
32 #include <sys/cdefs.h> /* prerequisite */
33 
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/param.h>	/* defines used in kernel.h */
37 #include <sys/kernel.h>	/* types used in module initialization */
38 #include <sys/malloc.h>
39 #include <sys/poll.h>
40 #include <sys/lock.h>
41 #include <sys/rwlock.h>
42 #include <sys/selinfo.h>
43 #include <sys/sysctl.h>
44 #include <sys/socket.h> /* sockaddrs */
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <machine/bus.h>	/* bus_dmamap_* */
48 #include <sys/refcount.h>
49 
50 
51 #elif defined(linux)
52 
53 #include "bsd_glue.h"
54 
55 #elif defined(__APPLE__)
56 
57 #warning OSX support is only partial
58 #include "osx_glue.h"
59 
60 #elif defined(_WIN32)
61 #include "win_glue.h"
62 
63 #else
64 
65 #error	Unsupported platform
66 
67 #endif /* unsupported */
68 
69 /*
70  * common headers
71  */
72 
73 #include <net/netmap.h>
74 #include <dev/netmap/netmap_kern.h>
75 #include <dev/netmap/netmap_mem2.h>
76 
77 #ifdef WITH_PIPES
78 
/* Upper bound applied to the number of slots per ring requested for a pipe
 * (see the nm_bound_var() calls in netmap_get_pipe_na()).
 */
#define NM_PIPE_MAXSLOTS	4096
/* Upper bound applied to the number of tx/rx rings requested for a pipe
 * (see the nm_bound_var() calls in netmap_get_pipe_na()).
 */
#define NM_PIPE_MAXRINGS	256

/* Backing variable for the dev.netmap.default_pipes sysctl. Nothing in this
 * file reads it: the knob is only exported for compatibility.
 */
static int netmap_default_pipes = 0; /* ignored, kept for compatibility */
SYSBEGIN(vars_pipes);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW,
		&netmap_default_pipes, 0, "For compatibility only");
SYSEND;
88 
89 /* allocate the pipe array in the parent adapter */
90 static int
91 nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
92 {
93 	size_t old_len, len;
94 	struct netmap_pipe_adapter **npa;
95 
96 	if (npipes <= na->na_max_pipes)
97 		/* we already have more entries that requested */
98 		return 0;
99 
100 	if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES)
101 		return EINVAL;
102 
103 	old_len = sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes;
104 	len = sizeof(struct netmap_pipe_adapter *) * npipes;
105 	npa = nm_os_realloc(na->na_pipes, len, old_len);
106 	if (npa == NULL)
107 		return ENOMEM;
108 
109 	na->na_pipes = npa;
110 	na->na_max_pipes = npipes;
111 
112 	return 0;
113 }
114 
115 /* deallocate the parent array in the parent adapter */
116 void
117 netmap_pipe_dealloc(struct netmap_adapter *na)
118 {
119 	if (na->na_pipes) {
120 		if (na->na_next_pipe > 0) {
121 			nm_prerr("freeing not empty pipe array for %s (%d dangling pipes)!",
122 			    na->name, na->na_next_pipe);
123 		}
124 		nm_os_free(na->na_pipes);
125 		na->na_pipes = NULL;
126 		na->na_max_pipes = 0;
127 		na->na_next_pipe = 0;
128 	}
129 }
130 
131 /* find a pipe endpoint with the given id among the parent's pipes */
132 static struct netmap_pipe_adapter *
133 netmap_pipe_find(struct netmap_adapter *parent, const char *pipe_id)
134 {
135 	int i;
136 	struct netmap_pipe_adapter *na;
137 
138 	for (i = 0; i < parent->na_next_pipe; i++) {
139 		const char *na_pipe_id;
140 		na = parent->na_pipes[i];
141 		na_pipe_id = strrchr(na->up.name,
142 			na->role == NM_PIPE_ROLE_MASTER ? '{' : '}');
143 		KASSERT(na_pipe_id != NULL, ("Invalid pipe name"));
144 		++na_pipe_id;
145 		if (!strcmp(na_pipe_id, pipe_id)) {
146 			return na;
147 		}
148 	}
149 	return NULL;
150 }
151 
152 /* add a new pipe endpoint to the parent array */
153 static int
154 netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
155 {
156 	if (parent->na_next_pipe >= parent->na_max_pipes) {
157 		u_int npipes = parent->na_max_pipes ?  2*parent->na_max_pipes : 2;
158 		int error = nm_pipe_alloc(parent, npipes);
159 		if (error)
160 			return error;
161 	}
162 
163 	parent->na_pipes[parent->na_next_pipe] = na;
164 	na->parent_slot = parent->na_next_pipe;
165 	parent->na_next_pipe++;
166 	return 0;
167 }
168 
169 /* remove the given pipe endpoint from the parent array */
170 static void
171 netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
172 {
173 	u_int n;
174 	n = --parent->na_next_pipe;
175 	if (n != na->parent_slot) {
176 		struct netmap_pipe_adapter **p =
177 			&parent->na_pipes[na->parent_slot];
178 		*p = parent->na_pipes[n];
179 		(*p)->parent_slot = na->parent_slot;
180 	}
181 	parent->na_pipes[n] = NULL;
182 }
183 
184 int
185 netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
186 {
187 	struct netmap_kring *rxkring = txkring->pipe;
188 	u_int k, lim = txkring->nkr_num_slots - 1, nk;
189 	int m; /* slots to transfer */
190 	int complete; /* did we see a complete packet ? */
191 	struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
192 
193 	nm_prdis("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
194 	nm_prdis(20, "TX before: hwcur %d hwtail %d cur %d head %d tail %d",
195 		txkring->nr_hwcur, txkring->nr_hwtail,
196 		txkring->rcur, txkring->rhead, txkring->rtail);
197 
198 	/* update the hwtail */
199 	txkring->nr_hwtail = txkring->pipe_tail;
200 
201 	m = txkring->rhead - txkring->nr_hwcur; /* new slots */
202 	if (m < 0)
203 		m += txkring->nkr_num_slots;
204 
205 	if (m == 0) {
206 		/* nothing to send */
207 		return 0;
208 	}
209 
210 	for (k = txkring->nr_hwcur, nk = lim + 1, complete = 0; m;
211 			m--, k = nm_next(k, lim), nk = (complete ? k : nk)) {
212 		struct netmap_slot *rs = &rxring->slot[k];
213 		struct netmap_slot *ts = &txring->slot[k];
214 		uint64_t off = nm_get_offset(rxkring, rs);
215 
216 		*rs = *ts;
217 		if (nm_get_offset(rxkring, rs) < off) {
218 			nm_write_offset(rxkring, rs, off);
219 		}
220 		if (ts->flags & NS_BUF_CHANGED) {
221 			ts->flags &= ~NS_BUF_CHANGED;
222 		}
223 		complete = !(ts->flags & NS_MOREFRAG);
224 	}
225 
226 	txkring->nr_hwcur = k;
227 
228 	nm_prdis(20, "TX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
229 		txkring->nr_hwcur, txkring->nr_hwtail,
230 		txkring->rcur, txkring->rhead, txkring->rtail, k);
231 
232 	if (likely(nk <= lim)) {
233 		mb(); /* make sure the slots are updated before publishing them */
234 		rxkring->pipe_tail = nk; /* only publish complete packets */
235 		rxkring->nm_notify(rxkring, 0);
236 	}
237 
238 	return 0;
239 }
240 
241 int
242 netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
243 {
244 	struct netmap_kring *txkring = rxkring->pipe;
245 	u_int k, lim = rxkring->nkr_num_slots - 1;
246 	int m; /* slots to release */
247 	struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
248 
249 	nm_prdis("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
250 	nm_prdis(20, "RX before: hwcur %d hwtail %d cur %d head %d tail %d",
251 		rxkring->nr_hwcur, rxkring->nr_hwtail,
252 		rxkring->rcur, rxkring->rhead, rxkring->rtail);
253 
254 	/* update the hwtail */
255 	rxkring->nr_hwtail = rxkring->pipe_tail;
256 
257 	m = rxkring->rhead - rxkring->nr_hwcur; /* released slots */
258 	if (m < 0)
259 		m += rxkring->nkr_num_slots;
260 
261 	if (m == 0) {
262 		/* nothing to release */
263 		return 0;
264 	}
265 
266 	for (k = rxkring->nr_hwcur; m; m--, k = nm_next(k, lim)) {
267 		struct netmap_slot *rs = &rxring->slot[k];
268 		struct netmap_slot *ts = &txring->slot[k];
269 
270 		/* copy the slot. This also propagates any offset */
271 		*ts = *rs;
272 		if (rs->flags & NS_BUF_CHANGED) {
273 			rs->flags &= ~NS_BUF_CHANGED;
274 		}
275 	}
276 
277 	mb(); /* make sure the slots are updated before publishing them */
278 	txkring->pipe_tail = nm_prev(k, lim);
279 	rxkring->nr_hwcur = k;
280 
281 	nm_prdis(20, "RX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
282 		rxkring->nr_hwcur, rxkring->nr_hwtail,
283 		rxkring->rcur, rxkring->rhead, rxkring->rtail, k);
284 
285 	txkring->nm_notify(txkring, 0);
286 
287 	return 0;
288 }
289 
/* Pipe endpoints are created and destroyed together, so that endpoints do not
 * have to check for the existence of their peer at each ?xsync.
292  *
293  * To play well with the existing netmap infrastructure (refcounts etc.), we
294  * adopt the following strategy:
295  *
296  * 1) The first endpoint that is created also creates the other endpoint and
297  * grabs a reference to it.
298  *
299  *    state A)  user1 --> endpoint1 --> endpoint2
300  *
301  * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
302  * its reference to the user:
303  *
304  *    state B)  user1 --> endpoint1     endpoint2 <--- user2
305  *
306  * 3) Assume that, starting from state B endpoint2 is closed. In the unregister
307  * callback endpoint2 notes that endpoint1 is still active and adds a reference
308  * from endpoint1 to itself. When user2 then releases her own reference,
309  * endpoint2 is not destroyed and we are back to state A. A symmetrical state
310  * would be reached if endpoint1 were released instead.
311  *
312  * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
313  * it owns a reference to endpoint2 and releases it.
314  *
315  * Something similar goes on for the creation and destruction of the krings.
316  */
317 
318 
319 int netmap_pipe_krings_create_both(struct netmap_adapter *na,
320 				  struct netmap_adapter *ona)
321 {
322 	enum txrx t;
323 	int error;
324 	int i;
325 
326 	/* case 1) below */
327 	nm_prdis("%p: case 1, create both ends", na);
328 	error = netmap_krings_create(na, 0);
329 	if (error)
330 		return error;
331 
332 	/* create the krings of the other end */
333 	error = netmap_krings_create(ona, 0);
334 	if (error)
335 		goto del_krings1;
336 
337 	/* cross link the krings and initialize the pipe_tails */
338 	for_rx_tx(t) {
339 		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
340 		for (i = 0; i < nma_get_nrings(na, t); i++) {
341 			struct netmap_kring *k1 = NMR(na, t)[i],
342 					    *k2 = NMR(ona, r)[i];
343 			k1->pipe = k2;
344 			k2->pipe = k1;
345 			/* mark all peer-adapter rings as fake */
346 			k2->nr_kflags |= NKR_FAKERING;
347 			/* init tails */
348 			k1->pipe_tail = k1->nr_hwtail;
349 			k2->pipe_tail = k2->nr_hwtail;
350 		}
351 	}
352 
353 	return 0;
354 
355 del_krings1:
356 	netmap_krings_delete(na);
357 	return error;
358 }
359 
360 /* netmap_pipe_krings_create.
361  *
362  * There are two cases:
363  *
364  * 1) state is
365  *
366  *        usr1 --> e1 --> e2
367  *
368  *    and we are e1. We have to create both sets
369  *    of krings.
370  *
371  * 2) state is
372  *
373  *        usr1 --> e1 --> e2
374  *
375  *    and we are e2. e1 is certainly registered and our
376  *    krings already exist. Nothing to do.
377  */
378 static int
379 netmap_pipe_krings_create(struct netmap_adapter *na)
380 {
381 	struct netmap_pipe_adapter *pna =
382 		(struct netmap_pipe_adapter *)na;
383 	struct netmap_adapter *ona = &pna->peer->up;
384 
385 	if (pna->peer_ref)
386 		return netmap_pipe_krings_create_both(na, ona);
387 
388 	return 0;
389 }
390 
391 int
392 netmap_pipe_reg_both(struct netmap_adapter *na, struct netmap_adapter *ona)
393 {
394 	int i, error = 0;
395 	enum txrx t;
396 
397 	for_rx_tx(t) {
398 		for (i = 0; i < nma_get_nrings(na, t); i++) {
399 			struct netmap_kring *kring = NMR(na, t)[i];
400 
401 			if (nm_kring_pending_on(kring)) {
402 				/* mark the peer ring as needed */
403 				kring->pipe->nr_kflags |= NKR_NEEDRING;
404 			}
405 		}
406 	}
407 
408 	/* create all missing needed rings on the other end.
409 	 * Either our end, or the other, has been marked as
410 	 * fake, so the allocation will not be done twice.
411 	 */
412 	error = netmap_mem_rings_create(ona);
413 	if (error)
414 		return error;
415 
416 	/* In case of no error we put our rings in netmap mode */
417 	for_rx_tx(t) {
418 		for (i = 0; i < nma_get_nrings(na, t); i++) {
419 			struct netmap_kring *kring = NMR(na, t)[i];
420 			if (nm_kring_pending_on(kring)) {
421 
422 				kring->nr_mode = NKR_NETMAP_ON;
423 				if ((kring->nr_kflags & NKR_FAKERING) &&
424 				    (kring->pipe->nr_kflags & NKR_FAKERING)) {
425 					/* this is a re-open of a pipe
426 					 * end-point kept alive by the other end.
427 					 * We need to leave everything as it is
428 					 */
429 					continue;
430 				}
431 
432 				/* copy the buffers from the non-fake ring
433 				 * (this also propagates any initial offset)
434 				 */
435 				memcpy(kring->pipe->ring->slot,
436 				       kring->ring->slot,
437 				       sizeof(struct netmap_slot) *
438 						kring->nkr_num_slots);
439 				/* copy the offset-related fields */
440 				*(uint64_t *)(uintptr_t)&kring->pipe->ring->offset_mask =
441 					kring->ring->offset_mask;
442 				*(uint64_t *)(uintptr_t)&kring->pipe->ring->buf_align =
443 					kring->ring->buf_align;
444 				/* mark both rings as fake and needed,
445 				 * so that buffers will not be
446 				 * deleted by the standard machinery
447 				 * (we will delete them by ourselves in
448 				 * netmap_pipe_krings_delete)
449 				 */
450 				kring->nr_kflags |=
451 					(NKR_FAKERING | NKR_NEEDRING);
452 				kring->nr_mode = NKR_NETMAP_ON;
453 			}
454 		}
455 	}
456 
457 	return 0;
458 }
459 
460 /* netmap_pipe_reg.
461  *
462  * There are two cases on registration (onoff==1)
463  *
464  * 1.a) state is
465  *
466  *        usr1 --> e1 --> e2
467  *
468  *      and we are e1. Create the needed rings of the
469  *      other end.
470  *
471  * 1.b) state is
472  *
473  *        usr1 --> e1 --> e2 <-- usr2
474  *
475  *      and we are e2. Drop the ref e1 is holding.
476  *
477  *  There are two additional cases on unregister (onoff==0)
478  *
479  *  2.a) state is
480  *
481  *         usr1 --> e1 --> e2
482  *
483  *       and we are e1. Nothing special to do, e2 will
484  *       be cleaned up by the destructor of e1.
485  *
486  *  2.b) state is
487  *
488  *         usr1 --> e1     e2 <-- usr2
489  *
490  *       and we are either e1 or e2. Add a ref from the
491  *       other end.
492  */
493 static int
494 netmap_pipe_reg(struct netmap_adapter *na, int onoff)
495 {
496 	struct netmap_pipe_adapter *pna =
497 		(struct netmap_pipe_adapter *)na;
498 	struct netmap_adapter *ona = &pna->peer->up;
499 	int error = 0;
500 
501 	nm_prdis("%p: onoff %d", na, onoff);
502 	if (onoff) {
503 		error = netmap_pipe_reg_both(na, ona);
504 		if (error) {
505 			return error;
506 		}
507 		if (na->active_fds == 0)
508 			na->na_flags |= NAF_NETMAP_ON;
509 	} else {
510 		if (na->active_fds == 0)
511 			na->na_flags &= ~NAF_NETMAP_ON;
512 		netmap_krings_mode_commit(na, onoff);
513 	}
514 
515 	if (na->active_fds) {
516 		nm_prdis("active_fds %d", na->active_fds);
517 		return 0;
518 	}
519 
520 	if (pna->peer_ref) {
521 		nm_prdis("%p: case 1.a or 2.a, nothing to do", na);
522 		return 0;
523 	}
524 	if (onoff) {
525 		nm_prdis("%p: case 1.b, drop peer", na);
526 		pna->peer->peer_ref = 0;
527 		netmap_adapter_put(na);
528 	} else {
529 		nm_prdis("%p: case 2.b, grab peer", na);
530 		netmap_adapter_get(na);
531 		pna->peer->peer_ref = 1;
532 	}
533 	return error;
534 }
535 
536 void
537 netmap_pipe_krings_delete_both(struct netmap_adapter *na,
538 			       struct netmap_adapter *ona)
539 {
540 	struct netmap_adapter *sna;
541 	enum txrx t;
542 	int i;
543 
544 	/* case 1) below */
545 	nm_prdis("%p: case 1, deleting everything", na);
546 	/* To avoid double-frees we zero-out all the buffers in the kernel part
547 	 * of each ring. The reason is this: If the user is behaving correctly,
548 	 * all buffers are found in exactly one slot in the userspace part of
549 	 * some ring.  If the user is not behaving correctly, we cannot release
550 	 * buffers cleanly anyway. In the latter case, the allocator will
551 	 * return to a clean state only when all its users will close.
552 	 */
553 	sna = na;
554 cleanup:
555 	for_rx_tx(t) {
556 		for (i = 0; i < nma_get_nrings(sna, t); i++) {
557 			struct netmap_kring *kring = NMR(sna, t)[i];
558 			struct netmap_ring *ring = kring->ring;
559 			uint32_t j, lim = kring->nkr_num_slots - 1;
560 
561 			nm_prdis("%s ring %p hwtail %u hwcur %u",
562 				kring->name, ring, kring->nr_hwtail, kring->nr_hwcur);
563 
564 			if (ring == NULL)
565 				continue;
566 
567 			if (kring->tx == NR_RX)
568 				ring->slot[kring->pipe_tail].buf_idx = 0;
569 
570 			for (j = nm_next(kring->pipe_tail, lim);
571 			     j != kring->nr_hwcur;
572 			     j = nm_next(j, lim))
573 			{
574 				nm_prdis("%s[%d] %u", kring->name, j, ring->slot[j].buf_idx);
575 				ring->slot[j].buf_idx = 0;
576 			}
577 			kring->nr_kflags &= ~(NKR_FAKERING | NKR_NEEDRING);
578 		}
579 
580 	}
581 	if (sna != ona && ona->tx_rings) {
582 		sna = ona;
583 		goto cleanup;
584 	}
585 
586 	netmap_mem_rings_delete(na);
587 	netmap_krings_delete(na); /* also zeroes tx_rings etc. */
588 
589 	if (ona->tx_rings == NULL) {
590 		/* already deleted, we must be on an
591 		 * cleanup-after-error path */
592 		return;
593 	}
594 	netmap_mem_rings_delete(ona);
595 	netmap_krings_delete(ona);
596 }
597 
598 /* netmap_pipe_krings_delete.
599  *
600  * There are two cases:
601  *
602  * 1) state is
603  *
604  *                usr1 --> e1 --> e2
605  *
606  *    and we are e1 (e2 is not registered, so krings_delete cannot be
607  *    called on it);
608  *
609  * 2) state is
610  *
611  *                usr1 --> e1     e2 <-- usr2
612  *
613  *    and we are either e1 or e2.
614  *
615  * In the former case we have to also delete the krings of e2;
616  * in the latter case we do nothing.
617  */
618 static void
619 netmap_pipe_krings_delete(struct netmap_adapter *na)
620 {
621 	struct netmap_pipe_adapter *pna =
622 		(struct netmap_pipe_adapter *)na;
623 	struct netmap_adapter *ona; /* na of the other end */
624 
625 	if (!pna->peer_ref) {
626 		nm_prdis("%p: case 2, kept alive by peer",  na);
627 		return;
628 	}
629 	ona = &pna->peer->up;
630 	netmap_pipe_krings_delete_both(na, ona);
631 }
632 
633 
634 static void
635 netmap_pipe_dtor(struct netmap_adapter *na)
636 {
637 	struct netmap_pipe_adapter *pna =
638 		(struct netmap_pipe_adapter *)na;
639 	nm_prdis("%p %p", na, pna->parent_ifp);
640 	if (pna->peer_ref) {
641 		nm_prdis("%p: clean up peer", na);
642 		pna->peer_ref = 0;
643 		netmap_adapter_put(&pna->peer->up);
644 	}
645 	if (pna->role == NM_PIPE_ROLE_MASTER)
646 		netmap_pipe_remove(pna->parent, pna);
647 	if (pna->parent_ifp)
648 		if_rele(pna->parent_ifp);
649 	netmap_adapter_put(pna->parent);
650 	pna->parent = NULL;
651 }
652 
/*
 * Look up, and possibly create, the pipe endpoint named in hdr->nr_name.
 *
 * Pipe names have the form 'xx{yy' (master endpoint) or 'xx}yy' (slave
 * endpoint), where xx is the name of the parent port and yy is the pipe
 * identifier.
 *
 * hdr:    request header; nr_name holds the port name and nr_body points to
 *         the struct nmreq_register with the requested mode and ring/slot
 *         counts. nr_name is temporarily modified during the parent lookup
 *         and restored before returning.
 * na:     on success, set to the requested endpoint, on which an additional
 *         reference has been taken. Left untouched if the name is not a
 *         pipe name or on error.
 * nmd:    passed through to netmap_get_na() for the parent lookup.
 * create: if zero, a missing pipe is reported with ENODEV instead of being
 *         created; also passed through to netmap_get_na().
 *
 * Returns 0 on success and also when the name contains no bracket at all
 * (not a pipe). Returns EINVAL if the bracket is the first or the last
 * character of the name or if the requested mode is neither NR_REG_ALL_NIC
 * nor NR_REG_ONE_NIC, EBUSY if the parent is owned by the kernel, ENODEV if
 * the pipe does not exist and create is zero, ENOMEM on allocation failure,
 * or the error reported by netmap_get_na(), netmap_attach_common() or
 * netmap_pipe_add().
 *
 * The function drops and re-acquires the global lock (NMG_UNLOCK/NMG_LOCK)
 * around netmap_vi_create(), so it presumably must be entered with that
 * lock held -- TODO confirm against the callers.
 */
int
netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
		struct netmap_mem_d *nmd, int create)
{
	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
	struct netmap_adapter *pna; /* parent adapter */
	struct netmap_pipe_adapter *mna, *sna, *reqna;
	if_t ifp = NULL;
	const char *pipe_id = NULL;
	int role = 0;
	int error, retries = 0;
	char *cbra, pipe_char;

	/* Try to parse the pipe syntax 'xx{yy' or 'xx}yy'. */
	/* A '{' anywhere in the name takes precedence over '}'. */
	cbra = strrchr(hdr->nr_name, '{');
	if (cbra != NULL) {
		role = NM_PIPE_ROLE_MASTER;
	} else {
		cbra = strrchr(hdr->nr_name, '}');
		if (cbra != NULL) {
			role = NM_PIPE_ROLE_SLAVE;
		} else {
			nm_prdis("not a pipe");
			return 0;
		}
	}
	/* remember the bracket, so that it can be restored after the
	 * name has been temporarily truncated below
	 */
	pipe_char = *cbra;
	pipe_id = cbra + 1;
	if (*pipe_id == '\0' || cbra == hdr->nr_name) {
		/* Bracket is the last character, so pipe name is missing;
		 * or bracket is the first character, so base port name
		 * is missing. */
		return EINVAL;
	}

	if (req->nr_mode != NR_REG_ALL_NIC && req->nr_mode != NR_REG_ONE_NIC) {
		/* We only accept modes involving hardware rings. */
		return EINVAL;
	}

	/* first, try to find the parent adapter */
	/* The loop body runs at most twice: if the first lookup fails with
	 * ENXIO we try to create the parent and look it up again; any
	 * failure of the second lookup is returned to the caller.
	 */
	for (;;) {
		int create_error;

		/* Temporarily remove the pipe suffix. */
		*cbra = '\0';
		error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
		/* Restore the pipe suffix. */
		*cbra = pipe_char;
		if (!error)
			break;
		if (error != ENXIO || retries++) {
			nm_prdis("parent lookup failed: %d", error);
			return error;
		}
		nm_prdis("try to create a persistent vale port");
		/* create a persistent vale port and try again */
		*cbra = '\0';
		NMG_UNLOCK();
		create_error = netmap_vi_create(hdr, 1 /* autodelete */);
		NMG_LOCK();
		*cbra = pipe_char;
		if (create_error && create_error != EEXIST) {
			if (create_error != EOPNOTSUPP) {
				nm_prerr("failed to create a persistent vale port: %d",
				    create_error);
			}
			/* note: the error of the failed lookup (ENXIO) is
			 * returned here, not create_error
			 */
			return error;
		}
	}

	if (NETMAP_OWNED_BY_KERN(pna)) {
		nm_prdis("parent busy");
		error = EBUSY;
		goto put_out;
	}

	/* next, lookup the pipe id in the parent list */
	reqna = NULL;
	mna = netmap_pipe_find(pna, pipe_id);
	if (mna) {
		if (mna->role == role) {
			nm_prdis("found %s directly at %d", pipe_id, mna->parent_slot);
			reqna = mna;
		} else {
			nm_prdis("found %s indirectly at %d", pipe_id, mna->parent_slot);
			reqna = mna->peer;
		}
		/* the pipe we have found already holds a ref to the parent,
		 * so we need to drop the one we got from netmap_get_na()
		 */
		netmap_unget_na(pna, ifp);
		goto found;
	}
	nm_prdis("pipe %s not found, create %d", pipe_id, create);
	if (!create) {
		error = ENODEV;
		goto put_out;
	}
	/* we create both master and slave.
	 * The endpoint we were asked for holds a reference to
	 * the other one.
	 */
	mna = nm_os_malloc(sizeof(*mna));
	if (mna == NULL) {
		error = ENOMEM;
		goto put_out;
	}
	snprintf(mna->up.name, sizeof(mna->up.name), "%s{%s", pna->name, pipe_id);

	mna->role = NM_PIPE_ROLE_MASTER;
	mna->parent = pna;
	mna->parent_ifp = ifp;

	mna->up.nm_txsync = netmap_pipe_txsync;
	mna->up.nm_rxsync = netmap_pipe_rxsync;
	mna->up.nm_register = netmap_pipe_reg;
	mna->up.nm_dtor = netmap_pipe_dtor;
	mna->up.nm_krings_create = netmap_pipe_krings_create;
	mna->up.nm_krings_delete = netmap_pipe_krings_delete;
	/* the pipe uses the memory allocator and the lookup table
	 * of the parent
	 */
	mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
	mna->up.na_flags |= NAF_MEM_OWNER | NAF_OFFSETS;
	mna->up.na_lut = pna->na_lut;

	/* take the geometry from the request, bounded by the
	 * NM_PIPE_MAXRINGS and NM_PIPE_MAXSLOTS limits
	 */
	mna->up.num_tx_rings = req->nr_tx_rings;
	nm_bound_var(&mna->up.num_tx_rings, 1,
			1, NM_PIPE_MAXRINGS, NULL);
	mna->up.num_rx_rings = req->nr_rx_rings;
	nm_bound_var(&mna->up.num_rx_rings, 1,
			1, NM_PIPE_MAXRINGS, NULL);
	mna->up.num_tx_desc = req->nr_tx_slots;
	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
			1, NM_PIPE_MAXSLOTS, NULL);
	mna->up.num_rx_desc = req->nr_rx_slots;
	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
			1, NM_PIPE_MAXSLOTS, NULL);
	error = netmap_attach_common(&mna->up);
	if (error)
		goto free_mna;
	/* register the master with the parent */
	error = netmap_pipe_add(pna, mna);
	if (error)
		goto free_mna;

	/* create the slave */
	/* (mna and sna have the same type, so sizeof(*mna) is the
	 * size of the slave as well)
	 */
	sna = nm_os_malloc(sizeof(*mna));
	if (sna == NULL) {
		error = ENOMEM;
		goto unregister_mna;
	}
	/* most fields are the same, copy from master and then fix */
	*sna = *mna;
	sna->up.nm_mem = netmap_mem_get(mna->up.nm_mem);
	/* swap the number of tx/rx rings and slots */
	sna->up.num_tx_rings = mna->up.num_rx_rings;
	sna->up.num_tx_desc  = mna->up.num_rx_desc;
	sna->up.num_rx_rings = mna->up.num_tx_rings;
	sna->up.num_rx_desc  = mna->up.num_tx_desc;
	snprintf(sna->up.name, sizeof(sna->up.name), "%s}%s", pna->name, pipe_id);
	sna->role = NM_PIPE_ROLE_SLAVE;
	error = netmap_attach_common(&sna->up);
	if (error)
		goto free_sna;

	/* join the two endpoints */
	mna->peer = sna;
	sna->peer = mna;

	/* we already have a reference to the parent, but we
	 * need another one for the other endpoint we created
	 */
	netmap_adapter_get(pna);
	/* likewise for the ifp, if any */
	if (ifp)
		if_ref(ifp);

	/* the requested endpoint keeps the other one alive */
	if (role == NM_PIPE_ROLE_MASTER) {
		reqna = mna;
		mna->peer_ref = 1;
		netmap_adapter_get(&sna->up);
	} else {
		reqna = sna;
		sna->peer_ref = 1;
		netmap_adapter_get(&mna->up);
	}
	nm_prdis("created master %p and slave %p", mna, sna);
found:

	nm_prdis("pipe %s %s at %p", pipe_id,
		(reqna->role == NM_PIPE_ROLE_MASTER ? "master" : "slave"), reqna);
	*na = &reqna->up;
	netmap_adapter_get(*na);

	/* keep the reference to the parent.
	 * It will be released by the req destructor
	 */

	return 0;

	/* Error unwinding: each label undoes one more step and falls
	 * through to the next one.
	 * NOTE(review): the references obtained with netmap_mem_get() for
	 * mna and sna are not visibly released on these paths -- check
	 * whether a matching put is needed before the adapters are freed.
	 */
free_sna:
	nm_os_free(sna);
unregister_mna:
	netmap_pipe_remove(pna, mna);
free_mna:
	nm_os_free(mna);
put_out:
	netmap_unget_na(pna, ifp);
	return error;
}
862 
863 
864 #endif /* WITH_PIPES */
865