xref: /freebsd/sys/dev/netmap/netmap_pipe.c (revision bdafb02fcb88389fd1ab684cfe734cb429d35618)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2014-2016 Giuseppe Lettieri
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *   2. Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in the
14  *      documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /* $FreeBSD$ */
30 
31 #if defined(__FreeBSD__)
32 #include <sys/cdefs.h> /* prerequisite */
33 
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/param.h>	/* defines used in kernel.h */
37 #include <sys/kernel.h>	/* types used in module initialization */
38 #include <sys/malloc.h>
39 #include <sys/poll.h>
40 #include <sys/lock.h>
41 #include <sys/rwlock.h>
42 #include <sys/selinfo.h>
43 #include <sys/sysctl.h>
44 #include <sys/socket.h> /* sockaddrs */
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <machine/bus.h>	/* bus_dmamap_* */
48 #include <sys/refcount.h>
49 
50 
51 #elif defined(linux)
52 
53 #include "bsd_glue.h"
54 
55 #elif defined(__APPLE__)
56 
57 #warning OSX support is only partial
58 #include "osx_glue.h"
59 
60 #elif defined(_WIN32)
61 #include "win_glue.h"
62 
63 #else
64 
65 #error	Unsupported platform
66 
67 #endif /* unsupported */
68 
69 /*
70  * common headers
71  */
72 
73 #include <net/netmap.h>
74 #include <dev/netmap/netmap_kern.h>
75 #include <dev/netmap/netmap_mem2.h>
76 
77 #ifdef WITH_PIPES
78 
79 #define NM_PIPE_MAXSLOTS	4096
80 #define NM_PIPE_MAXRINGS	256
81 
82 static int netmap_default_pipes = 0; /* ignored, kept for compatibility */
83 SYSBEGIN(vars_pipes);
84 SYSCTL_DECL(_dev_netmap);
85 SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW,
86 		&netmap_default_pipes, 0, "For compatibility only");
87 SYSEND;
88 
89 /* allocate the pipe array in the parent adapter */
90 static int
91 nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
92 {
93 	size_t old_len, len;
94 	struct netmap_pipe_adapter **npa;
95 
96 	if (npipes <= na->na_max_pipes)
97 		/* we already have more entries that requested */
98 		return 0;
99 
100 	if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES)
101 		return EINVAL;
102 
103 	old_len = sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes;
104 	len = sizeof(struct netmap_pipe_adapter *) * npipes;
105 	npa = nm_os_realloc(na->na_pipes, len, old_len);
106 	if (npa == NULL)
107 		return ENOMEM;
108 
109 	na->na_pipes = npa;
110 	na->na_max_pipes = npipes;
111 
112 	return 0;
113 }
114 
115 /* deallocate the parent array in the parent adapter */
116 void
117 netmap_pipe_dealloc(struct netmap_adapter *na)
118 {
119 	if (na->na_pipes) {
120 		if (na->na_next_pipe > 0) {
121 			D("freeing not empty pipe array for %s (%d dangling pipes)!", na->name,
122 					na->na_next_pipe);
123 		}
124 		nm_os_free(na->na_pipes);
125 		na->na_pipes = NULL;
126 		na->na_max_pipes = 0;
127 		na->na_next_pipe = 0;
128 	}
129 }
130 
131 /* find a pipe endpoint with the given id among the parent's pipes */
132 static struct netmap_pipe_adapter *
133 netmap_pipe_find(struct netmap_adapter *parent, const char *pipe_id)
134 {
135 	int i;
136 	struct netmap_pipe_adapter *na;
137 
138 	for (i = 0; i < parent->na_next_pipe; i++) {
139 		const char *na_pipe_id;
140 		na = parent->na_pipes[i];
141 		na_pipe_id = strrchr(na->up.name,
142 			na->role == NM_PIPE_ROLE_MASTER ? '{' : '}');
143 		KASSERT(na_pipe_id != NULL, ("Invalid pipe name"));
144 		++na_pipe_id;
145 		if (!strcmp(na_pipe_id, pipe_id)) {
146 			return na;
147 		}
148 	}
149 	return NULL;
150 }
151 
152 /* add a new pipe endpoint to the parent array */
153 static int
154 netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
155 {
156 	if (parent->na_next_pipe >= parent->na_max_pipes) {
157 		u_int npipes = parent->na_max_pipes ?  2*parent->na_max_pipes : 2;
158 		int error = nm_pipe_alloc(parent, npipes);
159 		if (error)
160 			return error;
161 	}
162 
163 	parent->na_pipes[parent->na_next_pipe] = na;
164 	na->parent_slot = parent->na_next_pipe;
165 	parent->na_next_pipe++;
166 	return 0;
167 }
168 
169 /* remove the given pipe endpoint from the parent array */
170 static void
171 netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
172 {
173 	u_int n;
174 	n = --parent->na_next_pipe;
175 	if (n != na->parent_slot) {
176 		struct netmap_pipe_adapter **p =
177 			&parent->na_pipes[na->parent_slot];
178 		*p = parent->na_pipes[n];
179 		(*p)->parent_slot = na->parent_slot;
180 	}
181 	parent->na_pipes[n] = NULL;
182 }
183 
184 int
185 netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
186 {
187 	struct netmap_kring *rxkring = txkring->pipe;
188 	u_int k, lim = txkring->nkr_num_slots - 1;
189 	int m; /* slots to transfer */
190 	struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
191 
192 	ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
193 	ND(20, "TX before: hwcur %d hwtail %d cur %d head %d tail %d",
194 		txkring->nr_hwcur, txkring->nr_hwtail,
195 		txkring->rcur, txkring->rhead, txkring->rtail);
196 
197 	m = txkring->rhead - txkring->nr_hwcur; /* new slots */
198 	if (m < 0)
199 		m += txkring->nkr_num_slots;
200 
201 	if (m == 0) {
202 		/* nothing to send */
203 		return 0;
204 	}
205 
206 	for (k = txkring->nr_hwcur; m; m--, k = nm_next(k, lim)) {
207 		struct netmap_slot *rs = &rxring->slot[k];
208 		struct netmap_slot *ts = &txring->slot[k];
209 
210 		rs->len = ts->len;
211 		rs->ptr = ts->ptr;
212 
213 		if (ts->flags & NS_BUF_CHANGED) {
214 			rs->buf_idx = ts->buf_idx;
215 			rs->flags |= NS_BUF_CHANGED;
216 			ts->flags &= ~NS_BUF_CHANGED;
217 		}
218 	}
219 
220 	mb(); /* make sure the slots are updated before publishing them */
221 	rxkring->nr_hwtail = k;
222 	txkring->nr_hwcur = k;
223 
224 	ND(20, "TX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
225 		txkring->nr_hwcur, txkring->nr_hwtail,
226 		txkring->rcur, txkring->rhead, txkring->rtail, k);
227 
228 	rxkring->nm_notify(rxkring, 0);
229 
230 	return 0;
231 }
232 
233 int
234 netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
235 {
236 	struct netmap_kring *txkring = rxkring->pipe;
237 	u_int k, lim = rxkring->nkr_num_slots - 1;
238 	int m; /* slots to release */
239 	struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
240 
241 	ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
242 	ND(20, "RX before: hwcur %d hwtail %d cur %d head %d tail %d",
243 		rxkring->nr_hwcur, rxkring->nr_hwtail,
244 		rxkring->rcur, rxkring->rhead, rxkring->rtail);
245 
246 	m = rxkring->rhead - rxkring->nr_hwcur; /* released slots */
247 	if (m < 0)
248 		m += rxkring->nkr_num_slots;
249 
250 	if (m == 0) {
251 		/* nothing to release */
252 		return 0;
253 	}
254 
255 	for (k = rxkring->nr_hwcur; m; m--, k = nm_next(k, lim)) {
256 		struct netmap_slot *rs = &rxring->slot[k];
257 		struct netmap_slot *ts = &txring->slot[k];
258 
259 		if (rs->flags & NS_BUF_CHANGED) {
260 			/* copy the slot and report the buffer change */
261 			*ts = *rs;
262 			rs->flags &= ~NS_BUF_CHANGED;
263 		}
264 	}
265 
266 	mb(); /* make sure the slots are updated before publishing them */
267 	txkring->nr_hwtail = nm_prev(k, lim);
268 	rxkring->nr_hwcur = k;
269 
270 	ND(20, "RX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
271 		rxkring->nr_hwcur, rxkring->nr_hwtail,
272 		rxkring->rcur, rxkring->rhead, rxkring->rtail, k);
273 
274 	txkring->nm_notify(txkring, 0);
275 
276 	return 0;
277 }
278 
279 /* Pipe endpoints are created and destroyed together, so that endpoints do not
280  * have to check for the existence of their peer at each ?xsync.
281  *
282  * To play well with the existing netmap infrastructure (refcounts etc.), we
283  * adopt the following strategy:
284  *
285  * 1) The first endpoint that is created also creates the other endpoint and
286  * grabs a reference to it.
287  *
288  *    state A)  user1 --> endpoint1 --> endpoint2
289  *
290  * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
291  * its reference to the user:
292  *
293  *    state B)  user1 --> endpoint1     endpoint2 <--- user2
294  *
295  * 3) Assume that, starting from state B endpoint2 is closed. In the unregister
296  * callback endpoint2 notes that endpoint1 is still active and adds a reference
297  * from endpoint1 to itself. When user2 then releases her own reference,
298  * endpoint2 is not destroyed and we are back to state A. A symmetrical state
299  * would be reached if endpoint1 were released instead.
300  *
301  * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
302  * it owns a reference to endpoint2 and releases it.
303  *
304  * Something similar goes on for the creation and destruction of the krings.
305  */
306 
307 
308 /* netmap_pipe_krings_create.
309  *
310  * There are two cases:
311  *
312  * 1) state is
313  *
314  *        usr1 --> e1 --> e2
315  *
316  *    and we are e1. We have to create both sets
317  *    of krings.
318  *
319  * 2) state is
320  *
321  *        usr1 --> e1 --> e2
322  *
323  *    and we are e2. e1 is certainly registered and our
324  *    krings already exist. Nothing to do.
325  */
326 static int
327 netmap_pipe_krings_create(struct netmap_adapter *na)
328 {
329 	struct netmap_pipe_adapter *pna =
330 		(struct netmap_pipe_adapter *)na;
331 	struct netmap_adapter *ona = &pna->peer->up;
332 	int error = 0;
333 	enum txrx t;
334 
335 	if (pna->peer_ref) {
336 		int i;
337 
338 		/* case 1) above */
339 		ND("%p: case 1, create both ends", na);
340 		error = netmap_krings_create(na, 0);
341 		if (error)
342 			goto err;
343 
344 		/* create the krings of the other end */
345 		error = netmap_krings_create(ona, 0);
346 		if (error)
347 			goto del_krings1;
348 
349 		/* cross link the krings */
350 		for_rx_tx(t) {
351 			enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
352 			for (i = 0; i < nma_get_nrings(na, t); i++) {
353 				NMR(na, t)[i]->pipe = NMR(ona, r)[i];
354 				NMR(ona, r)[i]->pipe = NMR(na, t)[i];
355 				/* mark all peer-adapter rings as fake */
356 				NMR(ona, r)[i]->nr_kflags |= NKR_FAKERING;
357 			}
358 		}
359 
360 	}
361 	return 0;
362 
363 del_krings1:
364 	netmap_krings_delete(na);
365 err:
366 	return error;
367 }
368 
369 /* netmap_pipe_reg.
370  *
371  * There are two cases on registration (onoff==1)
372  *
373  * 1.a) state is
374  *
375  *        usr1 --> e1 --> e2
376  *
377  *      and we are e1. Create the needed rings of the
378  *      other end.
379  *
380  * 1.b) state is
381  *
382  *        usr1 --> e1 --> e2 <-- usr2
383  *
384  *      and we are e2. Drop the ref e1 is holding.
385  *
386  *  There are two additional cases on unregister (onoff==0)
387  *
388  *  2.a) state is
389  *
390  *         usr1 --> e1 --> e2
391  *
392  *       and we are e1. Nothing special to do, e2 will
393  *       be cleaned up by the destructor of e1.
394  *
395  *  2.b) state is
396  *
397  *         usr1 --> e1     e2 <-- usr2
398  *
399  *       and we are either e1 or e2. Add a ref from the
400  *       other end.
401  */
402 static int
403 netmap_pipe_reg(struct netmap_adapter *na, int onoff)
404 {
405 	struct netmap_pipe_adapter *pna =
406 		(struct netmap_pipe_adapter *)na;
407 	struct netmap_adapter *ona = &pna->peer->up;
408 	int i, error = 0;
409 	enum txrx t;
410 
411 	ND("%p: onoff %d", na, onoff);
412 	if (onoff) {
413 		for_rx_tx(t) {
414 			for (i = 0; i < nma_get_nrings(na, t); i++) {
415 				struct netmap_kring *kring = NMR(na, t)[i];
416 
417 				if (nm_kring_pending_on(kring)) {
418 					/* mark the peer ring as needed */
419 					kring->pipe->nr_kflags |= NKR_NEEDRING;
420 				}
421 			}
422 		}
423 
424 		/* create all missing needed rings on the other end.
425 		 * Either our end, or the other, has been marked as
426 		 * fake, so the allocation will not be done twice.
427 		 */
428 		error = netmap_mem_rings_create(ona);
429 		if (error)
430 			return error;
431 
432 		/* In case of no error we put our rings in netmap mode */
433 		for_rx_tx(t) {
434 			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
435 				struct netmap_kring *kring = NMR(na, t)[i];
436 				if (nm_kring_pending_on(kring)) {
437 					struct netmap_kring *sring, *dring;
438 
439 					/* copy the buffers from the non-fake ring */
440 					if (kring->nr_kflags & NKR_FAKERING) {
441 						sring = kring->pipe;
442 						dring = kring;
443 					} else {
444 						sring = kring;
445 						dring = kring->pipe;
446 					}
447 					memcpy(dring->ring->slot,
448 					       sring->ring->slot,
449 					       sizeof(struct netmap_slot) *
450 							sring->nkr_num_slots);
451 					/* mark both rings as fake and needed,
452 					 * so that buffers will not be
453 					 * deleted by the standard machinery
454 					 * (we will delete them by ourselves in
455 					 * netmap_pipe_krings_delete)
456 					 */
457 					sring->nr_kflags |=
458 						(NKR_FAKERING | NKR_NEEDRING);
459 					dring->nr_kflags |=
460 						(NKR_FAKERING | NKR_NEEDRING);
461 					kring->nr_mode = NKR_NETMAP_ON;
462 				}
463 			}
464 		}
465 		if (na->active_fds == 0)
466 			na->na_flags |= NAF_NETMAP_ON;
467 	} else {
468 		if (na->active_fds == 0)
469 			na->na_flags &= ~NAF_NETMAP_ON;
470 		for_rx_tx(t) {
471 			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
472 				struct netmap_kring *kring = NMR(na, t)[i];
473 
474 				if (nm_kring_pending_off(kring)) {
475 					kring->nr_mode = NKR_NETMAP_OFF;
476 				}
477 			}
478 		}
479 	}
480 
481 	if (na->active_fds) {
482 		ND("active_fds %d", na->active_fds);
483 		return 0;
484 	}
485 
486 	if (pna->peer_ref) {
487 		ND("%p: case 1.a or 2.a, nothing to do", na);
488 		return 0;
489 	}
490 	if (onoff) {
491 		ND("%p: case 1.b, drop peer", na);
492 		pna->peer->peer_ref = 0;
493 		netmap_adapter_put(na);
494 	} else {
495 		ND("%p: case 2.b, grab peer", na);
496 		netmap_adapter_get(na);
497 		pna->peer->peer_ref = 1;
498 	}
499 	return error;
500 }
501 
502 /* netmap_pipe_krings_delete.
503  *
504  * There are two cases:
505  *
506  * 1) state is
507  *
508  *                usr1 --> e1 --> e2
509  *
510  *    and we are e1 (e2 is not registered, so krings_delete cannot be
511  *    called on it);
512  *
513  * 2) state is
514  *
515  *                usr1 --> e1     e2 <-- usr2
516  *
517  *    and we are either e1 or e2.
518  *
519  * In the former case we have to also delete the krings of e2;
520  * in the latter case we do nothing.
521  */
522 static void
523 netmap_pipe_krings_delete(struct netmap_adapter *na)
524 {
525 	struct netmap_pipe_adapter *pna =
526 		(struct netmap_pipe_adapter *)na;
527 	struct netmap_adapter *sna, *ona; /* na of the other end */
528 	enum txrx t;
529 	int i;
530 
531 	if (!pna->peer_ref) {
532 		ND("%p: case 2, kept alive by peer",  na);
533 		return;
534 	}
535 	ona = &pna->peer->up;
536 	/* case 1) above */
537 	ND("%p: case 1, deleting everything", na);
538 	/* To avoid double-frees we zero-out all the buffers in the kernel part
539 	 * of each ring. The reason is this: If the user is behaving correctly,
540 	 * all buffers are found in exactly one slot in the userspace part of
541 	 * some ring.  If the user is not behaving correctly, we cannot release
542 	 * buffers cleanly anyway. In the latter case, the allocator will
543 	 * return to a clean state only when all its users will close.
544 	 */
545 	sna = na;
546 cleanup:
547 	for_rx_tx(t) {
548 		for (i = 0; i < nma_get_nrings(sna, t) + 1; i++) {
549 			struct netmap_kring *kring = NMR(sna, t)[i];
550 			struct netmap_ring *ring = kring->ring;
551 			uint32_t j, lim = kring->nkr_num_slots - 1;
552 
553 			ND("%s ring %p hwtail %u hwcur %u",
554 				kring->name, ring, kring->nr_hwtail, kring->nr_hwcur);
555 
556 			if (ring == NULL)
557 				continue;
558 
559 			if (kring->nr_hwtail == kring->nr_hwcur)
560 				ring->slot[kring->nr_hwtail].buf_idx = 0;
561 
562 			for (j = nm_next(kring->nr_hwtail, lim);
563 			     j != kring->nr_hwcur;
564 			     j = nm_next(j, lim))
565 			{
566 				ND("%s[%d] %u", kring->name, j, ring->slot[j].buf_idx);
567 				ring->slot[j].buf_idx = 0;
568 			}
569 			kring->nr_kflags &= ~(NKR_FAKERING | NKR_NEEDRING);
570 		}
571 
572 	}
573 	if (sna != ona && ona->tx_rings) {
574 		sna = ona;
575 		goto cleanup;
576 	}
577 
578 	netmap_mem_rings_delete(na);
579 	netmap_krings_delete(na); /* also zeroes tx_rings etc. */
580 
581 	if (ona->tx_rings == NULL) {
582 		/* already deleted, we must be on an
583 		 * cleanup-after-error path */
584 		return;
585 	}
586 	netmap_mem_rings_delete(ona);
587 	netmap_krings_delete(ona);
588 }
589 
590 
591 static void
592 netmap_pipe_dtor(struct netmap_adapter *na)
593 {
594 	struct netmap_pipe_adapter *pna =
595 		(struct netmap_pipe_adapter *)na;
596 	ND("%p %p", na, pna->parent_ifp);
597 	if (pna->peer_ref) {
598 		ND("%p: clean up peer", na);
599 		pna->peer_ref = 0;
600 		netmap_adapter_put(&pna->peer->up);
601 	}
602 	if (pna->role == NM_PIPE_ROLE_MASTER)
603 		netmap_pipe_remove(pna->parent, pna);
604 	if (pna->parent_ifp)
605 		if_rele(pna->parent_ifp);
606 	netmap_adapter_put(pna->parent);
607 	pna->parent = NULL;
608 }
609 
610 int
611 netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
612 		struct netmap_mem_d *nmd, int create)
613 {
614 	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
615 	struct netmap_adapter *pna; /* parent adapter */
616 	struct netmap_pipe_adapter *mna, *sna, *reqna;
617 	struct ifnet *ifp = NULL;
618 	const char *pipe_id = NULL;
619 	int role = 0;
620 	int error, retries = 0;
621 	char *cbra;
622 
623 	/* Try to parse the pipe syntax 'xx{yy' or 'xx}yy'. */
624 	cbra = strrchr(hdr->nr_name, '{');
625 	if (cbra != NULL) {
626 		role = NM_PIPE_ROLE_MASTER;
627 	} else {
628 		cbra = strrchr(hdr->nr_name, '}');
629 		if (cbra != NULL) {
630 			role = NM_PIPE_ROLE_SLAVE;
631 		} else {
632 			ND("not a pipe");
633 			return 0;
634 		}
635 	}
636 	pipe_id = cbra + 1;
637 	if (*pipe_id == '\0' || cbra == hdr->nr_name) {
638 		/* Bracket is the last character, so pipe name is missing;
639 		 * or bracket is the first character, so base port name
640 		 * is missing. */
641 		return EINVAL;
642 	}
643 
644 	if (req->nr_mode != NR_REG_ALL_NIC && req->nr_mode != NR_REG_ONE_NIC) {
645 		/* We only accept modes involving hardware rings. */
646 		return EINVAL;
647 	}
648 
649 	/* first, try to find the parent adapter */
650 	for (;;) {
651 		char nr_name_orig[NETMAP_REQ_IFNAMSIZ];
652 		int create_error;
653 
654 		/* Temporarily remove the pipe suffix. */
655 		strncpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
656 		*cbra = '\0';
657 		error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
658 		/* Restore the pipe suffix. */
659 		strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
660 		if (!error)
661 			break;
662 		if (error != ENXIO || retries++) {
663 			ND("parent lookup failed: %d", error);
664 			return error;
665 		}
666 		ND("try to create a persistent vale port");
667 		/* create a persistent vale port and try again */
668 		*cbra = '\0';
669 		NMG_UNLOCK();
670 		create_error = netmap_vi_create(hdr, 1 /* autodelete */);
671 		NMG_LOCK();
672 		strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
673 		if (create_error && create_error != EEXIST) {
674 			if (create_error != EOPNOTSUPP) {
675 				D("failed to create a persistent vale port: %d", create_error);
676 			}
677 			return error;
678 		}
679 	}
680 
681 	if (NETMAP_OWNED_BY_KERN(pna)) {
682 		ND("parent busy");
683 		error = EBUSY;
684 		goto put_out;
685 	}
686 
687 	/* next, lookup the pipe id in the parent list */
688 	reqna = NULL;
689 	mna = netmap_pipe_find(pna, pipe_id);
690 	if (mna) {
691 		if (mna->role == role) {
692 			ND("found %s directly at %d", pipe_id, mna->parent_slot);
693 			reqna = mna;
694 		} else {
695 			ND("found %s indirectly at %d", pipe_id, mna->parent_slot);
696 			reqna = mna->peer;
697 		}
698 		/* the pipe we have found already holds a ref to the parent,
699 		 * so we need to drop the one we got from netmap_get_na()
700 		 */
701 		netmap_unget_na(pna, ifp);
702 		goto found;
703 	}
704 	ND("pipe %s not found, create %d", pipe_id, create);
705 	if (!create) {
706 		error = ENODEV;
707 		goto put_out;
708 	}
709 	/* we create both master and slave.
710 	 * The endpoint we were asked for holds a reference to
711 	 * the other one.
712 	 */
713 	mna = nm_os_malloc(sizeof(*mna));
714 	if (mna == NULL) {
715 		error = ENOMEM;
716 		goto put_out;
717 	}
718 	snprintf(mna->up.name, sizeof(mna->up.name), "%s{%s", pna->name, pipe_id);
719 
720 	mna->role = NM_PIPE_ROLE_MASTER;
721 	mna->parent = pna;
722 	mna->parent_ifp = ifp;
723 
724 	mna->up.nm_txsync = netmap_pipe_txsync;
725 	mna->up.nm_rxsync = netmap_pipe_rxsync;
726 	mna->up.nm_register = netmap_pipe_reg;
727 	mna->up.nm_dtor = netmap_pipe_dtor;
728 	mna->up.nm_krings_create = netmap_pipe_krings_create;
729 	mna->up.nm_krings_delete = netmap_pipe_krings_delete;
730 	mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
731 	mna->up.na_flags |= NAF_MEM_OWNER;
732 	mna->up.na_lut = pna->na_lut;
733 
734 	mna->up.num_tx_rings = req->nr_tx_rings;
735 	nm_bound_var(&mna->up.num_tx_rings, 1,
736 			1, NM_PIPE_MAXRINGS, NULL);
737 	mna->up.num_rx_rings = req->nr_rx_rings;
738 	nm_bound_var(&mna->up.num_rx_rings, 1,
739 			1, NM_PIPE_MAXRINGS, NULL);
740 	mna->up.num_tx_desc = req->nr_tx_slots;
741 	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
742 			1, NM_PIPE_MAXSLOTS, NULL);
743 	mna->up.num_rx_desc = req->nr_rx_slots;
744 	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
745 			1, NM_PIPE_MAXSLOTS, NULL);
746 	error = netmap_attach_common(&mna->up);
747 	if (error)
748 		goto free_mna;
749 	/* register the master with the parent */
750 	error = netmap_pipe_add(pna, mna);
751 	if (error)
752 		goto free_mna;
753 
754 	/* create the slave */
755 	sna = nm_os_malloc(sizeof(*mna));
756 	if (sna == NULL) {
757 		error = ENOMEM;
758 		goto unregister_mna;
759 	}
760 	/* most fields are the same, copy from master and then fix */
761 	*sna = *mna;
762 	sna->up.nm_mem = netmap_mem_get(mna->up.nm_mem);
763 	/* swap the number of tx/rx rings */
764 	sna->up.num_tx_rings = mna->up.num_rx_rings;
765 	sna->up.num_rx_rings = mna->up.num_tx_rings;
766 	snprintf(sna->up.name, sizeof(sna->up.name), "%s}%s", pna->name, pipe_id);
767 	sna->role = NM_PIPE_ROLE_SLAVE;
768 	error = netmap_attach_common(&sna->up);
769 	if (error)
770 		goto free_sna;
771 
772 	/* join the two endpoints */
773 	mna->peer = sna;
774 	sna->peer = mna;
775 
776 	/* we already have a reference to the parent, but we
777 	 * need another one for the other endpoint we created
778 	 */
779 	netmap_adapter_get(pna);
780 	/* likewise for the ifp, if any */
781 	if (ifp)
782 		if_ref(ifp);
783 
784 	if (role == NM_PIPE_ROLE_MASTER) {
785 		reqna = mna;
786 		mna->peer_ref = 1;
787 		netmap_adapter_get(&sna->up);
788 	} else {
789 		reqna = sna;
790 		sna->peer_ref = 1;
791 		netmap_adapter_get(&mna->up);
792 	}
793 	ND("created master %p and slave %p", mna, sna);
794 found:
795 
796 	ND("pipe %s %s at %p", pipe_id,
797 		(reqna->role == NM_PIPE_ROLE_MASTER ? "master" : "slave"), reqna);
798 	*na = &reqna->up;
799 	netmap_adapter_get(*na);
800 
801 	/* keep the reference to the parent.
802 	 * It will be released by the req destructor
803 	 */
804 
805 	return 0;
806 
807 free_sna:
808 	nm_os_free(sna);
809 unregister_mna:
810 	netmap_pipe_remove(pna, mna);
811 free_mna:
812 	nm_os_free(mna);
813 put_out:
814 	netmap_unget_na(pna, ifp);
815 	return error;
816 }
817 
818 
819 #endif /* WITH_PIPES */
820