xref: /freebsd/sys/dev/netmap/netmap_kloop.c (revision e4478d7e46876142b5f75cfc93ef649a6bde05ae)
1 /*
2  * Copyright (C) 2016-2018 Vincenzo Maffione
3  * Copyright (C) 2015 Stefano Garzarella
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *   1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD$
28  */
29 
30 /*
31  * common headers
32  */
33 #if defined(__FreeBSD__)
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/types.h>
38 #include <sys/selinfo.h>
39 #include <sys/socket.h>
40 #include <net/if.h>
41 #include <net/if_var.h>
42 #include <machine/bus.h>
43 
44 #define usleep_range(_1, _2) \
45         pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)
46 
47 #elif defined(linux)
48 #include <bsd_glue.h>
49 #include <linux/file.h>
50 #include <linux/eventfd.h>
51 #endif
52 
53 #include <net/netmap.h>
54 #include <dev/netmap/netmap_kern.h>
55 #include <net/netmap_virt.h>
56 #include <dev/netmap/netmap_mem2.h>
57 
58 /* Support for eventfd-based notifications. */
59 #if defined(linux)
60 #define SYNC_KLOOP_POLL
61 #endif
62 
63 /* Write kring pointers (hwcur, hwtail) to the CSB.
64  * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
65 static inline void
66 sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
67 			   uint32_t hwtail)
68 {
69 	/*
70 	 * The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
71 	 * We allow the application to read a value of hwcur more recent than the value
72 	 * We allow the application to read a value of hwcur that is more recent than
73 	 * the value of hwtail, since this still results in a consistent view of the
74 	 * ring state (and hwcur can never wrap around past hwtail, since hwcur must
75 	 * stay behind head).
76 	 * The following memory barrier scheme is used to make this happen:
77 	 *
78 	 *          Kernel               Application
79 	 *
80 	 *          STORE(hwcur)         LOAD(hwtail)
81 	 *          mb() <-------------> mb()
82 	 *          STORE(hwtail)        LOAD(hwcur)
83 	 */
84 	CSB_WRITE(ptr, hwcur, hwcur);
85 	nm_stst_barrier();
86 	CSB_WRITE(ptr, hwtail, hwtail);
87 }
88 
89 /* Read kring pointers (head, cur, sync_flags) from the CSB.
90  * This routine is coupled with ptnetmap_guest_write_kring_csb(). */
91 static inline void
92 sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
93 			  struct netmap_ring *shadow_ring,
94 			  uint32_t num_slots)
95 {
96 	/*
97 	 * We place a memory barrier to make sure that the update of head never
98 	 * overtakes the update of cur.
99 	 * (see explanation in ptnetmap_guest_write_kring_csb).
100 	 */
101 	CSB_READ(ptr, head, shadow_ring->head);
102 	nm_stst_barrier();
103 	CSB_READ(ptr, cur, shadow_ring->cur);
104 	CSB_READ(ptr, sync_flags, shadow_ring->flags);
105 }
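/*
 * For reference, a sketch of how the loads above pair with the guest-side
 * stores (see ptnetmap_guest_write_kring_csb() for the authoritative
 * sequence); the point is that the kernel can never observe a head more
 * recent than the cur that was published together with it:
 *
 *          Application (guest)        Kernel (this routine)
 *
 *          STORE(cur)                 LOAD(head)
 *          barrier  <-------------->  barrier
 *          STORE(head)                LOAD(cur)
 */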
106 
107 /* Enable or disable application --> kernel kicks. */
108 static inline void
109 csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
110 {
111 	CSB_WRITE(csb_ktoa, kern_need_kick, val);
112 }
113 
114 #ifdef SYNC_KLOOP_POLL
115 /* Are application interrupts enabled or disabled? */
116 static inline uint32_t
117 csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
118 {
119 	uint32_t v;
120 
121 	CSB_READ(csb_atok, appl_need_kick, v);
122 
123 	return v;
124 }
125 #endif  /* SYNC_KLOOP_POLL */
126 
127 static inline void
128 sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
129 {
130 	nm_prinf("%s - name: %s hwcur: %d hwtail: %d "
131 		"rhead: %d rcur: %d rtail: %d",
132 		title, kring->name, kring->nr_hwcur, kring->nr_hwtail,
133 		kring->rhead, kring->rcur, kring->rtail);
134 }
135 
136 struct sync_kloop_ring_args {
137 	struct netmap_kring *kring;
138 	struct nm_csb_atok *csb_atok;
139 	struct nm_csb_ktoa *csb_ktoa;
140 #ifdef SYNC_KLOOP_POLL
141 	struct eventfd_ctx *irq_ctx;
142 #endif /* SYNC_KLOOP_POLL */
143 };
144 
145 static void
146 netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
147 {
148 	struct netmap_kring *kring = a->kring;
149 	struct nm_csb_atok *csb_atok = a->csb_atok;
150 	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
151 	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
152 	bool more_txspace = false;
153 	uint32_t num_slots;
154 	int batch;
155 
156 	num_slots = kring->nkr_num_slots;
157 
158 	/* Disable application --> kernel notifications. */
159 	csb_ktoa_kick_enable(csb_ktoa, 0);
160 	/* Copy the application kring pointers from the CSB */
161 	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
162 
163 	for (;;) {
164 		batch = shadow_ring.head - kring->nr_hwcur;
165 		if (batch < 0)
166 			batch += num_slots;
167 
168 #ifdef PTN_TX_BATCH_LIM
169 		if (batch > PTN_TX_BATCH_LIM(num_slots)) {
170 			/* If the application moves ahead too fast, cut this batch so
171 			 * that we don't exceed the batch limit. */
172 			uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);
173 
174 			if (head_lim >= num_slots)
175 				head_lim -= num_slots;
176 			nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head,
177 					head_lim);
178 			shadow_ring.head = head_lim;
179 			batch = PTN_TX_BATCH_LIM(num_slots);
180 		}
181 #endif /* PTN_TX_BATCH_LIM */
182 
183 		if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
184 			shadow_ring.flags |= NAF_FORCE_RECLAIM;
185 		}
186 
187 		/* Netmap prologue */
188 		shadow_ring.tail = kring->rtail;
189 		if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
190 			/* Reinit ring and enable notifications. */
191 			netmap_ring_reinit(kring);
192 			csb_ktoa_kick_enable(csb_ktoa, 1);
193 			break;
194 		}
195 
196 		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
197 			sync_kloop_kring_dump("pre txsync", kring);
198 		}
199 
200 		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
201 			/* Reenable notifications. */
202 			csb_ktoa_kick_enable(csb_ktoa, 1);
203 			nm_prerr("txsync() failed");
204 			break;
205 		}
206 
207 		/*
208 		 * Finalize
209 		 * Copy kernel hwcur and hwtail into the CSB for the
210 		 * application sync().
211 		 */
212 		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
213 				kring->nr_hwtail);
214 		if (kring->rtail != kring->nr_hwtail) {
215 			/* Some more room available in the parent adapter. */
216 			kring->rtail = kring->nr_hwtail;
217 			more_txspace = true;
218 		}
219 
220 		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
221 			sync_kloop_kring_dump("post txsync", kring);
222 		}
223 
224 		/* Interrupt the application if needed. */
225 #ifdef SYNC_KLOOP_POLL
226 		if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
227 			/* Signal the application; clear more_txspace so we do not signal it again below. */
228 			eventfd_signal(a->irq_ctx, 1);
229 			more_txspace = false;
230 		}
231 #endif /* SYNC_KLOOP_POLL */
232 
233 		/* Read CSB to see if there is more work to do. */
234 		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
235 		if (shadow_ring.head == kring->rhead) {
236 			/*
237 			 * No more packets to transmit. We enable notifications and
238 			 * go to sleep, waiting for a kick from the application when
239 			 * new slots are ready for transmission.
240 			 */
241 			/* Reenable notifications. */
242 			csb_ktoa_kick_enable(csb_ktoa, 1);
243 			/* Doublecheck. */
244 			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
245 			if (shadow_ring.head != kring->rhead) {
246 				/* We won the race condition, there are more packets to
247 				 * transmit. Disable notifications and do another cycle */
248 				csb_ktoa_kick_enable(csb_ktoa, 0);
249 				continue;
250 			}
251 			break;
252 		}
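		/*
		 * Note on the re-check above: notifications are re-enabled
		 * *before* re-reading the CSB. If the order were reversed, the
		 * application could publish a new head right after our read
		 * and, seeing kern_need_kick still cleared, skip the kick; the
		 * kloop would then go to sleep with pending work until some
		 * unrelated event woke it up.
		 */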
253 
254 		if (nm_kr_txempty(kring)) {
255 			/* No more available TX slots. We stop and wait for a notification
256 			 * from the backend (netmap_tx_irq). */
257 			nm_prdis(1, "TX ring");
258 			break;
259 		}
260 	}
261 
262 #ifdef SYNC_KLOOP_POLL
263 	if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
264 		eventfd_signal(a->irq_ctx, 1);
265 	}
266 #endif /* SYNC_KLOOP_POLL */
267 }
268 
269 /* Maximum number of consecutive RX cycles without receiving any packet. */
270 #define SYNC_LOOP_RX_DRY_CYCLES_MAX	2
271 
272 static inline int
273 sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
274 {
275 	return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
276 				kring->nkr_num_slots - 1));
277 }
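/*
 * Illustrative example for the check above (hypothetical numbers): with
 * nkr_num_slots = 1024 and g_head = 10, the kernel may fill receive slots
 * only up to index 9 (nm_prev(g_head)); if nr_hwtail is already 9, there is
 * no free slot left to receive into and the function returns true.
 */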
278 
279 static void
280 netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
281 {
282 
283 	struct netmap_kring *kring = a->kring;
284 	struct nm_csb_atok *csb_atok = a->csb_atok;
285 	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
286 	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
287 	int dry_cycles = 0;
288 	bool some_recvd = false;
289 	uint32_t num_slots;
290 
291 	num_slots = kring->nkr_num_slots;
292 
296 	/* Disable notifications. */
297 	csb_ktoa_kick_enable(csb_ktoa, 0);
298 	/* Copy the application kring pointers from the CSB */
299 	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
300 
301 	for (;;) {
302 		uint32_t hwtail;
303 
304 		/* Netmap prologue */
305 		shadow_ring.tail = kring->rtail;
306 		if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
307 			/* Reinit ring and enable notifications. */
308 			netmap_ring_reinit(kring);
309 			csb_ktoa_kick_enable(csb_ktoa, 1);
310 			break;
311 		}
312 
313 		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
314 			sync_kloop_kring_dump("pre rxsync", kring);
315 		}
316 
317 		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
318 			/* Reenable notifications. */
319 			csb_ktoa_kick_enable(csb_ktoa, 1);
320 			nm_prerr("rxsync() failed");
321 			break;
322 		}
323 
324 		/*
325 		 * Finalize
326 		 * Copy kernel hwcur and hwtail into the CSB for the application sync()
327 		 */
328 		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
329 		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
330 		if (kring->rtail != hwtail) {
331 			kring->rtail = hwtail;
332 			some_recvd = true;
333 			dry_cycles = 0;
334 		} else {
335 			dry_cycles++;
336 		}
337 
338 		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
339 			sync_kloop_kring_dump("post rxsync", kring);
340 		}
341 
342 #ifdef SYNC_KLOOP_POLL
343 		/* Interrupt the application if needed. */
344 		if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
345 			/* Signal the application; clear some_recvd so we do not signal it again below. */
346 			eventfd_signal(a->irq_ctx, 1);
347 			some_recvd = false;
348 		}
349 #endif /* SYNC_KLOOP_POLL */
350 
351 		/* Read CSB to see if there is more work to do. */
352 		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
353 		if (sync_kloop_norxslots(kring, shadow_ring.head)) {
354 			/*
355 			 * No more slots available for reception. We enable notifications and
356 			 * go to sleep, waiting for a kick from the application when new receive
357 			 * slots are available.
358 			 */
359 			/* Reenable notifications. */
360 			csb_ktoa_kick_enable(csb_ktoa, 1);
361 			/* Doublecheck. */
362 			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
363 			if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
364 				/* We won the race condition, more slots are available. Disable
365 				 * notifications and do another cycle. */
366 				csb_ktoa_kick_enable(csb_ktoa, 0);
367 				continue;
368 			}
369 			break;
370 		}
371 
372 		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
373 		if (unlikely(hwtail == kring->rhead ||
374 					dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
375 			/* No more packets to be read from the backend. We stop and
376 			 * wait for a notification from the backend (netmap_rx_irq). */
377 			nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
378 					hwtail, kring->rhead, dry_cycles);
379 			break;
380 		}
381 	}
382 
385 #ifdef SYNC_KLOOP_POLL
386 	/* Interrupt the application if needed. */
387 	if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
388 		eventfd_signal(a->irq_ctx, 1);
389 	}
390 #endif /* SYNC_KLOOP_POLL */
391 }
392 
393 #ifdef SYNC_KLOOP_POLL
394 struct sync_kloop_poll_entry {
395 	/* Support for receiving notifications from
396 	 * a netmap ring or from the application. */
397 	struct file *filp;
398 	wait_queue_t wait;
399 	wait_queue_head_t *wqh;
400 
401 	/* Support for sending notifications to the application. */
402 	struct eventfd_ctx *irq_ctx;
403 	struct file *irq_filp;
404 };
405 
406 struct sync_kloop_poll_ctx {
407 	poll_table wait_table;
408 	unsigned int next_entry;
409 	unsigned int num_entries;
410 	struct sync_kloop_poll_entry entries[0];
411 };
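/*
 * Note: entries[] is filled in registration order by the queue_proc below.
 * In netmap_sync_kloop(), entries[0..num_rings-1] end up tracking the
 * per-ring ioeventfds (and hold the matching irqfd contexts), while the
 * last two entries track the RX and TX selinfo of the netmap adapter.
 */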
412 
413 static void
414 sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
415 				poll_table *pt)
416 {
417 	struct sync_kloop_poll_ctx *poll_ctx =
418 		container_of(pt, struct sync_kloop_poll_ctx, wait_table);
419 	struct sync_kloop_poll_entry *entry = poll_ctx->entries +
420 						poll_ctx->next_entry;
421 
422 	BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
423 	entry->wqh = wqh;
424 	entry->filp = file;
425 	/* Use the default wake up function. */
426 	init_waitqueue_entry(&entry->wait, current);
427 	add_wait_queue(wqh, &entry->wait);
428 	poll_ctx->next_entry++;
429 }
430 #endif  /* SYNC_KLOOP_POLL */
431 
432 int
433 netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
434 {
435 	struct nmreq_sync_kloop_start *req =
436 		(struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
437 	struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
438 #ifdef SYNC_KLOOP_POLL
439 	struct sync_kloop_poll_ctx *poll_ctx = NULL;
440 #endif  /* SYNC_KLOOP_POLL */
441 	int num_rx_rings, num_tx_rings, num_rings;
442 	uint32_t sleep_us = req->sleep_us;
443 	struct nm_csb_atok *csb_atok_base;
444 	struct nm_csb_ktoa *csb_ktoa_base;
445 	struct netmap_adapter *na;
446 	struct nmreq_option *opt;
447 	int err = 0;
448 	int i;
449 
450 	if (sleep_us > 1000000) {
451 		/* We do not accept sleeping for more than a second. */
452 		return EINVAL;
453 	}
454 
455 	if (priv->np_nifp == NULL) {
456 		return ENXIO;
457 	}
458 	mb(); /* make sure following reads are not from cache */
459 
460 	na = priv->np_na;
461 	if (!nm_netmap_on(na)) {
462 		return ENXIO;
463 	}
464 
465 	NMG_LOCK();
466 	/* Make sure the application is working in CSB mode. */
467 	if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
468 		NMG_UNLOCK();
469 		nm_prerr("sync-kloop on %s requires "
470 				"NETMAP_REQ_OPT_CSB option", na->name);
471 		return EINVAL;
472 	}
473 
474 	csb_atok_base = priv->np_csb_atok_base;
475 	csb_ktoa_base = priv->np_csb_ktoa_base;
476 
477 	/* Make sure that no kloop is currently running. */
478 	if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
479 		err = EBUSY;
480 	}
481 	priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
482 	NMG_UNLOCK();
483 	if (err) {
484 		return err;
485 	}
486 
487 	num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
488 	num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
489 	num_rings = num_tx_rings + num_rx_rings;
490 
491 	/* Validate notification options. */
492 	opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
493 				NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
494 	if (opt != NULL) {
495 		err = nmreq_checkduplicate(opt);
496 		if (err) {
497 			opt->nro_status = err;
498 			goto out;
499 		}
500 		if (opt->nro_size != sizeof(*eventfds_opt) +
501 			sizeof(eventfds_opt->eventfds[0]) * num_rings) {
502 			/* Option size not consistent with the number of
503 			 * entries. */
504 			opt->nro_status = err = EINVAL;
505 			goto out;
506 		}
507 #ifdef SYNC_KLOOP_POLL
508 		eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
509 		opt->nro_status = 0;
510 		/* We need 2 poll entries for TX and RX notifications coming
511 		 * from the netmap adapter, plus one entry per ring for the
512 		 * notifications coming from the application. */
513 		poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
514 				(2 + num_rings) * sizeof(poll_ctx->entries[0]));
515 		init_poll_funcptr(&poll_ctx->wait_table,
516 					sync_kloop_poll_table_queue_proc);
517 		poll_ctx->num_entries = 2 + num_rings;
518 		poll_ctx->next_entry = 0;
519 		/* Poll for notifications coming from the application through
520 		 * eventfds. */
521 		for (i = 0; i < num_rings; i++) {
522 			struct eventfd_ctx *irq;
523 			struct file *filp;
524 			unsigned long mask;
525 
526 			filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd);
527 			if (IS_ERR(filp)) {
528 				err = PTR_ERR(filp);
529 				goto out;
530 			}
531 			mask = filp->f_op->poll(filp, &poll_ctx->wait_table);
532 			if (mask & POLLERR) {
533 				err = EINVAL;
534 				goto out;
535 			}
536 
537 			filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd);
538 			if (IS_ERR(filp)) {
539 				err = PTR_ERR(filp);
540 				goto out;
541 			}
542 			poll_ctx->entries[i].irq_filp = filp;
543 			irq = eventfd_ctx_fileget(filp);
544 			if (IS_ERR(irq)) {
545 				err = PTR_ERR(irq);
546 				goto out;
547 			}
548 			poll_ctx->entries[i].irq_ctx = irq;
549 		}
550 		/* Poll for notifications coming from the netmap rings bound to
551 		 * this file descriptor. */
552 		{
553 			NM_SELINFO_T *si[NR_TXRX];
554 
555 			NMG_LOCK();
556 			si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] :
557 				&na->rx_rings[priv->np_qfirst[NR_RX]]->si;
558 			si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] :
559 				&na->tx_rings[priv->np_qfirst[NR_TX]]->si;
560 			NMG_UNLOCK();
561 			poll_wait(priv->np_filp, si[NR_RX], &poll_ctx->wait_table);
562 			poll_wait(priv->np_filp, si[NR_TX], &poll_ctx->wait_table);
563 		}
564 #else   /* SYNC_KLOOP_POLL */
565 		opt->nro_status = EOPNOTSUPP;
566 		goto out;
567 #endif  /* SYNC_KLOOP_POLL */
568 	}
569 
570 	/* Main loop. */
571 	for (;;) {
572 		if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
573 			break;
574 		}
575 
576 #ifdef SYNC_KLOOP_POLL
577 		if (poll_ctx)
578 			__set_current_state(TASK_INTERRUPTIBLE);
579 #endif  /* SYNC_KLOOP_POLL */
580 
581 		/* Process all the TX rings bound to this file descriptor. */
582 		for (i = 0; i < num_tx_rings; i++) {
583 			struct sync_kloop_ring_args a = {
584 				.kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]],
585 				.csb_atok = csb_atok_base + i,
586 				.csb_ktoa = csb_ktoa_base + i,
587 			};
588 
589 #ifdef SYNC_KLOOP_POLL
590 			if (poll_ctx)
591 				a.irq_ctx = poll_ctx->entries[i].irq_ctx;
592 #endif /* SYNC_KLOOP_POLL */
593 			if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) {
594 				continue;
595 			}
596 			netmap_sync_kloop_tx_ring(&a);
597 			nm_kr_put(a.kring);
598 		}
599 
600 		/* Process all the RX rings bound to this file descriptor. */
601 		for (i = 0; i < num_rx_rings; i++) {
602 			struct sync_kloop_ring_args a = {
603 				.kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]],
604 				.csb_atok = csb_atok_base + num_tx_rings + i,
605 				.csb_ktoa = csb_ktoa_base + num_tx_rings + i,
606 			};
607 
608 #ifdef SYNC_KLOOP_POLL
609 			if (poll_ctx)
610 				a.irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx;
611 #endif /* SYNC_KLOOP_POLL */
612 
613 			if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) {
614 				continue;
615 			}
616 			netmap_sync_kloop_rx_ring(&a);
617 			nm_kr_put(a.kring);
618 		}
619 
620 #ifdef SYNC_KLOOP_POLL
621 		if (poll_ctx) {
622 			/* If a poll context is present, yield to the scheduler
623 			 * waiting for a notification to come either from
624 			 * netmap or the application. */
625 			schedule_timeout_interruptible(msecs_to_jiffies(1000));
626 		} else
627 #endif /* SYNC_KLOOP_POLL */
628 		{
629 			/* Default synchronization method: sleep for a while. */
630 			usleep_range(sleep_us, sleep_us);
631 		}
632 	}
633 out:
634 #ifdef SYNC_KLOOP_POLL
635 	if (poll_ctx) {
636 		/* Stop polling from netmap and the eventfds, and deallocate
637 		 * the poll context. */
638 		__set_current_state(TASK_RUNNING);
639 		for (i = 0; i < poll_ctx->next_entry; i++) {
640 			struct sync_kloop_poll_entry *entry =
641 						poll_ctx->entries + i;
642 
643 			if (entry->wqh)
644 				remove_wait_queue(entry->wqh, &entry->wait);
645 			/* Release the reference taken (via eventfd_fget()) on the
646 			 * eventfd file, but not on the netmap file descriptor,
647 			 * since no reference was taken on that one. */
648 			if (entry->filp && entry->filp != priv->np_filp)
649 				fput(entry->filp);
650 			if (entry->irq_ctx)
651 				eventfd_ctx_put(entry->irq_ctx);
652 			if (entry->irq_filp)
653 				fput(entry->irq_filp);
654 		}
655 		nm_os_free(poll_ctx);
656 		poll_ctx = NULL;
657 	}
658 #endif /* SYNC_KLOOP_POLL */
659 
660 	/* Reset the kloop state. */
661 	NMG_LOCK();
662 	priv->np_kloop_state = 0;
663 	NMG_UNLOCK();
664 
665 	return err;
666 }
667 
668 int
669 netmap_sync_kloop_stop(struct netmap_priv_d *priv)
670 {
671 	bool running = true;
672 	int err = 0;
673 
674 	NMG_LOCK();
675 	priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
676 	NMG_UNLOCK();
677 	while (running) {
678 		usleep_range(1000, 1500);
679 		NMG_LOCK();
680 		running = (NM_ACCESS_ONCE(priv->np_kloop_state)
681 				& NM_SYNC_KLOOP_RUNNING);
682 		NMG_UNLOCK();
683 	}
684 
685 	return err;
686 }
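/*
 * For context, a minimal userspace sketch of how an application might drive
 * the two routines above (illustrative only: the file descriptor is assumed
 * to be already registered in CSB mode via NETMAP_REQ_REGISTER plus
 * NETMAP_REQ_OPT_CSB, error handling is omitted, and net/netmap.h remains
 * the authoritative reference for the request layouts):
 *
 *	struct nmreq_sync_kloop_start req = { .sleep_us = 100 };
 *	struct nmreq_header hdr = {
 *		.nr_version = NETMAP_API,
 *		.nr_reqtype = NETMAP_REQ_SYNC_KLOOP_START,
 *		.nr_body    = (uintptr_t)&req,
 *	};
 *
 *	ioctl(fd, NIOCCTRL, &hdr);	// blocks here, running the kloop
 *
 * and, from another thread working on the same file descriptor:
 *
 *	struct nmreq_header stop = {
 *		.nr_version = NETMAP_API,
 *		.nr_reqtype = NETMAP_REQ_SYNC_KLOOP_STOP,
 *	};
 *	ioctl(fd, NIOCCTRL, &stop);	// makes the blocked ioctl above return
 */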
687 
688 #ifdef WITH_PTNETMAP
689 /*
690  * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
691  * These routines are reused across the different operating systems supported
692  * by netmap.
693  */
694 
695 /*
696  * Reconcile host and guest views of the transmit ring.
697  *
698  * Guest user wants to transmit packets up to the one before ring->head,
699  * and guest kernel knows tx_ring->hwcur is the first packet unsent
700  * by the host kernel.
701  *
702  * We push out as many packets as possible, and possibly
703  * reclaim buffers from previously completed transmission.
704  *
705  * Notifications from the host are enabled only if the guest user would
706  * block (no space in the ring).
707  */
708 bool
709 netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
710 			struct netmap_kring *kring, int flags)
711 {
712 	bool notify = false;
713 
714 	/* Disable notifications */
715 	atok->appl_need_kick = 0;
716 
717 	/*
718 	 * First part: tell the host (updating the CSB) to process the new
719 	 * packets.
720 	 */
721 	kring->nr_hwcur = ktoa->hwcur;
722 	ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead);
723 
724 	/* Ask for a kick from the guest to the host if needed. */
725 	if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring))
726 		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
727 			(flags & NAF_FORCE_RECLAIM)) {
728 		atok->sync_flags = flags;
729 		notify = true;
730 	}
731 
732 	/*
733 	 * Second part: reclaim buffers for completed transmissions.
734 	 */
735 	if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) {
736                 ptnetmap_guest_read_kring_csb(ktoa, kring);
737 	}
738 
739         /*
740          * No more room in the ring for new transmissions. The user thread will
741 	 * go to sleep and we need to be notified by the host when more free
742 	 * space is available.
743          */
744 	if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
745 		/* Reenable notifications. */
746 		atok->appl_need_kick = 1;
747                 /* Double check */
748                 ptnetmap_guest_read_kring_csb(ktoa, kring);
749                 /* If there is new free space, disable notifications */
750 		if (unlikely(!nm_kr_txempty(kring))) {
751 			atok->appl_need_kick = 0;
752 		}
753 	}
754 
755 	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
756 		kring->name, atok->head, atok->cur, ktoa->hwtail,
757 		kring->rhead, kring->rcur, kring->nr_hwtail);
758 
759 	return notify;
760 }
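/*
 * A hypothetical caller in a ptnet driver txsync callback would look roughly
 * as follows (the ptnet_queue structure and ptnet_kick() are illustrative
 * names, not part of this file):
 *
 *	static int
 *	ptnet_nm_txsync(struct netmap_kring *kring, int flags)
 *	{
 *		struct ptnet_queue *pq = ...;	// per-ring driver state
 *
 *		if (netmap_pt_guest_txsync(pq->atok, pq->ktoa, kring, flags))
 *			ptnet_kick(pq);		// notify the host (doorbell)
 *		return 0;
 *	}
 */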
761 
762 /*
763  * Reconcile host and guest view of the receive ring.
764  *
765  * Update hwcur/hwtail from host (reading from CSB).
766  *
767  * If guest user has released buffers up to the one before ring->head, we
768  * also give them to the host.
769  *
770  * Notifications from the host are enabled only if the guest user would
771  * block (no more completed slots in the ring).
772  */
773 bool
774 netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
775 			struct netmap_kring *kring, int flags)
776 {
777 	bool notify = false;
778 
779         /* Disable notifications */
780 	atok->appl_need_kick = 0;
781 
782 	/*
783 	 * First part: import newly received packets, by updating the kring
784 	 * hwtail to the hwtail known from the host (read from the CSB).
785 	 * This also updates the kring hwcur.
786 	 */
787         ptnetmap_guest_read_kring_csb(ktoa, kring);
788 	kring->nr_kflags &= ~NKR_PENDINTR;
789 
790 	/*
791 	 * Second part: tell the host about the slots that guest user has
792 	 * released, by updating cur and head in the CSB.
793 	 */
794 	if (kring->rhead != kring->nr_hwcur) {
795 		ptnetmap_guest_write_kring_csb(atok, kring->rcur,
796 					       kring->rhead);
797                 /* Ask for a kick from the guest to the host if needed. */
798 		if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
799 			atok->sync_flags = flags;
800 			notify = true;
801 		}
802 	}
803 
804         /*
805          * No more completed RX slots. The user thread will go to sleep and
806 	 * we need to be notified by the host when more RX slots have been
807 	 * completed.
808          */
809 	if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
810 		/* Reenable notifications. */
811                 atok->appl_need_kick = 1;
812                 /* Double check */
813                 ptnetmap_guest_read_kring_csb(ktoa, kring);
814                 /* If there are new slots, disable notifications. */
815 		if (!nm_kr_rxempty(kring)) {
816                         atok->appl_need_kick = 0;
817                 }
818         }
819 
820 	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
821 		kring->name, atok->head, atok->cur, ktoa->hwtail,
822 		kring->rhead, kring->rcur, kring->nr_hwtail);
823 
824 	return notify;
825 }
826 
827 /*
828  * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
829  */
830 int
831 ptnet_nm_krings_create(struct netmap_adapter *na)
832 {
833 	struct netmap_pt_guest_adapter *ptna =
834 			(struct netmap_pt_guest_adapter *)na; /* Upcast. */
835 	struct netmap_adapter *na_nm = &ptna->hwup.up;
836 	struct netmap_adapter *na_dr = &ptna->dr.up;
837 	int ret;
838 
839 	if (ptna->backend_users) {
840 		return 0;
841 	}
842 
843 	/* Create krings on the public netmap adapter. */
844 	ret = netmap_hw_krings_create(na_nm);
845 	if (ret) {
846 		return ret;
847 	}
848 
849 	/* Copy krings into the netmap adapter private to the driver. */
850 	na_dr->tx_rings = na_nm->tx_rings;
851 	na_dr->rx_rings = na_nm->rx_rings;
852 
853 	return 0;
854 }
855 
856 void
857 ptnet_nm_krings_delete(struct netmap_adapter *na)
858 {
859 	struct netmap_pt_guest_adapter *ptna =
860 			(struct netmap_pt_guest_adapter *)na; /* Upcast. */
861 	struct netmap_adapter *na_nm = &ptna->hwup.up;
862 	struct netmap_adapter *na_dr = &ptna->dr.up;
863 
864 	if (ptna->backend_users) {
865 		return;
866 	}
867 
868 	na_dr->tx_rings = NULL;
869 	na_dr->rx_rings = NULL;
870 
871 	netmap_hw_krings_delete(na_nm);
872 }
873 
874 void
875 ptnet_nm_dtor(struct netmap_adapter *na)
876 {
877 	struct netmap_pt_guest_adapter *ptna =
878 			(struct netmap_pt_guest_adapter *)na;
879 
880 	netmap_mem_put(ptna->dr.up.nm_mem);
881 	memset(&ptna->dr, 0, sizeof(ptna->dr));
882 	netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
883 }
884 
885 int
886 netmap_pt_guest_attach(struct netmap_adapter *arg,
887 		       unsigned int nifp_offset, unsigned int memid)
888 {
889 	struct netmap_pt_guest_adapter *ptna;
890 	struct ifnet *ifp = arg ? arg->ifp : NULL;
891 	int error;
892 
893 	/* get allocator */
894 	arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
895 	if (arg->nm_mem == NULL)
896 		return ENOMEM;
897 	arg->na_flags |= NAF_MEM_OWNER;
898 	error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
899 	if (error)
900 		return error;
901 
902 	/* get the netmap_pt_guest_adapter */
903 	ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
904 
905 	/* Initialize a separate pass-through netmap adapter that is going to
906 	 * be used by the ptnet driver only, and so never exposed to netmap
907          * applications. We only need a subset of the available fields. */
908 	memset(&ptna->dr, 0, sizeof(ptna->dr));
909 	ptna->dr.up.ifp = ifp;
910 	ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
911         ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
912 
913 	ptna->backend_users = 0;
914 
915 	return 0;
916 }
917 
918 #endif /* WITH_PTNETMAP */
919