xref: /freebsd/sys/dev/netmap/netmap_kloop.c (revision a6578a04e440f79f3b913660221caa9cde3e722c)
1 /*
2  * Copyright (C) 2016-2018 Vincenzo Maffione
3  * Copyright (C) 2015 Stefano Garzarella
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *   1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD$
28  */
29 
30 /*
31  * common headers
32  */
33 #if defined(__FreeBSD__)
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/types.h>
38 #include <sys/selinfo.h>
39 #include <sys/socket.h>
40 #include <net/if.h>
41 #include <net/if_var.h>
42 #include <machine/bus.h>
43 
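/*
 * FreeBSD has no usleep_range(): map it onto pause_sbt(), sleeping for
 * roughly _1 microseconds (the requested upper bound _2 is ignored).
 */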
44 #define usleep_range(_1, _2) \
45         pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)
46 
47 #elif defined(linux)
48 #include <bsd_glue.h>
49 #include <linux/file.h>
50 #include <linux/eventfd.h>
51 #endif
52 
53 #include <net/netmap.h>
54 #include <dev/netmap/netmap_kern.h>
55 #include <net/netmap_virt.h>
56 #include <dev/netmap/netmap_mem2.h>
57 
58 /* Support for eventfd-based notifications. */
59 #if defined(linux)
60 #define SYNC_KLOOP_POLL
61 #endif
62 
63 /* Write kring pointers (hwcur, hwtail) to the CSB.
64  * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
65 static inline void
66 sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
67 			   uint32_t hwtail)
68 {
69 	/*
70 	 * The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
71 	 * We allow the application to read a value of hwcur more recent than the
72 	 * value of hwtail, since this still results in a consistent view of the
73 	 * ring state (hwcur can never wrap around hwtail, because hwcur must stay
74 	 * behind head).
75 	 *
76 	 * The following memory barrier scheme is used to make this happen:
77 	 *
78 	 *          Application          Kernel
79 	 *
80 	 *          STORE(hwcur)         LOAD(hwtail)
81 	 *          mb() <-------------> mb()
82 	 *          STORE(hwtail)        LOAD(hwcur)
83 	 */
84 	CSB_WRITE(ptr, hwcur, hwcur);
85 	nm_stst_barrier();
86 	CSB_WRITE(ptr, hwtail, hwtail);
87 }
88 
89 /* Read kring pointers (head, cur, sync_flags) from the CSB.
90  * This routine is coupled with ptnetmap_guest_write_kring_csb(). */
91 static inline void
92 sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
93 			  struct netmap_ring *shadow_ring,
94 			  uint32_t num_slots)
95 {
96 	/*
97 	 * We place a memory barrier to make sure that the update of head never
98 	 * overtakes the update of cur.
99 	 * (see explanation in ptnetmap_guest_write_kring_csb).
100 	 */
101 	CSB_READ(ptr, head, shadow_ring->head);
102 	nm_stst_barrier();
103 	CSB_READ(ptr, cur, shadow_ring->cur);
104 	CSB_READ(ptr, sync_flags, shadow_ring->flags);
105 }
106 
107 /* Enable or disable application --> kernel kicks. */
108 static inline void
109 csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
110 {
111 	CSB_WRITE(csb_ktoa, kern_need_kick, val);
112 }
113 
114 /* Are application interrupts enabled or disabled? */
115 static inline uint32_t
116 csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
117 {
118 	uint32_t v;
119 
120 	CSB_READ(csb_atok, appl_need_kick, v);
121 
122 	return v;
123 }
124 
125 static inline void
126 sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
127 {
128 	nm_prinf("%s - name: %s hwcur: %d hwtail: %d "
129 		"rhead: %d rcur: %d rtail: %d",
130 		title, kring->name, kring->nr_hwcur, kring->nr_hwtail,
131 		kring->rhead, kring->rcur, kring->rtail);
132 }
133 
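/*
 * Arguments for netmap_sync_kloop_tx_ring() and netmap_sync_kloop_rx_ring():
 * the kring to be processed, the corresponding CSB entries, and (if eventfd
 * notifications are in use) the context used to interrupt the application.
 */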
134 struct sync_kloop_ring_args {
135 	struct netmap_kring *kring;
136 	struct nm_csb_atok *csb_atok;
137 	struct nm_csb_ktoa *csb_ktoa;
138 #ifdef SYNC_KLOOP_POLL
139 	struct eventfd_ctx *irq_ctx;
140 #endif /* SYNC_KLOOP_POLL */
141 };
142 
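/*
 * Process a TX kring on behalf of the application: keep running txsync
 * until either the application publishes no new slots (head unchanged) or
 * the ring runs out of TX space, notifying the application through the
 * eventfd when more TX space becomes available.
 */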
143 static void
144 netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
145 {
146 	struct netmap_kring *kring = a->kring;
147 	struct nm_csb_atok *csb_atok = a->csb_atok;
148 	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
149 	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
150 	bool more_txspace = false;
151 	uint32_t num_slots;
152 	int batch;
153 
154 	num_slots = kring->nkr_num_slots;
155 
156 	/* Disable application --> kernel notifications. */
157 	csb_ktoa_kick_enable(csb_ktoa, 0);
158 	/* Copy the application kring pointers from the CSB */
159 	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
160 
161 	for (;;) {
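		/* Number of slots to process in this iteration: the distance
		 * from nr_hwcur to the application's head, modulo the ring size. */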
162 		batch = shadow_ring.head - kring->nr_hwcur;
163 		if (batch < 0)
164 			batch += num_slots;
165 
166 #ifdef PTN_TX_BATCH_LIM
167 		if (batch > PTN_TX_BATCH_LIM(num_slots)) {
168 			/* If the application moves ahead too fast, trim the advance so
169 			 * that we don't exceed our batch limit. */
170 			uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);
171 
172 			if (head_lim >= num_slots)
173 				head_lim -= num_slots;
174 			nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head,
175 					head_lim);
176 			shadow_ring.head = head_lim;
177 			batch = PTN_TX_BATCH_LIM(num_slots);
178 		}
179 #endif /* PTN_TX_BATCH_LIM */
180 
181 		if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
182 			shadow_ring.flags |= NAF_FORCE_RECLAIM;
183 		}
184 
185 		/* Netmap prologue */
186 		shadow_ring.tail = kring->rtail;
187 		if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
188 			/* Reinit ring and enable notifications. */
189 			netmap_ring_reinit(kring);
190 			csb_ktoa_kick_enable(csb_ktoa, 1);
191 			break;
192 		}
193 
194 		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
195 			sync_kloop_kring_dump("pre txsync", kring);
196 		}
197 
198 		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
199 			/* Reenable notifications. */
200 			csb_ktoa_kick_enable(csb_ktoa, 1);
201 			nm_prerr("txsync() failed");
202 			break;
203 		}
204 
205 		/*
206 		 * Finalize
207 		 * Copy kernel hwcur and hwtail into the CSB for the application sync(), and
208 		 * do the nm_sync_finalize.
209 		 */
210 		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
211 				kring->nr_hwtail);
212 		if (kring->rtail != kring->nr_hwtail) {
213 			/* Some more room available in the parent adapter. */
214 			kring->rtail = kring->nr_hwtail;
215 			more_txspace = true;
216 		}
217 
218 		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
219 			sync_kloop_kring_dump("post txsync", kring);
220 		}
221 
222 		/* Interrupt the application if needed. */
223 #ifdef SYNC_KLOOP_POLL
224 		if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
225 			/* Notify the application; clear more_txspace to avoid a redundant kick below. */
226 			eventfd_signal(a->irq_ctx, 1);
227 			more_txspace = false;
228 		}
229 #endif /* SYNC_KLOOP_POLL */
230 
231 		/* Read CSB to see if there is more work to do. */
232 		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
233 		if (shadow_ring.head == kring->rhead) {
234 			/*
235 			 * No more packets to transmit. We enable notifications and
236 			 * go to sleep, waiting for a kick from the application when
237 			 * new slots are ready for transmission.
238 			 */
239 			/* Reenable notifications. */
240 			csb_ktoa_kick_enable(csb_ktoa, 1);
241 			/* Doublecheck. */
242 			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
243 			if (shadow_ring.head != kring->rhead) {
244 				/* We won the race condition, there are more packets to
245 				 * transmit. Disable notifications and do another cycle */
246 				csb_ktoa_kick_enable(csb_ktoa, 0);
247 				continue;
248 			}
249 			break;
250 		}
251 
252 		if (nm_kr_txempty(kring)) {
253 			/* No more available TX slots. We stop and wait for a notification
254 			 * from the backend (netmap_tx_irq). */
255 			nm_prdis(1, "TX ring");
256 			break;
257 		}
258 	}
259 
260 #ifdef SYNC_KLOOP_POLL
261 	if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
262 		eventfd_signal(a->irq_ctx, 1);
263 	}
264 #endif /* SYNC_KLOOP_POLL */
265 }
266 
267 /* Maximum number of consecutive RX cycles without receiving any packet. */
268 #define SYNC_LOOP_RX_DRY_CYCLES_MAX	2
269 
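/*
 * True if the kernel has filled all the slots made available by the
 * application, i.e. hwtail has reached the slot right before g_head.
 */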
270 static inline int
271 sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
272 {
273 	return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
274 				kring->nkr_num_slots - 1));
275 }
276 
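/*
 * Process an RX kring on behalf of the application: keep running rxsync
 * until there are no more free slots for reception or no more packets
 * arrive from the backend, notifying the application through the eventfd
 * when new packets have been received.
 */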
277 static void
278 netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
279 {
280 
281 	struct netmap_kring *kring = a->kring;
282 	struct nm_csb_atok *csb_atok = a->csb_atok;
283 	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
284 	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
285 	int dry_cycles = 0;
286 	bool some_recvd = false;
287 	uint32_t num_slots;
288 
289 	num_slots = kring->nkr_num_slots;
293 
294 	/* Disable notifications. */
295 	csb_ktoa_kick_enable(csb_ktoa, 0);
296 	/* Copy the application kring pointers from the CSB */
297 	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
298 
299 	for (;;) {
300 		uint32_t hwtail;
301 
302 		/* Netmap prologue */
303 		shadow_ring.tail = kring->rtail;
304 		if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
305 			/* Reinit ring and enable notifications. */
306 			netmap_ring_reinit(kring);
307 			csb_ktoa_kick_enable(csb_ktoa, 1);
308 			break;
309 		}
310 
311 		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
312 			sync_kloop_kring_dump("pre rxsync", kring);
313 		}
314 
315 		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
316 			/* Reenable notifications. */
317 			csb_ktoa_kick_enable(csb_ktoa, 1);
318 			nm_prerr("rxsync() failed");
319 			break;
320 		}
321 
322 		/*
323 		 * Finalize
324 		 * Copy kernel hwcur and hwtail into the CSB for the application sync()
325 		 */
326 		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
327 		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
328 		if (kring->rtail != hwtail) {
329 			kring->rtail = hwtail;
330 			some_recvd = true;
331 			dry_cycles = 0;
332 		} else {
333 			dry_cycles++;
334 		}
335 
336 		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
337 			sync_kloop_kring_dump("post rxsync", kring);
338 		}
339 
340 #ifdef SYNC_KLOOP_POLL
341 		/* Interrupt the application if needed. */
342 		if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
343 			/* Notify the application; clear some_recvd to avoid a redundant kick below. */
344 			eventfd_signal(a->irq_ctx, 1);
345 			some_recvd = false;
346 		}
347 #endif /* SYNC_KLOOP_POLL */
348 
349 		/* Read CSB to see if there is more work to do. */
350 		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
351 		if (sync_kloop_norxslots(kring, shadow_ring.head)) {
352 			/*
353 			 * No more slots available for reception. We enable notifications and
354 			 * go to sleep, waiting for a kick from the application when new receive
355 			 * slots are available.
356 			 */
357 			/* Reenable notifications. */
358 			csb_ktoa_kick_enable(csb_ktoa, 1);
359 			/* Doublecheck. */
360 			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
361 			if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
362 				/* We won the race condition, more slots are available. Disable
363 				 * notifications and do another cycle. */
364 				csb_ktoa_kick_enable(csb_ktoa, 0);
365 				continue;
366 			}
367 			break;
368 		}
369 
370 		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
371 		if (unlikely(hwtail == kring->rhead ||
372 					dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
373 			/* No more packets to be read from the backend. We stop and
374 			 * wait for a notification from the backend (netmap_rx_irq). */
375 			nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
376 					hwtail, kring->rhead, dry_cycles);
377 			break;
378 		}
379 	}
380 
383 #ifdef SYNC_KLOOP_POLL
384 	/* Interrupt the application if needed. */
385 	if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
386 		eventfd_signal(a->irq_ctx, 1);
387 	}
388 #endif /* SYNC_KLOOP_POLL */
389 }
390 
391 #ifdef SYNC_KLOOP_POLL
392 struct sync_kloop_poll_entry {
393 	/* Support for receiving notifications from
394 	 * a netmap ring or from the application. */
395 	struct file *filp;
396 	wait_queue_t wait;
397 	wait_queue_head_t *wqh;
398 
399 	/* Support for sending notifications to the application. */
400 	struct eventfd_ctx *irq_ctx;
401 	struct file *irq_filp;
402 };
403 
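/* Poll context for the kloop: one entry per application eventfd, plus the
 * entries used to wait on the netmap TX/RX selinfo queues. */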
404 struct sync_kloop_poll_ctx {
405 	poll_table wait_table;
406 	unsigned int next_entry;
407 	unsigned int num_entries;
408 	struct sync_kloop_poll_entry entries[0];
409 };
410 
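/* Callback invoked by the poll machinery for each wait queue we subscribe
 * to: record the file and the wait queue head, so that the kloop can be
 * woken up and can unsubscribe on exit. */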
411 static void
412 sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
413 				poll_table *pt)
414 {
415 	struct sync_kloop_poll_ctx *poll_ctx =
416 		container_of(pt, struct sync_kloop_poll_ctx, wait_table);
417 	struct sync_kloop_poll_entry *entry = poll_ctx->entries +
418 						poll_ctx->next_entry;
419 
420 	BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
421 	entry->wqh = wqh;
422 	entry->filp = file;
423 	/* Use the default wake up function. */
424 	init_waitqueue_entry(&entry->wait, current);
425 	add_wait_queue(wqh, &entry->wait);
426 	poll_ctx->next_entry++;
427 }
428 #endif  /* SYNC_KLOOP_POLL */
429 
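/*
 * Main body of the sync kloop, invoked to serve a sync-kloop start request
 * (struct nmreq_sync_kloop_start). It runs in the context of the calling
 * thread, repeatedly synchronizing all the TX and RX rings bound to this
 * file descriptor until it is asked to stop, sleeping (or waiting on
 * eventfds and selinfo queues) between iterations.
 */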
430 int
431 netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
432 {
433 	struct nmreq_sync_kloop_start *req =
434 		(struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
435 	struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
436 #ifdef SYNC_KLOOP_POLL
437 	struct sync_kloop_poll_ctx *poll_ctx = NULL;
438 #endif  /* SYNC_KLOOP_POLL */
439 	int num_rx_rings, num_tx_rings, num_rings;
440 	uint32_t sleep_us = req->sleep_us;
441 	struct nm_csb_atok* csb_atok_base;
442 	struct nm_csb_ktoa* csb_ktoa_base;
443 	struct netmap_adapter *na;
444 	struct nmreq_option *opt;
445 	int err = 0;
446 	int i;
447 
448 	if (sleep_us > 1000000) {
449 		/* We do not accept sleeping for more than a second. */
450 		return EINVAL;
451 	}
452 
453 	if (priv->np_nifp == NULL) {
454 		return ENXIO;
455 	}
456 	mb(); /* make sure following reads are not from cache */
457 
458 	na = priv->np_na;
459 	if (!nm_netmap_on(na)) {
460 		return ENXIO;
461 	}
462 
463 	NMG_LOCK();
464 	/* Make sure the application is working in CSB mode. */
465 	if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
466 		NMG_UNLOCK();
467 		nm_prerr("sync-kloop on %s requires "
468 				"NETMAP_REQ_OPT_CSB option", na->name);
469 		return EINVAL;
470 	}
471 
472 	csb_atok_base = priv->np_csb_atok_base;
473 	csb_ktoa_base = priv->np_csb_ktoa_base;
474 
475 	/* Make sure that no kloop is currently running. */
476 	if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
477 		err = EBUSY;
478 	}
479 	priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
480 	NMG_UNLOCK();
481 	if (err) {
482 		return err;
483 	}
484 
485 	num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
486 	num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
487 	num_rings = num_tx_rings + num_rx_rings;
488 
489 	/* Validate notification options. */
490 	opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
491 				NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
492 	if (opt != NULL) {
493 		err = nmreq_checkduplicate(opt);
494 		if (err) {
495 			opt->nro_status = err;
496 			goto out;
497 		}
498 		if (opt->nro_size != sizeof(*eventfds_opt) +
499 			sizeof(eventfds_opt->eventfds[0]) * num_rings) {
500 			/* Option size not consistent with the number of
501 			 * entries. */
502 			opt->nro_status = err = EINVAL;
503 			goto out;
504 		}
505 #ifdef SYNC_KLOOP_POLL
506 		eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
507 		opt->nro_status = 0;
508 		/* We need 2 poll entries for TX and RX notifications coming
509 		 * from the netmap adapter, plus one entry per ring for the
510 		 * notifications coming from the application. */
511 		poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
512 				(2 + num_rings) * sizeof(poll_ctx->entries[0]));
513 		init_poll_funcptr(&poll_ctx->wait_table,
514 					sync_kloop_poll_table_queue_proc);
515 		poll_ctx->num_entries = 2 + num_rings;
516 		poll_ctx->next_entry = 0;
517 		/* Poll for notifications coming from the application through
518 		 * eventfds. */
519 		for (i = 0; i < num_rings; i++) {
520 			struct eventfd_ctx *irq;
521 			struct file *filp;
522 			unsigned long mask;
523 
524 			filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd);
525 			if (IS_ERR(filp)) {
526 				err = PTR_ERR(filp);
527 				goto out;
528 			}
529 			mask = filp->f_op->poll(filp, &poll_ctx->wait_table);
530 			if (mask & POLLERR) {
531 				err = EINVAL;
532 				goto out;
533 			}
534 
535 			filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd);
536 			if (IS_ERR(filp)) {
537 				err = PTR_ERR(filp);
538 				goto out;
539 			}
540 			poll_ctx->entries[i].irq_filp = filp;
541 			irq = eventfd_ctx_fileget(filp);
542 			if (IS_ERR(irq)) {
543 				err = PTR_ERR(irq);
544 				goto out;
545 			}
546 			poll_ctx->entries[i].irq_ctx = irq;
547 		}
548 		/* Poll for notifications coming from the netmap rings bound to
549 		 * this file descriptor. */
550 		{
551 			NM_SELINFO_T *si[NR_TXRX];
552 
553 			NMG_LOCK();
554 			si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] :
555 				&na->rx_rings[priv->np_qfirst[NR_RX]]->si;
556 			si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] :
557 				&na->tx_rings[priv->np_qfirst[NR_TX]]->si;
558 			NMG_UNLOCK();
559 			poll_wait(priv->np_filp, si[NR_RX], &poll_ctx->wait_table);
560 			poll_wait(priv->np_filp, si[NR_TX], &poll_ctx->wait_table);
561 		}
562 #else   /* SYNC_KLOOP_POLL */
563 		opt->nro_status = EOPNOTSUPP;
564 		goto out;
565 #endif  /* SYNC_KLOOP_POLL */
566 	}
567 
568 	/* Main loop. */
569 	for (;;) {
570 		if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
571 			break;
572 		}
573 
574 #ifdef SYNC_KLOOP_POLL
575 		if (poll_ctx)
576 			__set_current_state(TASK_INTERRUPTIBLE);
577 #endif  /* SYNC_KLOOP_POLL */
578 
579 		/* Process all the TX rings bound to this file descriptor. */
580 		for (i = 0; i < num_tx_rings; i++) {
581 			struct sync_kloop_ring_args a = {
582 				.kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]],
583 				.csb_atok = csb_atok_base + i,
584 				.csb_ktoa = csb_ktoa_base + i,
585 			};
586 
587 #ifdef SYNC_KLOOP_POLL
588 			if (poll_ctx)
589 				a.irq_ctx = poll_ctx->entries[i].irq_ctx;
590 #endif /* SYNC_KLOOP_POLL */
591 			if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) {
592 				continue;
593 			}
594 			netmap_sync_kloop_tx_ring(&a);
595 			nm_kr_put(a.kring);
596 		}
597 
598 		/* Process all the RX rings bound to this file descriptor. */
599 		for (i = 0; i < num_rx_rings; i++) {
600 			struct sync_kloop_ring_args a = {
601 				.kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]],
602 				.csb_atok = csb_atok_base + num_tx_rings + i,
603 				.csb_ktoa = csb_ktoa_base + num_tx_rings + i,
604 			};
605 
606 #ifdef SYNC_KLOOP_POLL
607 			if (poll_ctx)
608 				a.irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx;
609 #endif /* SYNC_KLOOP_POLL */
610 
611 			if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) {
612 				continue;
613 			}
614 			netmap_sync_kloop_rx_ring(&a);
615 			nm_kr_put(a.kring);
616 		}
617 
618 #ifdef SYNC_KLOOP_POLL
619 		if (poll_ctx) {
620 			/* If a poll context is present, yield to the scheduler
621 			 * waiting for a notification to come either from
622 			 * netmap or the application. */
623 			schedule_timeout_interruptible(msecs_to_jiffies(1000));
624 		} else
625 #endif /* SYNC_KLOOP_POLL */
626 		{
627 			/* Default synchronization method: sleep for a while. */
628 			usleep_range(sleep_us, sleep_us);
629 		}
630 	}
631 out:
632 #ifdef SYNC_KLOOP_POLL
633 	if (poll_ctx) {
634 		/* Stop polling from netmap and the eventfds, and deallocate
635 		 * the poll context. */
636 		__set_current_state(TASK_RUNNING);
637 		for (i = 0; i < poll_ctx->next_entry; i++) {
638 			struct sync_kloop_poll_entry *entry =
639 						poll_ctx->entries + i;
640 
641 			if (entry->wqh)
642 				remove_wait_queue(entry->wqh, &entry->wait);
643 			/* Release the reference we took on the eventfd files, but
644 			 * do not do that on the netmap file descriptor, since
645 			 * no reference was taken on it. */
646 			if (entry->filp && entry->filp != priv->np_filp)
647 				fput(entry->filp);
648 			if (entry->irq_ctx)
649 				eventfd_ctx_put(entry->irq_ctx);
650 			if (entry->irq_filp)
651 				fput(entry->irq_filp);
652 		}
653 		nm_os_free(poll_ctx);
654 		poll_ctx = NULL;
655 	}
656 #endif /* SYNC_KLOOP_POLL */
657 
658 	/* Reset the kloop state. */
659 	NMG_LOCK();
660 	priv->np_kloop_state = 0;
661 	NMG_UNLOCK();
662 
663 	return err;
664 }
665 
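/* Ask a running kloop to stop, and wait (sleeping in small intervals)
 * until it has cleared the NM_SYNC_KLOOP_RUNNING flag. */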
666 int
667 netmap_sync_kloop_stop(struct netmap_priv_d *priv)
668 {
669 	bool running = true;
670 	int err = 0;
671 
672 	NMG_LOCK();
673 	priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
674 	NMG_UNLOCK();
675 	while (running) {
676 		usleep_range(1000, 1500);
677 		NMG_LOCK();
678 		running = (NM_ACCESS_ONCE(priv->np_kloop_state)
679 				& NM_SYNC_KLOOP_RUNNING);
680 		NMG_UNLOCK();
681 	}
682 
683 	return err;
684 }
685 
686 #ifdef WITH_PTNETMAP
687 /*
688  * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
689  * These routines are reused across the different operating systems supported
690  * by netmap.
691  */
692 
693 /*
694  * Reconcile host and guest views of the transmit ring.
695  *
696  * Guest user wants to transmit packets up to the one before ring->head,
697  * and guest kernel knows tx_ring->hwcur is the first packet unsent
698  * and the guest kernel knows that tx_ring->hwcur is the first packet not yet
699  * sent by the host kernel.
700  * We push out as many packets as possible, and possibly
701  * reclaim buffers from previously completed transmission.
702  *
703  * Notifications from the host are enabled only if the guest user would
704  * block (no space in the ring).
705  */
706 bool
707 netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
708 			struct netmap_kring *kring, int flags)
709 {
710 	bool notify = false;
711 
712 	/* Disable notifications */
713 	atok->appl_need_kick = 0;
714 
715 	/*
716 	 * First part: tell the host (updating the CSB) to process the new
717 	 * packets.
718 	 */
719 	kring->nr_hwcur = ktoa->hwcur;
720 	ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead);
721 
722 	/* Ask for a kick from the guest to the host if needed. */
723 	if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring))
724 		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
725 			(flags & NAF_FORCE_RECLAIM)) {
726 		atok->sync_flags = flags;
727 		notify = true;
728 	}
729 
730 	/*
731 	 * Second part: reclaim buffers for completed transmissions.
732 	 */
733 	if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) {
734                 ptnetmap_guest_read_kring_csb(ktoa, kring);
735 	}
736 
737         /*
738          * No more room in the ring for new transmissions. The user thread will
739 	 * go to sleep and we need to be notified by the host when more free
740 	 * space is available.
741          */
742 	if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
743 		/* Reenable notifications. */
744 		atok->appl_need_kick = 1;
745                 /* Double check */
746                 ptnetmap_guest_read_kring_csb(ktoa, kring);
747                 /* If there is new free space, disable notifications */
748 		if (unlikely(!nm_kr_txempty(kring))) {
749 			atok->appl_need_kick = 0;
750 		}
751 	}
752 
753 	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
754 		kring->name, atok->head, atok->cur, ktoa->hwtail,
755 		kring->rhead, kring->rcur, kring->nr_hwtail);
756 
757 	return notify;
758 }
759 
760 /*
761  * Reconcile host and guest view of the receive ring.
762  *
763  * Update hwcur/hwtail from host (reading from CSB).
764  *
765  * If guest user has released buffers up to the one before ring->head, we
766  * also give them to the host.
767  *
768  * Notifications from the host are enabled only if the guest user would
769  * block (no more completed slots in the ring).
770  */
771 bool
772 netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
773 			struct netmap_kring *kring, int flags)
774 {
775 	bool notify = false;
776 
777         /* Disable notifications */
778 	atok->appl_need_kick = 0;
779 
780 	/*
781 	 * First part: import newly received packets, by updating the kring
782 	 * hwtail to the hwtail known from the host (read from the CSB).
783 	 * This also updates the kring hwcur.
784 	 */
785         ptnetmap_guest_read_kring_csb(ktoa, kring);
786 	kring->nr_kflags &= ~NKR_PENDINTR;
787 
788 	/*
789 	 * Second part: tell the host about the slots that guest user has
790 	 * released, by updating cur and head in the CSB.
791 	 */
792 	if (kring->rhead != kring->nr_hwcur) {
793 		ptnetmap_guest_write_kring_csb(atok, kring->rcur,
794 					       kring->rhead);
795                 /* Ask for a kick from the guest to the host if needed. */
796 		if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
797 			atok->sync_flags = flags;
798 			notify = true;
799 		}
800 	}
801 
802         /*
803          * No more completed RX slots. The user thread will go to sleep and
804 	 * we need to be notified by the host when more RX slots have been
805 	 * completed.
806          */
807 	if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
808 		/* Reenable notifications. */
809                 atok->appl_need_kick = 1;
810                 /* Double check */
811                 ptnetmap_guest_read_kring_csb(ktoa, kring);
812                 /* If there are new slots, disable notifications. */
813 		if (!nm_kr_rxempty(kring)) {
814                         atok->appl_need_kick = 0;
815                 }
816         }
817 
818 	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
819 		kring->name, atok->head, atok->cur, ktoa->hwtail,
820 		kring->rhead, kring->rcur, kring->nr_hwtail);
821 
822 	return notify;
823 }
824 
825 /*
826  * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
827  */
828 int
829 ptnet_nm_krings_create(struct netmap_adapter *na)
830 {
831 	struct netmap_pt_guest_adapter *ptna =
832 			(struct netmap_pt_guest_adapter *)na; /* Upcast. */
833 	struct netmap_adapter *na_nm = &ptna->hwup.up;
834 	struct netmap_adapter *na_dr = &ptna->dr.up;
835 	int ret;
836 
837 	if (ptna->backend_users) {
838 		return 0;
839 	}
840 
841 	/* Create krings on the public netmap adapter. */
842 	ret = netmap_hw_krings_create(na_nm);
843 	if (ret) {
844 		return ret;
845 	}
846 
847 	/* Copy krings into the netmap adapter private to the driver. */
848 	na_dr->tx_rings = na_nm->tx_rings;
849 	na_dr->rx_rings = na_nm->rx_rings;
850 
851 	return 0;
852 }
853 
854 void
855 ptnet_nm_krings_delete(struct netmap_adapter *na)
856 {
857 	struct netmap_pt_guest_adapter *ptna =
858 			(struct netmap_pt_guest_adapter *)na; /* Upcast. */
859 	struct netmap_adapter *na_nm = &ptna->hwup.up;
860 	struct netmap_adapter *na_dr = &ptna->dr.up;
861 
862 	if (ptna->backend_users) {
863 		return;
864 	}
865 
866 	na_dr->tx_rings = NULL;
867 	na_dr->rx_rings = NULL;
868 
869 	netmap_hw_krings_delete(na_nm);
870 }
871 
872 void
873 ptnet_nm_dtor(struct netmap_adapter *na)
874 {
875 	struct netmap_pt_guest_adapter *ptna =
876 			(struct netmap_pt_guest_adapter *)na;
877 
878 	netmap_mem_put(ptna->dr.up.nm_mem);
879 	memset(&ptna->dr, 0, sizeof(ptna->dr));
880 	netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
881 }
882 
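/*
 * Attach routine used by ptnet device drivers: create the pass-through
 * guest memory allocator, attach the public netmap adapter, and initialize
 * the driver-private adapter (ptna->dr), which reuses the same memory
 * allocator (and, via ptnet_nm_krings_create(), the same krings).
 */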
883 int
884 netmap_pt_guest_attach(struct netmap_adapter *arg,
885 		       unsigned int nifp_offset, unsigned int memid)
886 {
887 	struct netmap_pt_guest_adapter *ptna;
888 	struct ifnet *ifp = arg ? arg->ifp : NULL;
889 	int error;
890 
891 	/* get allocator */
892 	arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
893 	if (arg->nm_mem == NULL)
894 		return ENOMEM;
895 	arg->na_flags |= NAF_MEM_OWNER;
896 	error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
897 	if (error)
898 		return error;
899 
900 	/* get the netmap_pt_guest_adapter */
901 	ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
902 
903 	/* Initialize a separate pass-through netmap adapter that is going to
904 	 * be used by the ptnet driver only, and so never exposed to netmap
905          * applications. We only need a subset of the available fields. */
906 	memset(&ptna->dr, 0, sizeof(ptna->dr));
907 	ptna->dr.up.ifp = ifp;
908 	ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
909         ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
910 
911 	ptna->backend_users = 0;
912 
913 	return 0;
914 }
915 
916 #endif /* WITH_PTNETMAP */
917