xref: /freebsd/sys/dev/netmap/netmap_kloop.c (revision 28f4385e45a2681c14bd04b83fe1796eaefe8265)
1 /*
2  * Copyright (C) 2016-2018 Vincenzo Maffione
3  * Copyright (C) 2015 Stefano Garzarella
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *   1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD$
28  */
29 
30 /*
31  * common headers
32  */
33 #if defined(__FreeBSD__)
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/types.h>
38 #include <sys/selinfo.h>
39 #include <sys/socket.h>
40 #include <net/if.h>
41 #include <net/if_var.h>
42 #include <machine/bus.h>
43 
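/* FreeBSD has no usleep_range(): emulate it with pause_sbt(), sleeping for
 * roughly _1 microseconds (the _2 upper bound is not used). */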
44 #define usleep_range(_1, _2) \
45         pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)
46 
47 #elif defined(linux)
48 #include <bsd_glue.h>
49 #include <linux/file.h>
50 #include <linux/eventfd.h>
51 #endif
52 
53 #include <net/netmap.h>
54 #include <dev/netmap/netmap_kern.h>
55 #include <net/netmap_virt.h>
56 #include <dev/netmap/netmap_mem2.h>
57 
58 /* Support for eventfd-based notifications. */
59 #if defined(linux)
60 #define SYNC_KLOOP_POLL
61 #endif
62 
63 /* Write kring pointers (hwcur, hwtail) to the CSB.
64  * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
65 static inline void
66 sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
67 			   uint32_t hwtail)
68 {
69 	/* Issue a first store-store barrier to make sure that writes to the netmap
70 	 * ring do not get reordered after the updates of ktoa->hwcur and ktoa->hwtail. */
71 	nm_stst_barrier();
72 
73 	/*
74 	 * The same scheme used in nm_sync_kloop_appl_write() applies here.
75 	 * We allow the application to read a value of hwcur more recent than the
76 	 * value of hwtail, since this still results in a consistent view of the
77 	 * ring state (and hwcur can never wrap around past hwtail, since hwcur
78 	 * must stay behind head).
79 	 *
80 	 * The following memory barrier scheme is used to make this happen:
81 	 *
82 	 *          Application            Kernel
83 	 *
84 	 *          STORE(hwcur)           LOAD(hwtail)
85 	 *          wmb() <------------->  rmb()
86 	 *          STORE(hwtail)          LOAD(hwcur)
87 	 */
88 	CSB_WRITE(ptr, hwcur, hwcur);
89 	nm_stst_barrier();
90 	CSB_WRITE(ptr, hwtail, hwtail);
91 }
92 
93 /* Read kring pointers (head, cur, sync_flags) from the CSB.
94  * This routine is coupled with ptnetmap_guest_write_kring_csb(). */
95 static inline void
96 sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
97 			  struct netmap_ring *shadow_ring,
98 			  uint32_t num_slots)
99 {
100 	/*
101 	 * We place a memory barrier to make sure that the update of head never
102 	 * overtakes the update of cur.
103 	 * (see explanation in sync_kloop_kernel_write).
104 	 */
105 	CSB_READ(ptr, head, shadow_ring->head);
106 	nm_ldld_barrier();
107 	CSB_READ(ptr, cur, shadow_ring->cur);
108 	CSB_READ(ptr, sync_flags, shadow_ring->flags);
109 
110 	/* Make sure that loads from atok->head and atok->cur are not delayed
111 	 * after the loads from the netmap ring. */
112 	nm_ldld_barrier();
113 }
114 
115 /* Enable or disable application --> kernel kicks. */
116 static inline void
117 csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
118 {
119 	CSB_WRITE(csb_ktoa, kern_need_kick, val);
120 }
121 
122 #ifdef SYNC_KLOOP_POLL
123 /* Are application interrupts enabled or disabled? */
124 static inline uint32_t
125 csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
126 {
127 	uint32_t v;
128 
129 	CSB_READ(csb_atok, appl_need_kick, v);
130 
131 	return v;
132 }
133 #endif  /* SYNC_KLOOP_POLL */
134 
135 static inline void
136 sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
137 {
138 	nm_prinf("%s, kring %s, hwcur %d, rhead %d, "
139 		"rcur %d, rtail %d, hwtail %d",
140 		title, kring->name, kring->nr_hwcur, kring->rhead,
141 		kring->rcur, kring->rtail, kring->nr_hwtail);
142 }
143 
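/* Arguments for netmap_sync_kloop_tx_ring() and netmap_sync_kloop_rx_ring():
 * the kring to be processed, the corresponding CSB entries, and (when eventfd
 * support is available) the eventfd context used to notify the application. */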
144 struct sync_kloop_ring_args {
145 	struct netmap_kring *kring;
146 	struct nm_csb_atok *csb_atok;
147 	struct nm_csb_ktoa *csb_ktoa;
148 #ifdef SYNC_KLOOP_POLL
149 	struct eventfd_ctx *irq_ctx;
150 #endif /* SYNC_KLOOP_POLL */
151 };
152 
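/* Process a TX ring: run the txsync prologue on the state read from the CSB,
 * call the kring nm_sync() routine, and publish the updated hwcur/hwtail back
 * to the CSB, kicking the application (if it asked for notifications) when new
 * TX space shows up. Iterate until the application has no more packets to send
 * (re-enabling application-->kernel kicks in that case), or the backend runs
 * out of TX slots and we stop to wait for a netmap_tx_irq() notification. */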
153 static void
154 netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
155 {
156 	struct netmap_kring *kring = a->kring;
157 	struct nm_csb_atok *csb_atok = a->csb_atok;
158 	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
159 	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
160 	bool more_txspace = false;
161 	uint32_t num_slots;
162 	int batch;
163 
164 	num_slots = kring->nkr_num_slots;
165 
166 	/* Disable application --> kernel notifications. */
167 	csb_ktoa_kick_enable(csb_ktoa, 0);
168 	/* Copy the application kring pointers from the CSB */
169 	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
170 
171 	for (;;) {
172 		batch = shadow_ring.head - kring->nr_hwcur;
173 		if (batch < 0)
174 			batch += num_slots;
175 
176 #ifdef PTN_TX_BATCH_LIM
177 		if (batch > PTN_TX_BATCH_LIM(num_slots)) {
178 			/* If the application moves ahead too fast, let's cut the move so
179 			 * that we don't exceed our batch limit. */
180 			uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);
181 
182 			if (head_lim >= num_slots)
183 				head_lim -= num_slots;
184 			nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head,
185 					head_lim);
186 			shadow_ring.head = head_lim;
187 			batch = PTN_TX_BATCH_LIM(num_slots);
188 		}
189 #endif /* PTN_TX_BATCH_LIM */
190 
191 		if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
192 			shadow_ring.flags |= NAF_FORCE_RECLAIM;
193 		}
194 
195 		/* Netmap prologue */
196 		shadow_ring.tail = kring->rtail;
197 		if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
198 			/* Reinit ring and enable notifications. */
199 			netmap_ring_reinit(kring);
200 			csb_ktoa_kick_enable(csb_ktoa, 1);
201 			break;
202 		}
203 
204 		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
205 			sync_kloop_kring_dump("pre txsync", kring);
206 		}
207 
208 		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
209 			/* Reenable notifications. */
210 			csb_ktoa_kick_enable(csb_ktoa, 1);
211 			nm_prerr("txsync() failed");
212 			break;
213 		}
214 
215 		/*
216 		 * Finalize
217 		 * Copy kernel hwcur and hwtail into the CSB for the application sync().
219 		 */
220 		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
221 				kring->nr_hwtail);
222 		if (kring->rtail != kring->nr_hwtail) {
223 			/* Some more room available in the parent adapter. */
224 			kring->rtail = kring->nr_hwtail;
225 			more_txspace = true;
226 		}
227 
228 		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
229 			sync_kloop_kring_dump("post txsync", kring);
230 		}
231 
232 		/* Interrupt the application if needed. */
233 #ifdef SYNC_KLOOP_POLL
234 		if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
235 			/* Kick the application; clear more_txspace to avoid a second, unneeded kick below. */
236 			eventfd_signal(a->irq_ctx, 1);
237 			more_txspace = false;
238 		}
239 #endif /* SYNC_KLOOP_POLL */
240 
241 		/* Read CSB to see if there is more work to do. */
242 		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
243 		if (shadow_ring.head == kring->rhead) {
244 			/*
245 			 * No more packets to transmit. We enable notifications and
246 			 * go to sleep, waiting for a kick from the application when
247 			 * new slots are ready for transmission.
248 			 */
249 			/* Reenable notifications. */
250 			csb_ktoa_kick_enable(csb_ktoa, 1);
251 			/* Double check, with store-load memory barrier. */
252 			nm_stld_barrier();
253 			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
254 			if (shadow_ring.head != kring->rhead) {
255 				/* We won the race condition, there are more packets to
256 				 * transmit. Disable notifications and do another cycle */
257 				csb_ktoa_kick_enable(csb_ktoa, 0);
258 				continue;
259 			}
260 			break;
261 		}
262 
263 		if (nm_kr_txempty(kring)) {
264 			/* No more available TX slots. We stop and wait for a notification
265 			 * from the backend (netmap_tx_irq). */
266 			nm_prdis(1, "TX ring");
267 			break;
268 		}
269 	}
270 
271 #ifdef SYNC_KLOOP_POLL
272 	if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
273 		eventfd_signal(a->irq_ctx, 1);
274 	}
275 #endif /* SYNC_KLOOP_POLL */
276 }
277 
278 /* Max number of consecutive RX sync cycles during which no packet is received. */
279 #define SYNC_LOOP_RX_DRY_CYCLES_MAX	2
280 
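/* True if the kernel has no free slots left to receive into, i.e. nr_hwtail
 * has caught up with the slot right before the application head (g_head). */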
281 static inline int
282 sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
283 {
284 	return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
285 				kring->nkr_num_slots - 1));
286 }
287 
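/* Process an RX ring: run the rxsync prologue on the state read from the CSB,
 * call the kring nm_sync() routine, and publish the updated hwcur/hwtail back
 * to the CSB, kicking the application (if it asked for notifications) when new
 * packets have been received. Iterate until the application offers no more
 * free slots for reception, or nothing new is received for
 * SYNC_LOOP_RX_DRY_CYCLES_MAX consecutive cycles. */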
288 static void
289 netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
290 {
292 	struct netmap_kring *kring = a->kring;
293 	struct nm_csb_atok *csb_atok = a->csb_atok;
294 	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
295 	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
296 	int dry_cycles = 0;
297 	bool some_recvd = false;
298 	uint32_t num_slots;
299 
300 	num_slots = kring->nkr_num_slots;
304 
305 	/* Disable notifications. */
306 	csb_ktoa_kick_enable(csb_ktoa, 0);
307 	/* Copy the application kring pointers from the CSB */
308 	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
309 
310 	for (;;) {
311 		uint32_t hwtail;
312 
313 		/* Netmap prologue */
314 		shadow_ring.tail = kring->rtail;
315 		if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
316 			/* Reinit ring and enable notifications. */
317 			netmap_ring_reinit(kring);
318 			csb_ktoa_kick_enable(csb_ktoa, 1);
319 			break;
320 		}
321 
322 		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
323 			sync_kloop_kring_dump("pre rxsync", kring);
324 		}
325 
326 		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
327 			/* Reenable notifications. */
328 			csb_ktoa_kick_enable(csb_ktoa, 1);
329 			nm_prerr("rxsync() failed");
330 			break;
331 		}
332 
333 		/*
334 		 * Finalize
335 		 * Copy kernel hwcur and hwtail into the CSB for the application sync()
336 		 */
337 		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
338 		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
339 		if (kring->rtail != hwtail) {
340 			kring->rtail = hwtail;
341 			some_recvd = true;
342 			dry_cycles = 0;
343 		} else {
344 			dry_cycles++;
345 		}
346 
347 		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
348 			sync_kloop_kring_dump("post rxsync", kring);
349 		}
350 
351 #ifdef SYNC_KLOOP_POLL
352 		/* Interrupt the application if needed. */
353 		if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
354 			/* Kick the application; clear some_recvd to avoid a second, unneeded kick below. */
355 			eventfd_signal(a->irq_ctx, 1);
356 			some_recvd = false;
357 		}
358 #endif /* SYNC_KLOOP_POLL */
359 
360 		/* Read CSB to see if there is more work to do. */
361 		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
362 		if (sync_kloop_norxslots(kring, shadow_ring.head)) {
363 			/*
364 			 * No more slots available for reception. We enable notifications and
365 			 * go to sleep, waiting for a kick from the application when new receive
366 			 * slots are available.
367 			 */
368 			/* Reenable notifications. */
369 			csb_ktoa_kick_enable(csb_ktoa, 1);
370 			/* Double check, with store-load memory barrier. */
371 			nm_stld_barrier();
372 			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
373 			if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
374 				/* We won the race condition, more slots are available. Disable
375 				 * notifications and do another cycle. */
376 				csb_ktoa_kick_enable(csb_ktoa, 0);
377 				continue;
378 			}
379 			break;
380 		}
381 
382 		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
383 		if (unlikely(hwtail == kring->rhead ||
384 					dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
385 			/* No more packets to be read from the backend. We stop and
386 			 * wait for a notification from the backend (netmap_rx_irq). */
387 			nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
388 					hwtail, kring->rhead, dry_cycles);
389 			break;
390 		}
391 	}
392 
395 #ifdef SYNC_KLOOP_POLL
396 	/* Interrupt the application if needed. */
397 	if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
398 		eventfd_signal(a->irq_ctx, 1);
399 	}
400 #endif /* SYNC_KLOOP_POLL */
401 }
402 
403 #ifdef SYNC_KLOOP_POLL
404 struct sync_kloop_poll_entry {
405 	/* Support for receiving notifications from
406 	 * a netmap ring or from the application. */
407 	struct file *filp;
408 	wait_queue_t wait;
409 	wait_queue_head_t *wqh;
410 
411 	/* Support for sending notifications to the application. */
412 	struct eventfd_ctx *irq_ctx;
413 	struct file *irq_filp;
414 };
415 
416 struct sync_kloop_poll_ctx {
417 	poll_table wait_table;
418 	unsigned int next_entry;
419 	unsigned int num_entries;
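	/* Variable-size array of poll entries, allocated together with this
	 * structure (see the nm_os_malloc() call in netmap_sync_kloop()). */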
420 	struct sync_kloop_poll_entry entries[0];
421 };
422 
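/* poll_table callback, invoked once for each wait queue the kloop subscribes
 * to (the ioeventfds and the netmap TX/RX selinfo): it records the wait queue
 * head and the file pointer, so that the exit path of netmap_sync_kloop() can
 * detach from the queue and release the file, and enqueues the current task
 * with the default wake up function. */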
423 static void
424 sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
425 				poll_table *pt)
426 {
427 	struct sync_kloop_poll_ctx *poll_ctx =
428 		container_of(pt, struct sync_kloop_poll_ctx, wait_table);
429 	struct sync_kloop_poll_entry *entry = poll_ctx->entries +
430 						poll_ctx->next_entry;
431 
432 	BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
433 	entry->wqh = wqh;
434 	entry->filp = file;
435 	/* Use the default wake up function. */
436 	init_waitqueue_entry(&entry->wait, current);
437 	add_wait_queue(wqh, &entry->wait);
438 	poll_ctx->next_entry++;
439 }
440 #endif  /* SYNC_KLOOP_POLL */
441 
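/* Handler for the NETMAP_REQ_SYNC_KLOOP_START request: validate the request,
 * optionally set up eventfd-based notifications (only with SYNC_KLOOP_POLL,
 * i.e. on Linux), and then run the main loop that keeps the CSB and the TX/RX
 * krings bound to this file descriptor in sync, until netmap_sync_kloop_stop()
 * asks us to stop. */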
442 int
443 netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
444 {
445 	struct nmreq_sync_kloop_start *req =
446 		(struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
447 	struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
448 #ifdef SYNC_KLOOP_POLL
449 	struct sync_kloop_poll_ctx *poll_ctx = NULL;
450 #endif  /* SYNC_KLOOP_POLL */
451 	int num_rx_rings, num_tx_rings, num_rings;
452 	struct sync_kloop_ring_args *args = NULL;
453 	uint32_t sleep_us = req->sleep_us;
454 	struct nm_csb_atok* csb_atok_base;
455 	struct nm_csb_ktoa* csb_ktoa_base;
456 	struct netmap_adapter *na;
457 	struct nmreq_option *opt;
458 	int err = 0;
459 	int i;
460 
461 	if (sleep_us > 1000000) {
462 		/* We do not accept sleeping for more than a second. */
463 		return EINVAL;
464 	}
465 
466 	if (priv->np_nifp == NULL) {
467 		return ENXIO;
468 	}
469 	mb(); /* make sure following reads are not from cache */
470 
471 	na = priv->np_na;
472 	if (!nm_netmap_on(na)) {
473 		return ENXIO;
474 	}
475 
476 	NMG_LOCK();
477 	/* Make sure the application is working in CSB mode. */
478 	if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
479 		NMG_UNLOCK();
480 		nm_prerr("sync-kloop on %s requires "
481 				"NETMAP_REQ_OPT_CSB option", na->name);
482 		return EINVAL;
483 	}
484 
485 	csb_atok_base = priv->np_csb_atok_base;
486 	csb_ktoa_base = priv->np_csb_ktoa_base;
487 
488 	/* Make sure that no kloop is currently running. */
489 	if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
490 		err = EBUSY;
491 	}
492 	priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
493 	NMG_UNLOCK();
494 	if (err) {
495 		return err;
496 	}
497 
498 	num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
499 	num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
500 	num_rings = num_tx_rings + num_rx_rings;
501 
502 	args = nm_os_malloc(num_rings * sizeof(args[0]));
503 	if (!args) {
504 		err = ENOMEM;
505 		goto out;
506 	}
507 
508 	/* Validate notification options. */
509 	opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
510 				NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
511 	if (opt != NULL) {
512 		err = nmreq_checkduplicate(opt);
513 		if (err) {
514 			opt->nro_status = err;
515 			goto out;
516 		}
517 		if (opt->nro_size != sizeof(*eventfds_opt) +
518 			sizeof(eventfds_opt->eventfds[0]) * num_rings) {
519 			/* Option size not consistent with the number of
520 			 * entries. */
521 			opt->nro_status = err = EINVAL;
522 			goto out;
523 		}
524 #ifdef SYNC_KLOOP_POLL
525 		eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
526 		opt->nro_status = 0;
527 		/* We need 2 poll entries for TX and RX notifications coming
528 		 * from the netmap adapter, plus one entry per ring for the
529 		 * notifications coming from the application. */
530 		poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
531 				(2 + num_rings) * sizeof(poll_ctx->entries[0]));
		if (poll_ctx == NULL) {
			err = ENOMEM;
			goto out;
		}
532 		init_poll_funcptr(&poll_ctx->wait_table,
533 					sync_kloop_poll_table_queue_proc);
534 		poll_ctx->num_entries = 2 + num_rings;
535 		poll_ctx->next_entry = 0;
536 		/* Poll for notifications coming from the application through
537 		 * the eventfds. */
538 		for (i = 0; i < num_rings; i++) {
539 			struct eventfd_ctx *irq;
540 			struct file *filp;
541 			unsigned long mask;
542 
543 			filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd);
544 			if (IS_ERR(filp)) {
545 				err = PTR_ERR(filp);
546 				goto out;
547 			}
548 			mask = filp->f_op->poll(filp, &poll_ctx->wait_table);
549 			if (mask & POLLERR) {
550 				err = EINVAL;
551 				goto out;
552 			}
553 
554 			filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd);
555 			if (IS_ERR(filp)) {
556 				err = PTR_ERR(filp);
557 				goto out;
558 			}
559 			poll_ctx->entries[i].irq_filp = filp;
560 			irq = eventfd_ctx_fileget(filp);
561 			if (IS_ERR(irq)) {
562 				err = PTR_ERR(irq);
563 				goto out;
564 			}
565 			poll_ctx->entries[i].irq_ctx = irq;
566 		}
567 		/* Poll for notifications coming from the netmap rings bound to
568 		 * this file descriptor. */
569 		{
570 			NM_SELINFO_T *si[NR_TXRX];
571 
572 			NMG_LOCK();
573 			si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] :
574 				&na->rx_rings[priv->np_qfirst[NR_RX]]->si;
575 			si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] :
576 				&na->tx_rings[priv->np_qfirst[NR_TX]]->si;
577 			NMG_UNLOCK();
578 			poll_wait(priv->np_filp, si[NR_TX], &poll_ctx->wait_table);
579 			poll_wait(priv->np_filp, si[NR_RX], &poll_ctx->wait_table);
580 		}
581 #else   /* SYNC_KLOOP_POLL */
582 		opt->nro_status = EOPNOTSUPP;
583 		goto out;
584 #endif  /* SYNC_KLOOP_POLL */
585 	}
586 
587 	/* Prepare the arguments for netmap_sync_kloop_tx_ring()
588 	 * and netmap_sync_kloop_rx_ring(). */
589 	for (i = 0; i < num_tx_rings; i++) {
590 		struct sync_kloop_ring_args *a = args + i;
591 
592 		a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]];
593 		a->csb_atok = csb_atok_base + i;
594 		a->csb_ktoa = csb_ktoa_base + i;
595 #ifdef SYNC_KLOOP_POLL
596 		if (poll_ctx)
597 			a->irq_ctx = poll_ctx->entries[i].irq_ctx;
598 #endif /* SYNC_KLOOP_POLL */
599 	}
600 	for (i = 0; i < num_rx_rings; i++) {
601 		struct sync_kloop_ring_args *a = args + num_tx_rings + i;
602 
603 		a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]];
604 		a->csb_atok = csb_atok_base + num_tx_rings + i;
605 		a->csb_ktoa = csb_ktoa_base + num_tx_rings + i;
606 #ifdef SYNC_KLOOP_POLL
607 		if (poll_ctx)
608 			a->irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx;
609 #endif /* SYNC_KLOOP_POLL */
610 	}
611 
612 	/* Main loop. */
613 	for (;;) {
614 		if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
615 			break;
616 		}
617 
618 #ifdef SYNC_KLOOP_POLL
619 		if (poll_ctx) {
620 			/* It is important to set the task state as
621 			 * interruptible before processing any TX/RX ring,
622 			 * so that if a notification on ring Y comes after
623 			 * we have processed ring Y, but before we call
624 			 * schedule(), we don't miss it. This is true because
625 			 * the wake up function will change the task state,
626 			 * and therefore the schedule_timeout() call below
627 			 * will observe the change.
628 			 */
629 			set_current_state(TASK_INTERRUPTIBLE);
630 		}
631 #endif  /* SYNC_KLOOP_POLL */
632 
633 		/* Process all the TX rings bound to this file descriptor. */
634 		for (i = 0; i < num_tx_rings; i++) {
635 			struct sync_kloop_ring_args *a = args + i;
636 
637 			if (unlikely(nm_kr_tryget(a->kring, 1, NULL))) {
638 				continue;
639 			}
640 			netmap_sync_kloop_tx_ring(a);
641 			nm_kr_put(a->kring);
642 		}
643 
644 		/* Process all the RX rings bound to this file descriptor. */
645 		for (i = 0; i < num_rx_rings; i++) {
646 			struct sync_kloop_ring_args *a = args + num_tx_rings + i;
647 
648 			if (unlikely(nm_kr_tryget(a->kring, 1, NULL))) {
649 				continue;
650 			}
651 			netmap_sync_kloop_rx_ring(a);
652 			nm_kr_put(a->kring);
653 		}
654 
655 #ifdef SYNC_KLOOP_POLL
656 		if (poll_ctx) {
657 			/* If a poll context is present, yield to the scheduler
658 			 * waiting for a notification to come either from
659 			 * netmap or the application. */
660 			schedule_timeout(msecs_to_jiffies(20000));
661 		} else
662 #endif /* SYNC_KLOOP_POLL */
663 		{
664 			/* Default synchronization method: sleep for a while. */
665 			usleep_range(sleep_us, sleep_us);
666 		}
667 	}
668 out:
669 #ifdef SYNC_KLOOP_POLL
670 	if (poll_ctx) {
671 		/* Stop polling from netmap and the eventfds, and deallocate
672 		 * the poll context. */
673 		__set_current_state(TASK_RUNNING);
674 		for (i = 0; i < poll_ctx->next_entry; i++) {
675 			struct sync_kloop_poll_entry *entry =
676 						poll_ctx->entries + i;
677 
678 			if (entry->wqh)
679 				remove_wait_queue(entry->wqh, &entry->wait);
680 			/* Release the reference taken on the eventfd file, but
681 			 * not on the netmap file descriptor, since we did not
682 			 * take a reference on the latter. */
683 			if (entry->filp && entry->filp != priv->np_filp)
684 				fput(entry->filp);
685 			if (entry->irq_ctx)
686 				eventfd_ctx_put(entry->irq_ctx);
687 			if (entry->irq_filp)
688 				fput(entry->irq_filp);
689 		}
690 		nm_os_free(poll_ctx);
691 		poll_ctx = NULL;
692 	}
693 #endif /* SYNC_KLOOP_POLL */
694 
695 	if (args) {
696 		nm_os_free(args);
697 		args = NULL;
698 	}
699 
700 	/* Reset the kloop state. */
701 	NMG_LOCK();
702 	priv->np_kloop_state = 0;
703 	NMG_UNLOCK();
704 
705 	return err;
706 }
707 
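/* Ask a running sync kloop to stop, and wait until the loop has actually
 * terminated (i.e. until NM_SYNC_KLOOP_RUNNING has been cleared). */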
708 int
709 netmap_sync_kloop_stop(struct netmap_priv_d *priv)
710 {
711 	bool running = true;
712 	int err = 0;
713 
714 	NMG_LOCK();
715 	priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
716 	NMG_UNLOCK();
717 	while (running) {
718 		usleep_range(1000, 1500);
719 		NMG_LOCK();
720 		running = (NM_ACCESS_ONCE(priv->np_kloop_state)
721 				& NM_SYNC_KLOOP_RUNNING);
722 		NMG_UNLOCK();
723 	}
724 
725 	return err;
726 }
727 
728 #ifdef WITH_PTNETMAP
729 /*
730  * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
731  * These routines are reused across the different operating systems supported
732  * by netmap.
733  */
734 
735 /*
736  * Reconcile host and guest views of the transmit ring.
737  *
738  * Guest user wants to transmit packets up to the one before ring->head,
739  * and guest kernel knows tx_ring->hwcur is the first packet unsent
740  * by the host kernel.
741  *
742  * We push out as many packets as possible, and possibly
743  * reclaim buffers from previously completed transmission.
744  *
745  * Notifications from the host are enabled only if the guest user would
746  * block (no space in the ring).
747  */
748 bool
749 netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
750 			struct netmap_kring *kring, int flags)
751 {
752 	bool notify = false;
753 
754 	/* Disable notifications */
755 	atok->appl_need_kick = 0;
756 
757 	/*
758 	 * First part: tell the host (updating the CSB) to process the new
759 	 * packets.
760 	 */
761 	kring->nr_hwcur = ktoa->hwcur;
762 	nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
763 
764 	/* Ask for a kick from the guest to the host if needed. */
765 	if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring))
766 		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
767 			(flags & NAF_FORCE_RECLAIM)) {
768 		atok->sync_flags = flags;
769 		notify = true;
770 	}
771 
772 	/*
773 	 * Second part: reclaim buffers for completed transmissions.
774 	 */
775 	if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) {
776 		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
777 					&kring->nr_hwcur);
778 	}
779 
780 	/*
781 	 * No more room in the ring for new transmissions. The user thread will
782 	 * go to sleep and we need to be notified by the host when more free
783 	 * space is available.
784 	 */
785 	if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
786 		/* Reenable notifications. */
787 		atok->appl_need_kick = 1;
788 		/* Double check, with store-load memory barrier. */
789 		nm_stld_barrier();
790 		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
791 					&kring->nr_hwcur);
792 		/* If there is new free space, disable notifications */
793 		if (unlikely(!nm_kr_txempty(kring))) {
794 			atok->appl_need_kick = 0;
795 		}
796 	}
797 
798 	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
799 		kring->name, atok->head, atok->cur, ktoa->hwtail,
800 		kring->rhead, kring->rcur, kring->nr_hwtail);
801 
802 	return notify;
803 }
804 
805 /*
806  * Reconcile host and guest view of the receive ring.
807  *
808  * Update hwcur/hwtail from host (reading from CSB).
809  *
810  * If guest user has released buffers up to the one before ring->head, we
811  * also give them to the host.
812  *
813  * Notifications from the host are enabled only if the guest user would
814  * block (no more completed slots in the ring).
815  */
816 bool
817 netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
818 			struct netmap_kring *kring, int flags)
819 {
820 	bool notify = false;
821 
822 	/* Disable notifications */
823 	atok->appl_need_kick = 0;
824 
825 	/*
826 	 * First part: import newly received packets, by updating the kring
827 	 * hwtail to the hwtail known from the host (read from the CSB).
828 	 * This also updates the kring hwcur.
829 	 */
830 	nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur);
831 	kring->nr_kflags &= ~NKR_PENDINTR;
832 
833 	/*
834 	 * Second part: tell the host about the slots that guest user has
835 	 * released, by updating cur and head in the CSB.
836 	 */
837 	if (kring->rhead != kring->nr_hwcur) {
838 		nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
839 		/* Ask for a kick from the guest to the host if needed. */
840 		if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
841 			atok->sync_flags = flags;
842 			notify = true;
843 		}
844 	}
845 
846 	/*
847 	 * No more completed RX slots. The user thread will go to sleep and
848 	 * we need to be notified by the host when more RX slots have been
849 	 * completed.
850 	 */
851 	if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
852 		/* Reenable notifications. */
853 		atok->appl_need_kick = 1;
854 		/* Double check, with store-load memory barrier. */
855 		nm_stld_barrier();
856 		nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
857 					&kring->nr_hwcur);
858 		/* If there are new slots, disable notifications. */
859 		if (!nm_kr_rxempty(kring)) {
860 			atok->appl_need_kick = 0;
861 		}
862 	}
863 
864 	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
865 		kring->name, atok->head, atok->cur, ktoa->hwtail,
866 		kring->rhead, kring->rcur, kring->nr_hwtail);
867 
868 	return notify;
869 }
870 
871 /*
872  * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
873  */
874 int
875 ptnet_nm_krings_create(struct netmap_adapter *na)
876 {
877 	struct netmap_pt_guest_adapter *ptna =
878 			(struct netmap_pt_guest_adapter *)na; /* Upcast. */
879 	struct netmap_adapter *na_nm = &ptna->hwup.up;
880 	struct netmap_adapter *na_dr = &ptna->dr.up;
881 	int ret;
882 
883 	if (ptna->backend_users) {
884 		return 0;
885 	}
886 
887 	/* Create krings on the public netmap adapter. */
888 	ret = netmap_hw_krings_create(na_nm);
889 	if (ret) {
890 		return ret;
891 	}
892 
893 	/* Copy krings into the netmap adapter private to the driver. */
894 	na_dr->tx_rings = na_nm->tx_rings;
895 	na_dr->rx_rings = na_nm->rx_rings;
896 
897 	return 0;
898 }
899 
900 void
901 ptnet_nm_krings_delete(struct netmap_adapter *na)
902 {
903 	struct netmap_pt_guest_adapter *ptna =
904 			(struct netmap_pt_guest_adapter *)na; /* Upcast. */
905 	struct netmap_adapter *na_nm = &ptna->hwup.up;
906 	struct netmap_adapter *na_dr = &ptna->dr.up;
907 
908 	if (ptna->backend_users) {
909 		return;
910 	}
911 
912 	na_dr->tx_rings = NULL;
913 	na_dr->rx_rings = NULL;
914 
915 	netmap_hw_krings_delete(na_nm);
916 }
917 
918 void
919 ptnet_nm_dtor(struct netmap_adapter *na)
920 {
921 	struct netmap_pt_guest_adapter *ptna =
922 			(struct netmap_pt_guest_adapter *)na;
923 
924 	netmap_mem_put(ptna->dr.up.nm_mem);
925 	memset(&ptna->dr, 0, sizeof(ptna->dr));
926 	netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
927 }
928 
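/* Called by ptnet device drivers on attach: create the pass-through guest
 * memory allocator (exposing the host netmap memory identified by memid, with
 * the netmap_if at nifp_offset), attach the hardware adapter, and initialize
 * the driver-private adapter (ptna->dr), which reuses the memory allocator and
 * config callback of the public adapter and is never exposed to applications. */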
929 int
930 netmap_pt_guest_attach(struct netmap_adapter *arg,
931 		       unsigned int nifp_offset, unsigned int memid)
932 {
933 	struct netmap_pt_guest_adapter *ptna;
934 	struct ifnet *ifp = arg ? arg->ifp : NULL;
935 	int error;
936 
937 	/* get allocator */
938 	arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
939 	if (arg->nm_mem == NULL)
940 		return ENOMEM;
941 	arg->na_flags |= NAF_MEM_OWNER;
942 	error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
943 	if (error)
944 		return error;
945 
946 	/* get the netmap_pt_guest_adapter */
947 	ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
948 
949 	/* Initialize a separate pass-through netmap adapter that is going to
950 	 * be used by the ptnet driver only, and so never exposed to netmap
951 	 * applications. We only need a subset of the available fields. */
952 	memset(&ptna->dr, 0, sizeof(ptna->dr));
953 	ptna->dr.up.ifp = ifp;
954 	ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
955 	ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
956 
957 	ptna->backend_users = 0;
958 
959 	return 0;
960 }
961 
962 #endif /* WITH_PTNETMAP */
963