xref: /freebsd/sys/compat/linuxkpi/common/src/linux_netdev.c (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 /*-
2  * Copyright (c) 2021 The FreeBSD Foundation
3  *
4  * This software was developed by Björn Zeeb under sponsorship from
5  * the FreeBSD Foundation.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/kernel.h>
35 #include <sys/sysctl.h>
36 
37 #include <linux/bitops.h>
38 #include <linux/list.h>
39 #include <linux/netdevice.h>
40 
41 MALLOC_DEFINE(M_NETDEV, "lkpindev", "Linux KPI netdevice compat");
42 
43 #define	NAPI_LOCK_INIT(_ndev)		\
44     mtx_init(&(_ndev)->napi_mtx, "napi_mtx", NULL, MTX_DEF)
45 #define	NAPI_LOCK_DESTROY(_ndev)	mtx_destroy(&(_ndev)->napi_mtx)
46 #define	NAPI_LOCK_ASSERT(_ndev)		mtx_assert(&(_ndev)->napi_mtx, MA_OWNED)
47 #define	NAPI_LOCK(_ndev)		mtx_lock(&(_ndev)->napi_mtx)
48 #define	NAPI_UNLOCK(_ndev)		mtx_unlock(&(_ndev)->napi_mtx)
49 
50 /* -------------------------------------------------------------------------- */
/*
 * Bit numbers (not masks) used in napi_struct::_flags.  They are passed to
 * set_bit()/clear_bit()/test_bit() or turned into masks with BIT().
 */
/* Do not schedule new things while we are waiting to clear things. */
#define	LKPI_NAPI_FLAG_DISABLE_PENDING				0
/* To synchronise that only one poll is ever running. */
#define	LKPI_NAPI_FLAG_IS_SCHEDULED				1
/* If trying to schedule while poll is running. Need to re-schedule. */
#define	LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN			2
/* When shutting down forcefully prevent anything from running task/poll. */
#define	LKPI_NAPI_FLAG_SHUTDOWN					3

/*
 * printf(9) "%b" decode string for _flags.  The leading \20 selects
 * hexadecimal output; each following \N names bit (N - 1).  Keep this in
 * sync with the bit numbers above: every flag needs an entry or it will be
 * silently missing from the trace output.
 */
#define LKPI_NAPI_FLAGS \
        "\20\1DISABLE_PENDING\2IS_SCHEDULED\3LOST_RACE_TRY_AGAIN\4SHUTDOWN"
62 
/*
 * Optional NAPI tracing.  Uncomment the define below to compile in the
 * debug_napi sysctl and the printf-based trace macros; otherwise every
 * macro expands to an empty statement and direct dispatch is disabled.
 *
 * All statement-like macros are wrapped in do { } while (0) in both
 * configurations so that they behave as a single statement (e.g. inside an
 * unbraced if/else) regardless of whether NAPI_DEBUG is defined.
 */
/* #define	NAPI_DEBUG */
#ifdef NAPI_DEBUG
static int debug_napi;
SYSCTL_INT(_compat_linuxkpi, OID_AUTO, debug_napi, CTLFLAG_RWTUN,
    &debug_napi, 0, "NAPI debug level");

/* Bits of debug_napi selecting which messages are printed. */
#define	DNAPI_TODO		0x01
#define	DNAPI_IMPROVE		0x02
#define	DNAPI_TRACE		0x10
#define	DNAPI_TRACE_TASK	0x20
/* Run the poll function synchronously instead of via the taskqueue. */
#define	DNAPI_DIRECT_DISPATCH	0x1000

#define	NAPI_TRACE(_n)		do {					\
	if (debug_napi & DNAPI_TRACE)					\
		printf("NAPI_TRACE %s:%d %u %p (%#jx %b)\n",		\
		    __func__, __LINE__, (unsigned int)ticks, (_n),	\
		    (uintmax_t)(_n)->_flags, (int)(_n)->_flags,		\
		    LKPI_NAPI_FLAGS);					\
} while (0)
#define	NAPI_TRACE2D(_n, _d)	do {					\
	if (debug_napi & DNAPI_TRACE)					\
		printf("NAPI_TRACE %s:%d %u %p (%#jx %b) %d\n",		\
		    __func__, __LINE__, (unsigned int)ticks, (_n),	\
		    (uintmax_t)(_n)->_flags, (int)(_n)->_flags,		\
		    LKPI_NAPI_FLAGS, (_d));				\
} while (0)
#define	NAPI_TRACE_TASK(_n, _p, _c) do {				\
	if (debug_napi & DNAPI_TRACE_TASK)				\
		printf("NAPI_TRACE %s:%d %u %p (%#jx %b) pending %d "	\
		    "count %d rx_count %d\n", __func__, __LINE__,	\
		    (unsigned int)ticks, (_n),				\
		    (uintmax_t)(_n)->_flags, (int)(_n)->_flags,		\
		    LKPI_NAPI_FLAGS, (_p), (_c), (_n)->rx_count);	\
} while (0)
#define	NAPI_TODO()		do {					\
	if (debug_napi & DNAPI_TODO)					\
		printf("NAPI_TODO %s:%d %d\n", __func__, __LINE__,	\
		    ticks);						\
} while (0)
#define	NAPI_IMPROVE()		do {					\
	if (debug_napi & DNAPI_IMPROVE)					\
		printf("NAPI_IMPROVE %s:%d %d\n", __func__, __LINE__,	\
		    ticks);						\
} while (0)

#define	NAPI_DIRECT_DISPATCH()	((debug_napi & DNAPI_DIRECT_DISPATCH) != 0)
#else
#define	NAPI_TRACE(_n)			do { } while(0)
#define	NAPI_TRACE2D(_n, _d)		do { } while(0)
#define	NAPI_TRACE_TASK(_n, _p, _c)	do { } while(0)
#define	NAPI_TODO()			do { } while(0)
#define	NAPI_IMPROVE()			do { } while(0)

#define	NAPI_DIRECT_DISPATCH()		(0)
#endif
103 
104 /* -------------------------------------------------------------------------- */
105 
106 /*
107  * Check if a poll is running or can run and and if the latter
108  * make us as running.  That way we ensure that only one poll
109  * can only ever run at the same time.  Returns true if no poll
110  * was scheduled yet.
111  */
112 bool
113 linuxkpi_napi_schedule_prep(struct napi_struct *napi)
114 {
115 	unsigned long old, new;
116 
117 	NAPI_TRACE(napi);
118 
119 	/* Can can only update/return if all flags agree. */
120 	do {
121 		old = READ_ONCE(napi->_flags);
122 
123 		/* If we are stopping, cannot run again. */
124 		if ((old & BIT(LKPI_NAPI_FLAG_DISABLE_PENDING)) != 0) {
125 			NAPI_TRACE(napi);
126 			return (false);
127 		}
128 
129 		new = old;
130 		/* We were already scheduled. Need to try again? */
131 		if ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) != 0)
132 			new |= BIT(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN);
133 		new |= BIT(LKPI_NAPI_FLAG_IS_SCHEDULED);
134 
135 	} while (atomic_cmpset_acq_long(&napi->_flags, old, new) == 0);
136 
137 	NAPI_TRACE(napi);
138         return ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) == 0);
139 }
140 
141 static void
142 lkpi___napi_schedule_dd(struct napi_struct *napi)
143 {
144 	unsigned long old, new;
145 	int rc;
146 
147 	rc = 0;
148 again:
149 	NAPI_TRACE2D(napi, rc);
150 	if (napi->poll != NULL)
151 		rc = napi->poll(napi, napi->budget);
152 	napi->rx_count += rc;
153 
154 	/* Check if interrupts are still disabled, more work to do. */
155 	/* Bandaid for now. */
156 	if (rc >= napi->budget)
157 		goto again;
158 
159 	/* Bandaid for now. */
160 	if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->_flags))
161 		goto again;
162 
163 	do {
164 		new = old = READ_ONCE(napi->_flags);
165 		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new);
166 		clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new);
167 	} while (atomic_cmpset_acq_long(&napi->_flags, old, new) == 0);
168 
169 	NAPI_TRACE2D(napi, rc);
170 }
171 
172 void
173 linuxkpi___napi_schedule(struct napi_struct *napi)
174 {
175 	int rc;
176 
177 	NAPI_TRACE(napi);
178 	if (test_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->_flags)) {
179 		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->_flags);
180 		clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags);
181 		NAPI_TRACE(napi);
182 		return;
183 	}
184 
185 	if (NAPI_DIRECT_DISPATCH()) {
186 		lkpi___napi_schedule_dd(napi);
187 	} else {
188 		rc = taskqueue_enqueue(napi->dev->napi_tq, &napi->napi_task);
189 		NAPI_TRACE2D(napi, rc);
190 		if (rc != 0) {
191 			/* Should we assert EPIPE? */
192 			return;
193 		}
194 	}
195 }
196 
/*
 * Schedule a poll run unless one is already scheduled or a disable is
 * pending.  This is the _prep + __napi_schedule sequence; iwlwifi calls
 * the two steps itself instead so that it can test the _prep result.
 */
void
linuxkpi_napi_schedule(struct napi_struct *napi)
{

	NAPI_TRACE(napi);

	/* Only the caller that wins _prep may start the poll. */
	if (!napi_schedule_prep(napi))
		return;

	__napi_schedule(napi);
}
210 
/*
 * Re-schedule a poll run.  Currently implemented exactly like
 * linuxkpi_napi_schedule(); not sure what is different to it yet.
 */
void
linuxkpi_napi_reschedule(struct napi_struct *napi)
{

	NAPI_TRACE(napi);

	if (!napi_schedule_prep(napi))
		return;

	__napi_schedule(napi);
}
221 
222 bool
223 linuxkpi_napi_complete_done(struct napi_struct *napi, int ret)
224 {
225 	unsigned long old, new;
226 
227 	NAPI_TRACE(napi);
228 	if (NAPI_DIRECT_DISPATCH())
229 		return (true);
230 
231 	do {
232 		new = old = READ_ONCE(napi->_flags);
233 
234 		/*
235 		 * If we lost a race before, we need to re-schedule.
236 		 * Leave IS_SCHEDULED set essentially doing "_prep".
237 		 */
238 		if (!test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old))
239 			clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new);
240 		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new);
241 	} while (atomic_cmpset_acq_long(&napi->_flags, old, new) == 0);
242 
243 	NAPI_TRACE(napi);
244 
245 	/* Someone tried to schedule while poll was running. Re-sched. */
246 	if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) {
247 		__napi_schedule(napi);
248 		return (false);
249 	}
250 
251 	return (true);
252 }
253 
/*
 * Convenience wrapper: complete the current poll run, passing 0 as the
 * work-done argument.  Returns whatever napi_complete_done() returns.
 */
bool
linuxkpi_napi_complete(struct napi_struct *napi)
{
	bool finished;

	NAPI_TRACE(napi);
	finished = napi_complete_done(napi, 0);
	return (finished);
}
261 
262 void
263 linuxkpi_napi_disable(struct napi_struct *napi)
264 {
265 	NAPI_TRACE(napi);
266 	set_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->_flags);
267 	while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags))
268 		pause_sbt("napidslp", SBT_1MS, 0, C_HARDCLOCK);
269 	clear_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->_flags);
270 }
271 
272 void
273 linuxkpi_napi_enable(struct napi_struct *napi)
274 {
275 
276 	NAPI_TRACE(napi);
277 	KASSERT(!test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags),
278 	    ("%s: enabling napi %p already scheduled\n", __func__, napi));
279 	mb();
280 	/* Let us be scheduled. */
281 	clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags);
282 }
283 
/*
 * Wait until no poll run is scheduled for this NAPI instance.  Without SMP
 * this only issues a memory barrier.
 */
void
linuxkpi_napi_synchronize(struct napi_struct *napi)
{

	NAPI_TRACE(napi);
#if !defined(SMP)
	mb();
#else
	/* Check & sleep (1ms at a time) while a napi is scheduled. */
	for (;;) {
		if (!test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags))
			break;
		pause_sbt("napisslp", SBT_1MS, 0, C_HARDCLOCK);
	}
#endif
}
296 
297 /* -------------------------------------------------------------------------- */
298 
299 static void
300 lkpi_napi_task(void *ctx, int pending)
301 {
302 	struct napi_struct *napi;
303 	int count;
304 
305 	KASSERT(ctx != NULL, ("%s: napi %p, pending %d\n",
306 	    __func__, ctx, pending));
307 	napi = ctx;
308 	KASSERT(napi->poll != NULL, ("%s: napi %p poll is NULL\n",
309 	    __func__, napi));
310 
311 	NAPI_TRACE_TASK(napi, pending, napi->budget);
312 	count = napi->poll(napi, napi->budget);
313 	napi->rx_count += count;
314 	NAPI_TRACE_TASK(napi, pending, count);
315 
316 	/*
317 	 * We must not check against count < pending here.  There are situations
318 	 * when a driver may "poll" and we may not have any work to do and that
319 	 * would make us re-schedule ourseless for ever.
320 	 */
321 	if (count >= napi->budget) {
322 		/*
323 		 * Have to re-schedule ourselves.  napi_complete() was not run
324 		 * in this case which means we are still SCHEDULED.
325 		 * In order to queue another task we have to directly call
326 		 * __napi_schedule() without _prep() in the way.
327 		 */
328 		__napi_schedule(napi);
329 	}
330 }
331 
332 /* -------------------------------------------------------------------------- */
333 
334 void
335 linuxkpi_netif_napi_add(struct net_device *ndev, struct napi_struct *napi,
336     int(*napi_poll)(struct napi_struct *, int), int budget)
337 {
338 
339 	napi->dev = ndev;
340 	napi->poll = napi_poll;
341 	napi->budget = budget;
342 
343 	INIT_LIST_HEAD(&napi->rx_list);
344 	napi->rx_count = 0;
345 
346 	TASK_INIT(&napi->napi_task, 0, lkpi_napi_task, napi);
347 
348 	NAPI_LOCK(ndev);
349 	TAILQ_INSERT_TAIL(&ndev->napi_head, napi, entry);
350 	NAPI_UNLOCK(ndev);
351 
352 	/* Anything else to do on the ndev? */
353 	clear_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->_flags);
354 }
355 
356 static void
357 lkpi_netif_napi_del_locked(struct napi_struct *napi)
358 {
359 	struct net_device *ndev;
360 
361 	ndev = napi->dev;
362 	NAPI_LOCK_ASSERT(ndev);
363 
364 	set_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->_flags);
365 	TAILQ_REMOVE(&ndev->napi_head, napi, entry);
366 	while (taskqueue_cancel(ndev->napi_tq, &napi->napi_task, NULL) != 0)
367 		taskqueue_drain(ndev->napi_tq, &napi->napi_task);
368 }
369 
370 void
371 linuxkpi_netif_napi_del(struct napi_struct *napi)
372 {
373 	struct net_device *ndev;
374 
375 	ndev = napi->dev;
376 	NAPI_LOCK(ndev);
377 	lkpi_netif_napi_del_locked(napi);
378 	NAPI_UNLOCK(ndev);
379 }
380 
381 /* -------------------------------------------------------------------------- */
382 
383 void
384 linuxkpi_init_dummy_netdev(struct net_device *ndev)
385 {
386 
387 	memset(ndev, 0, sizeof(*ndev));
388 
389 	ndev->reg_state = NETREG_DUMMY;
390 	NAPI_LOCK_INIT(ndev);
391 	TAILQ_INIT(&ndev->napi_head);
392 	/* Anything else? */
393 
394 	ndev->napi_tq = taskqueue_create("tq_ndev_napi", M_WAITOK,
395 	    taskqueue_thread_enqueue, &ndev->napi_tq);
396 	/* One thread for now. */
397 	(void) taskqueue_start_threads(&ndev->napi_tq, 1, PWAIT,
398 	    "ndev napi taskq");
399 }
400 
401 struct net_device *
402 linuxkpi_alloc_netdev(size_t len, const char *name, uint32_t flags,
403     void(*setup_func)(struct net_device *))
404 {
405 	struct net_device *ndev;
406 
407 	ndev = malloc(sizeof(*ndev) + len, M_NETDEV, M_NOWAIT);
408 	if (ndev == NULL)
409 		return (ndev);
410 
411 	/* Always first as it zeros! */
412 	linuxkpi_init_dummy_netdev(ndev);
413 
414 	strlcpy(ndev->name, name, sizeof(*ndev->name));
415 
416 	/* This needs extending as we support more. */
417 
418 	setup_func(ndev);
419 
420 	return (ndev);
421 }
422 
423 void
424 linuxkpi_free_netdev(struct net_device *ndev)
425 {
426 	struct napi_struct *napi, *temp;
427 
428 	NAPI_LOCK(ndev);
429 	TAILQ_FOREACH_SAFE(napi, &ndev->napi_head, entry, temp) {
430 		lkpi_netif_napi_del_locked(napi);
431 	}
432 	NAPI_UNLOCK(ndev);
433 
434 	taskqueue_free(ndev->napi_tq);
435 	ndev->napi_tq = NULL;
436 	NAPI_LOCK_DESTROY(ndev);
437 
438 	/* This needs extending as we support more. */
439 
440 	free(ndev, M_NETDEV);
441 }
442