xref: /freebsd/sys/compat/linuxkpi/common/src/linux_work.c (revision 5bf5ca772c6de2d53344a78cf461447cc322ccea)
1 /*-
2  * Copyright (c) 2017 Hans Petter Selasky
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <linux/workqueue.h>
31 #include <linux/wait.h>
32 #include <linux/compat.h>
33 #include <linux/spinlock.h>
34 
35 #include <sys/kernel.h>
36 
37 /*
38  * Define all work struct states
39  */
40 enum {
41 	WORK_ST_IDLE,			/* idle - not started */
42 	WORK_ST_TIMER,			/* timer is being started */
43 	WORK_ST_TASK,			/* taskqueue is being queued */
44 	WORK_ST_EXEC,			/* callback is being called */
45 	WORK_ST_CANCEL,			/* cancel is being requested */
46 	WORK_ST_MAX,
47 };
48 
49 /*
50  * Define global workqueues
51  */
52 static struct workqueue_struct *linux_system_short_wq;
53 static struct workqueue_struct *linux_system_long_wq;
54 
55 struct workqueue_struct *system_wq;
56 struct workqueue_struct *system_long_wq;
57 struct workqueue_struct *system_unbound_wq;
58 struct workqueue_struct *system_power_efficient_wq;
59 
60 static int linux_default_wq_cpus = 4;
61 
62 static void linux_delayed_work_timer_fn(void *);
63 
64 /*
65  * This function atomically updates the work state and returns the
66  * previous state at the time of update.
67  */
68 static uint8_t
69 linux_update_state(atomic_t *v, const uint8_t *pstate)
70 {
71 	int c, old;
72 
73 	c = v->counter;
74 
75 	while ((old = atomic_cmpxchg(v, c, pstate[c])) != c)
76 		c = old;
77 
78 	return (c);
79 }
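
/*
 * The per-operation transition tables passed to linux_update_state()
 * below map every current state to its successor. A minimal sketch of
 * the calling pattern, mirroring the real callers in this file: the
 * caller switches on the state that was in effect before the
 * transition, so each old state is acted upon exactly once:
 *
 *	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
 *		[WORK_ST_IDLE] = WORK_ST_TASK,	(idle: start queuing)
 *		[WORK_ST_TASK] = WORK_ST_TASK,	(already queued: NOP)
 *	};
 *
 *	switch (linux_update_state(&work->state, states)) {
 *	case WORK_ST_IDLE:
 *		(the work was idle and has now been marked as queued)
 *		break;
 *	default:
 *		(the work was already queued; nothing more to do)
 *		break;
 *	}
 */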
80 
81 /*
82  * A LinuxKPI task is allowed to free itself inside the callback function
83  * and cannot safely be referred to after the callback function has
84  * completed. This function gives the linux_work_fn() function a hint
85  * that the task is not going away and can have its state checked
86  * again. Without this extra hint LinuxKPI tasks cannot be serialized
87  * across multiple worker threads.
88  */
89 static bool
90 linux_work_exec_unblock(struct work_struct *work)
91 {
92 	struct workqueue_struct *wq;
93 	struct work_exec *exec;
94 	bool retval = 0;
95 
96 	wq = work->work_queue;
97 	if (unlikely(wq == NULL))
98 		goto done;
99 
100 	WQ_EXEC_LOCK(wq);
101 	TAILQ_FOREACH(exec, &wq->exec_head, entry) {
102 		if (exec->target == work) {
103 			exec->target = NULL;
104 			retval = 1;
105 			break;
106 		}
107 	}
108 	WQ_EXEC_UNLOCK(wq);
109 done:
110 	return (retval);
111 }
112 
113 static void
114 linux_delayed_work_enqueue(struct delayed_work *dwork)
115 {
116 	struct taskqueue *tq;
117 
118 	tq = dwork->work.work_queue->taskqueue;
119 	taskqueue_enqueue(tq, &dwork->work.work_task);
120 }
121 
122 /*
123  * This function queues the given work structure on the given
124  * workqueue. It returns non-zero if the work was successfully
125  * [re-]queued. Else the work is already pending for completion.
126  */
127 bool
128 linux_queue_work_on(int cpu __unused, struct workqueue_struct *wq,
129     struct work_struct *work)
130 {
131 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
132 		[WORK_ST_IDLE] = WORK_ST_TASK,		/* start queuing task */
133 		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* NOP */
134 		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
135 		[WORK_ST_EXEC] = WORK_ST_TASK,		/* queue task another time */
136 		[WORK_ST_CANCEL] = WORK_ST_TASK,	/* start queuing task again */
137 	};
138 
139 	if (atomic_read(&wq->draining) != 0)
140 		return (!work_pending(work));
141 
142 	switch (linux_update_state(&work->state, states)) {
143 	case WORK_ST_EXEC:
144 	case WORK_ST_CANCEL:
145 		if (linux_work_exec_unblock(work) != 0)
146 			return (1);
147 		/* FALLTHROUGH */
148 	case WORK_ST_IDLE:
149 		work->work_queue = wq;
150 		taskqueue_enqueue(wq->taskqueue, &work->work_task);
151 		return (1);
152 	default:
153 		return (0);		/* already on a queue */
154 	}
155 }
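
/*
 * A minimal consumer-side sketch of how this function is typically
 * reached through the LinuxKPI <linux/workqueue.h> wrappers such as
 * INIT_WORK() and queue_work(). The structure and function names are
 * hypothetical and only illustrate the usual pattern:
 *
 *	struct foo_softc {
 *		struct work_struct reset_work;
 *	};
 *
 *	static void
 *	foo_reset_work_fn(struct work_struct *work)
 *	{
 *		struct foo_softc *sc =
 *		    container_of(work, struct foo_softc, reset_work);
 *
 *		(perform the deferred reset using "sc")
 *	}
 *
 *	INIT_WORK(&sc->reset_work, foo_reset_work_fn);
 *	queue_work(system_wq, &sc->reset_work);
 */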
156 
157 /*
158  * This function queues the given work structure on the given
159  * workqueue after a given delay in ticks. It returns non-zero if the
160  * work was successfully [re-]queued. Else the work is already pending
161  * for completion.
162  */
163 bool
164 linux_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
165     struct delayed_work *dwork, unsigned delay)
166 {
167 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
168 		[WORK_ST_IDLE] = WORK_ST_TIMER,		/* start timeout */
169 		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* NOP */
170 		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
171 		[WORK_ST_EXEC] = WORK_ST_TIMER,		/* start timeout */
172 		[WORK_ST_CANCEL] = WORK_ST_TIMER,	/* start timeout */
173 	};
174 
175 	if (atomic_read(&wq->draining) != 0)
176 		return (!work_pending(&dwork->work));
177 
178 	switch (linux_update_state(&dwork->work.state, states)) {
179 	case WORK_ST_EXEC:
180 	case WORK_ST_CANCEL:
181 		if (delay == 0 && linux_work_exec_unblock(&dwork->work) != 0) {
182 			dwork->timer.expires = jiffies;
183 			return (1);
184 		}
185 		/* FALLTHROUGH */
186 	case WORK_ST_IDLE:
187 		dwork->work.work_queue = wq;
188 		dwork->timer.expires = jiffies + delay;
189 
190 		if (delay == 0) {
191 			linux_delayed_work_enqueue(dwork);
192 		} else if (unlikely(cpu != WORK_CPU_UNBOUND)) {
193 			mtx_lock(&dwork->timer.mtx);
194 			callout_reset_on(&dwork->timer.callout, delay,
195 			    &linux_delayed_work_timer_fn, dwork, cpu);
196 			mtx_unlock(&dwork->timer.mtx);
197 		} else {
198 			mtx_lock(&dwork->timer.mtx);
199 			callout_reset(&dwork->timer.callout, delay,
200 			    &linux_delayed_work_timer_fn, dwork);
201 			mtx_unlock(&dwork->timer.mtx);
202 		}
203 		return (1);
204 	default:
205 		return (0);		/* already on a queue */
206 	}
207 }
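
/*
 * The delayed variant follows the same consumer-side pattern, again
 * with hypothetical names. The delay is expressed in ticks/jiffies,
 * so callers usually convert from milliseconds:
 *
 *	struct foo_softc {
 *		struct delayed_work stats_work;
 *	};
 *
 *	static void
 *	foo_stats_work_fn(struct work_struct *work)
 *	{
 *		struct delayed_work *dwork = to_delayed_work(work);
 *		struct foo_softc *sc =
 *		    container_of(dwork, struct foo_softc, stats_work);
 *
 *		(collect statistics, then optionally re-arm the work)
 *	}
 *
 *	INIT_DELAYED_WORK(&sc->stats_work, foo_stats_work_fn);
 *	queue_delayed_work(system_long_wq, &sc->stats_work,
 *	    msecs_to_jiffies(1000));
 */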
208 
209 void
210 linux_work_fn(void *context, int pending)
211 {
212 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
213 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
214 		[WORK_ST_TIMER] = WORK_ST_EXEC,		/* delayed work w/o timeout */
215 		[WORK_ST_TASK] = WORK_ST_EXEC,		/* call callback */
216 		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* complete callback */
217 		[WORK_ST_CANCEL] = WORK_ST_EXEC,	/* failed to cancel */
218 	};
219 	struct work_struct *work;
220 	struct workqueue_struct *wq;
221 	struct work_exec exec;
222 
223 	linux_set_current(curthread);
224 
225 	/* setup local variables */
226 	work = context;
227 	wq = work->work_queue;
228 
229 	/* store target pointer */
230 	exec.target = work;
231 
232 	/* insert executor into list */
233 	WQ_EXEC_LOCK(wq);
234 	TAILQ_INSERT_TAIL(&wq->exec_head, &exec, entry);
235 	while (1) {
236 		switch (linux_update_state(&work->state, states)) {
237 		case WORK_ST_TIMER:
238 		case WORK_ST_TASK:
239 		case WORK_ST_CANCEL:
240 			WQ_EXEC_UNLOCK(wq);
241 
242 			/* call work function */
243 			work->func(work);
244 
245 			WQ_EXEC_LOCK(wq);
246 			/* check if unblocked */
247 			if (exec.target != work) {
248 				/* reapply block */
249 				exec.target = work;
250 				break;
251 			}
252 			/* FALLTHROUGH */
253 		default:
254 			goto done;
255 		}
256 	}
257 done:
258 	/* remove executor from list */
259 	TAILQ_REMOVE(&wq->exec_head, &exec, entry);
260 	WQ_EXEC_UNLOCK(wq);
261 }
262 
263 void
264 linux_delayed_work_fn(void *context, int pending)
265 {
266 	struct delayed_work *dwork = context;
267 
268 	/*
269 	 * Make sure the timer belonging to the delayed work gets
270 	 * drained before invoking the work function. Else the timer
271 	 * mutex may still be in use which can lead to use-after-free
272 	 * situations, because the work function might free the work
273 	 * structure before returning.
274 	 */
275 	callout_drain(&dwork->timer.callout);
276 
277 	linux_work_fn(&dwork->work, pending);
278 }
279 
280 static void
281 linux_delayed_work_timer_fn(void *arg)
282 {
283 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
284 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
285 		[WORK_ST_TIMER] = WORK_ST_TASK,		/* start queueing task */
286 		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
287 		[WORK_ST_EXEC] = WORK_ST_EXEC,		/* NOP */
288 		[WORK_ST_CANCEL] = WORK_ST_TASK,	/* failed to cancel */
289 	};
290 	struct delayed_work *dwork = arg;
291 
292 	switch (linux_update_state(&dwork->work.state, states)) {
293 	case WORK_ST_TIMER:
294 	case WORK_ST_CANCEL:
295 		linux_delayed_work_enqueue(dwork);
296 		break;
297 	default:
298 		break;
299 	}
300 }
301 
302 /*
303  * This function cancels the given work structure in a synchronous
304  * fashion. It returns non-zero if the work was successfully
305  * cancelled. Else the work was not pending.
306  */
307 bool
308 linux_cancel_work_sync(struct work_struct *work)
309 {
310 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
311 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
312 		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* can't happen */
313 		[WORK_ST_TASK] = WORK_ST_IDLE,		/* cancel and drain */
314 		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* too late, drain */
315 		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* cancel and drain */
316 	};
317 	struct taskqueue *tq;
318 
319 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
320 	    "linux_cancel_work_sync() might sleep");
321 
322 	switch (linux_update_state(&work->state, states)) {
323 	case WORK_ST_IDLE:
324 	case WORK_ST_TIMER:
325 		return (0);
326 	case WORK_ST_EXEC:
327 		tq = work->work_queue->taskqueue;
328 		if (taskqueue_cancel(tq, &work->work_task, NULL) != 0)
329 			taskqueue_drain(tq, &work->work_task);
330 		return (0);
331 	default:
332 		tq = work->work_queue->taskqueue;
333 		if (taskqueue_cancel(tq, &work->work_task, NULL) != 0)
334 			taskqueue_drain(tq, &work->work_task);
335 		return (1);
336 	}
337 }
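
/*
 * A typical teardown sequence reaches this function through the
 * cancel_work_sync() wrapper. Sketched with the hypothetical names
 * from above; once the call returns the callback is neither queued
 * nor running, so it must not be called while holding locks that the
 * callback itself acquires:
 *
 *	if (cancel_work_sync(&sc->reset_work))
 *		(the work was still pending and never ran)
 *	(it is now safe to free "sc")
 */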
338 
339 /*
340  * This function atomically stops the timer and callback. The timer
341  * callback will not be called after this function returns. This
342  * function returns true when the timeout was cancelled. Else the
343  * timeout was not started or has already been called.
344  */
345 static inline bool
346 linux_cancel_timer(struct delayed_work *dwork, bool drain)
347 {
348 	bool cancelled;
349 
350 	mtx_lock(&dwork->timer.mtx);
351 	cancelled = (callout_stop(&dwork->timer.callout) == 1);
352 	mtx_unlock(&dwork->timer.mtx);
353 
354 	/* check if we should drain */
355 	if (drain)
356 		callout_drain(&dwork->timer.callout);
357 	return (cancelled);
358 }
359 
360 /*
361  * This function cancels the given delayed work structure in a
362  * non-blocking fashion. It returns non-zero if the work was
363  * successfully cancelled. Else the work may still be busy or already
364  * cancelled.
365  */
366 bool
367 linux_cancel_delayed_work(struct delayed_work *dwork)
368 {
369 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
370 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
371 		[WORK_ST_TIMER] = WORK_ST_CANCEL,	/* try to cancel */
372 		[WORK_ST_TASK] = WORK_ST_CANCEL,	/* try to cancel */
373 		[WORK_ST_EXEC] = WORK_ST_EXEC,		/* NOP */
374 		[WORK_ST_CANCEL] = WORK_ST_CANCEL,	/* NOP */
375 	};
376 	struct taskqueue *tq;
377 
378 	switch (linux_update_state(&dwork->work.state, states)) {
379 	case WORK_ST_TIMER:
380 	case WORK_ST_CANCEL:
381 		if (linux_cancel_timer(dwork, 0)) {
382 			atomic_cmpxchg(&dwork->work.state,
383 			    WORK_ST_CANCEL, WORK_ST_IDLE);
384 			return (1);
385 		}
386 		/* FALLTHROUGH */
387 	case WORK_ST_TASK:
388 		tq = dwork->work.work_queue->taskqueue;
389 		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) == 0) {
390 			atomic_cmpxchg(&dwork->work.state,
391 			    WORK_ST_CANCEL, WORK_ST_IDLE);
392 			return (1);
393 		}
394 		/* FALLTHROUGH */
395 	default:
396 		return (0);
397 	}
398 }
399 
400 /*
401  * This function cancels the given delayed work structure in a
402  * synchronous fashion. It returns non-zero if the work was
403  * successfully cancelled. Else the work was not pending.
404  */
405 bool
406 linux_cancel_delayed_work_sync(struct delayed_work *dwork)
407 {
408 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
409 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
410 		[WORK_ST_TIMER] = WORK_ST_IDLE,		/* cancel and drain */
411 		[WORK_ST_TASK] = WORK_ST_IDLE,		/* cancel and drain */
412 		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* too late, drain */
413 		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* cancel and drain */
414 	};
415 	struct taskqueue *tq;
416 
417 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
418 	    "linux_cancel_delayed_work_sync() might sleep");
419 
420 	switch (linux_update_state(&dwork->work.state, states)) {
421 	case WORK_ST_IDLE:
422 		return (0);
423 	case WORK_ST_EXEC:
424 		tq = dwork->work.work_queue->taskqueue;
425 		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0)
426 			taskqueue_drain(tq, &dwork->work.work_task);
427 		return (0);
428 	case WORK_ST_TIMER:
429 	case WORK_ST_CANCEL:
430 		if (linux_cancel_timer(dwork, 1)) {
431 			/*
432 			 * Make sure taskqueue is also drained before
433 			 * returning:
434 			 */
435 			tq = dwork->work.work_queue->taskqueue;
436 			taskqueue_drain(tq, &dwork->work.work_task);
437 			return (1);
438 		}
439 		/* FALLTHROUGH */
440 	default:
441 		tq = dwork->work.work_queue->taskqueue;
442 		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0)
443 			taskqueue_drain(tq, &dwork->work.work_task);
444 		return (1);
445 	}
446 }
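
/*
 * From a consumer's point of view the delayed variant is used the
 * same way; a minimal sketch with the hypothetical names from above,
 * stopping both the timer and any queued or running callback before
 * the enclosing structure is freed:
 *
 *	cancel_delayed_work_sync(&sc->stats_work);
 *	kfree(sc);
 */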
447 
448 /*
449  * This function waits until the given work structure is completed.
450  * It returns non-zero if the work was busy and has been
451  * waited for, and zero otherwise.
452  */
453 bool
454 linux_flush_work(struct work_struct *work)
455 {
456 	struct taskqueue *tq;
457 	int retval;
458 
459 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
460 	    "linux_flush_work() might sleep");
461 
462 	switch (atomic_read(&work->state)) {
463 	case WORK_ST_IDLE:
464 		return (0);
465 	default:
466 		tq = work->work_queue->taskqueue;
467 		retval = taskqueue_poll_is_busy(tq, &work->work_task);
468 		taskqueue_drain(tq, &work->work_task);
469 		return (retval);
470 	}
471 }
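
/*
 * Unlike the cancel functions, flushing only waits for a pending or
 * running callback to complete and never prevents it from running. A
 * minimal sketch using the hypothetical names from above:
 *
 *	queue_work(system_wq, &sc->reset_work);
 *	flush_work(&sc->reset_work);
 *	(the reset has completed at this point)
 */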
472 
473 /*
474  * This function waits until the given delayed work structure is
475  * completed. It returns non-zero if the work was busy and has been
476  * waited for, and zero otherwise.
477  */
478 bool
479 linux_flush_delayed_work(struct delayed_work *dwork)
480 {
481 	struct taskqueue *tq;
482 	int retval;
483 
484 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
485 	    "linux_flush_delayed_work() might sleep");
486 
487 	switch (atomic_read(&dwork->work.state)) {
488 	case WORK_ST_IDLE:
489 		return (0);
490 	case WORK_ST_TIMER:
491 		if (linux_cancel_timer(dwork, 1))
492 			linux_delayed_work_enqueue(dwork);
493 		/* FALLTHROUGH */
494 	default:
495 		tq = dwork->work.work_queue->taskqueue;
496 		retval = taskqueue_poll_is_busy(tq, &dwork->work.work_task);
497 		taskqueue_drain(tq, &dwork->work.work_task);
498 		return (retval);
499 	}
500 }
501 
502 /*
503  * This function returns true if the given work is pending, and not
504  * yet executing:
505  */
506 bool
507 linux_work_pending(struct work_struct *work)
508 {
509 	switch (atomic_read(&work->state)) {
510 	case WORK_ST_TIMER:
511 	case WORK_ST_TASK:
512 	case WORK_ST_CANCEL:
513 		return (1);
514 	default:
515 		return (0);
516 	}
517 }
518 
519 /*
520  * This function returns true if the given work is busy.
521  */
522 bool
523 linux_work_busy(struct work_struct *work)
524 {
525 	struct taskqueue *tq;
526 
527 	switch (atomic_read(&work->state)) {
528 	case WORK_ST_IDLE:
529 		return (0);
530 	case WORK_ST_EXEC:
531 		tq = work->work_queue->taskqueue;
532 		return (taskqueue_poll_is_busy(tq, &work->work_task));
533 	default:
534 		return (1);
535 	}
536 }
537 
538 struct workqueue_struct *
539 linux_create_workqueue_common(const char *name, int cpus)
540 {
541 	struct workqueue_struct *wq;
542 
543 	/*
544 	 * If zero CPUs are specified use the default number of CPUs:
545 	 */
546 	if (cpus == 0)
547 		cpus = linux_default_wq_cpus;
548 
549 	wq = kmalloc(sizeof(*wq), M_WAITOK | M_ZERO);
550 	wq->taskqueue = taskqueue_create(name, M_WAITOK,
551 	    taskqueue_thread_enqueue, &wq->taskqueue);
552 	atomic_set(&wq->draining, 0);
553 	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);
554 	TAILQ_INIT(&wq->exec_head);
555 	mtx_init(&wq->exec_mtx, "linux_wq_exec", NULL, MTX_DEF);
556 
557 	return (wq);
558 }
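
/*
 * Drivers normally reach this function through the allocation
 * wrappers in <linux/workqueue.h>, for example alloc_workqueue() as
 * used in linux_work_init() below. A minimal sketch of creating and
 * later destroying a private queue (the queue name is arbitrary):
 *
 *	struct workqueue_struct *wq;
 *
 *	wq = alloc_workqueue("foo_wq", 0, 1);
 *	queue_work(wq, &sc->reset_work);
 *	destroy_workqueue(wq);
 */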
559 
560 void
561 linux_destroy_workqueue(struct workqueue_struct *wq)
562 {
563 	atomic_inc(&wq->draining);
564 	drain_workqueue(wq);
565 	taskqueue_free(wq->taskqueue);
566 	mtx_destroy(&wq->exec_mtx);
567 	kfree(wq);
568 }
569 
570 void
571 linux_init_delayed_work(struct delayed_work *dwork, work_func_t func)
572 {
573 	memset(dwork, 0, sizeof(*dwork));
574 	dwork->work.func = func;
575 	TASK_INIT(&dwork->work.work_task, 0, linux_delayed_work_fn, dwork);
576 	mtx_init(&dwork->timer.mtx, spin_lock_name("lkpi-dwork"), NULL,
577 	    MTX_DEF | MTX_NOWITNESS);
578 	callout_init_mtx(&dwork->timer.callout, &dwork->timer.mtx, 0);
579 }
580 
581 static void
582 linux_work_init(void *arg)
583 {
584 	int max_wq_cpus = mp_ncpus + 1;
585 
586 	/* avoid deadlock when there are too few threads */
587 	if (max_wq_cpus < 4)
588 		max_wq_cpus = 4;
589 
590 	/* set default number of CPUs */
591 	linux_default_wq_cpus = max_wq_cpus;
592 
593 	linux_system_short_wq = alloc_workqueue("linuxkpi_short_wq", 0, max_wq_cpus);
594 	linux_system_long_wq = alloc_workqueue("linuxkpi_long_wq", 0, max_wq_cpus);
595 
596 	/* populate the workqueue pointers */
597 	system_long_wq = linux_system_long_wq;
598 	system_wq = linux_system_short_wq;
599 	system_power_efficient_wq = linux_system_short_wq;
600 	system_unbound_wq = linux_system_short_wq;
601 }
602 SYSINIT(linux_work_init, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_init, NULL);
603 
604 static void
605 linux_work_uninit(void *arg)
606 {
607 	destroy_workqueue(linux_system_short_wq);
608 	destroy_workqueue(linux_system_long_wq);
609 
610 	/* clear workqueue pointers */
611 	system_long_wq = NULL;
612 	system_wq = NULL;
613 	system_power_efficient_wq = NULL;
614 	system_unbound_wq = NULL;
615 }
616 SYSUNINIT(linux_work_uninit, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_uninit, NULL);
617