/*-
 * Copyright (c) 2000 Doug Rabson
 * Copyright (c) 2014 Jeff Roberson
 * Copyright (c) 2016 Matthew Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/gtaskqueue.h>
#include <sys/unistd.h>
#include <machine/stdarg.h>

static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
static void	gtaskqueue_thread_enqueue(void *);
static void	gtaskqueue_thread_loop(void *arg);
static int	task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
static void	gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);

TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
TASKQGROUP_DEFINE(config, 1, 1);

struct gtaskqueue_busy {
	struct gtask	*tb_running;
	TAILQ_ENTRY(gtaskqueue_busy) tb_link;
};

static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;

struct gtaskqueue {
	STAILQ_HEAD(, gtask)	tq_queue;
	gtaskqueue_enqueue_fn	tq_enqueue;
	void			*tq_context;
	char			*tq_name;
	TAILQ_HEAD(, gtaskqueue_busy) tq_active;
	struct mtx		tq_mutex;
	struct thread		**tq_threads;
	int			tq_tcount;
	int			tq_spin;
	int			tq_flags;
	int			tq_callouts;
	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
};

#define	TQ_FLAGS_ACTIVE		(1 << 0)
#define	TQ_FLAGS_BLOCKED	(1 << 1)
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	(1 << 2)

#define	DT_CALLOUT_ARMED	(1 << 0)

#define	TQ_LOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_lock_spin(&(tq)->tq_mutex);			\
		else							\
			mtx_lock(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)

#define	TQ_UNLOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
		else							\
			mtx_unlock(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)

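/*
 * Lock-selection sketch (illustrative only, not compiled): a queue created
 * with MTX_SPIN takes its mutex with mtx_lock_spin() and can therefore be
 * driven from interrupt context, while a default queue uses a sleep mutex.
 * gtaskqueue_create_fast() below passes MTX_SPIN.
 */
#if 0
	TQ_LOCK(tq);		/* mtx_lock_spin() if tq->tq_spin is set */
	/* ... manipulate tq->tq_queue ... */
	TQ_UNLOCK(tq);		/* matching unlock flavor */
#endif
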
#ifdef INVARIANTS
static void
gtask_dump(struct gtask *gtask)
{
	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	       gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context);
}
#endif

static __inline int
TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
    int t)
{
	if (tq->tq_spin)
		return (msleep_spin(p, m, wm, t));
	return (msleep(p, m, pri, wm, t));
}

static struct gtaskqueue *
_gtaskqueue_create(const char *name, int mflags,
		 taskqueue_enqueue_fn enqueue, void *context,
		 int mtxflags, const char *mtxname __unused)
{
	struct gtaskqueue *queue;
	char *tq_name;

	tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
	if (!tq_name)
		return (NULL);

	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");

	queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
	if (!queue) {
		free(tq_name, M_GTASKQUEUE);
		return (NULL);
	}

	STAILQ_INIT(&queue->tq_queue);
	TAILQ_INIT(&queue->tq_active);
	queue->tq_enqueue = enqueue;
	queue->tq_context = context;
	queue->tq_name = tq_name;
	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
	queue->tq_flags |= TQ_FLAGS_ACTIVE;
	if (enqueue == gtaskqueue_thread_enqueue)
		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);

	return (queue);
}

/*
 * Signal a taskqueue thread to terminate.
 */
static void
gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
{

	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
		wakeup(tq);
		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
	}
}

static void
gtaskqueue_free(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
	gtaskqueue_terminate(queue->tq_threads, queue);
	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
	mtx_destroy(&queue->tq_mutex);
	free(queue->tq_threads, M_GTASKQUEUE);
	free(queue->tq_name, M_GTASKQUEUE);
	free(queue, M_GTASKQUEUE);
}

/*
 * Wait for the grouptask to complete, then prevent it from being
 * enqueued again.
 */
void
grouptask_block(struct grouptask *grouptask)
{
	struct gtaskqueue *queue = grouptask->gt_taskqueue;
	struct gtask *gtask = &grouptask->gt_task;

#ifdef INVARIANTS
	if (queue == NULL) {
		gtask_dump(gtask);
		panic("queue == NULL");
	}
#endif
	TQ_LOCK(queue);
	gtask->ta_flags |= TASK_NOENQUEUE;
	gtaskqueue_drain_locked(queue, gtask);
	TQ_UNLOCK(queue);
}

void
grouptask_unblock(struct grouptask *grouptask)
{
	struct gtaskqueue *queue = grouptask->gt_taskqueue;
	struct gtask *gtask = &grouptask->gt_task;

#ifdef INVARIANTS
	if (queue == NULL) {
		gtask_dump(gtask);
		panic("queue == NULL");
	}
#endif
	TQ_LOCK(queue);
	gtask->ta_flags &= ~TASK_NOENQUEUE;
	TQ_UNLOCK(queue);
}
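
/*
 * Usage sketch (illustrative only; "sc" and its grouptask member are
 * hypothetical): quiesce a grouptask around a reconfiguration that the
 * task handler races with.
 */
#if 0
	grouptask_block(&sc->sc_rx_task);	/* drain it and forbid enqueue */
	/* ... safely modify state the handler reads ... */
	grouptask_unblock(&sc->sc_rx_task);	/* allow enqueueing again */
#endif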

int
grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
{
#ifdef INVARIANTS
	if (queue == NULL) {
		gtask_dump(gtask);
		panic("queue == NULL");
	}
#endif
	TQ_LOCK(queue);
	if (gtask->ta_flags & TASK_ENQUEUED) {
		TQ_UNLOCK(queue);
		return (0);
	}
	if (gtask->ta_flags & TASK_NOENQUEUE) {
		TQ_UNLOCK(queue);
		return (EAGAIN);
	}
	STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
	gtask->ta_flags |= TASK_ENQUEUED;
	TQ_UNLOCK(queue);
	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
		queue->tq_enqueue(queue->tq_context);
	return (0);
}
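
/*
 * Usage sketch (illustrative fragment; my_handler, my_softc, and gq are
 * hypothetical).  TASK_ENQUEUED makes the call idempotent while the task
 * is pending, so it is cheap to enqueue on every event.
 */
#if 0
	static struct gtask my_task;

	GTASK_INIT(&my_task, 0, 0, my_handler, &my_softc);
	(void)grouptaskqueue_enqueue(gq, &my_task);	/* gq: a gtaskqueue */
#endif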

static void
gtaskqueue_task_nop_fn(void *context)
{
}

/*
 * Block until all currently queued tasks in this taskqueue
 * have begun execution.  Tasks queued during execution of
 * this function are ignored.
 */
static void
gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
{
	struct gtask t_barrier;

	if (STAILQ_EMPTY(&queue->tq_queue))
		return;

	/*
	 * Enqueue our barrier after all current tasks, but with
	 * the highest priority so that newly queued tasks cannot
	 * pass it.  Because of the high priority, we can not use
	 * taskqueue_enqueue_locked directly (which drops the lock
	 * anyway) so just insert it at tail while we have the
	 * queue lock.
	 */
	GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
	t_barrier.ta_flags |= TASK_ENQUEUED;

	/*
	 * Once the barrier has executed, all previously queued tasks
	 * have completed or are currently executing.
	 */
	while (t_barrier.ta_flags & TASK_ENQUEUED)
		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
}

/*
 * Block until all currently executing tasks for this taskqueue
 * complete.  Tasks that begin execution during the execution
 * of this function are ignored.
 */
static void
gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
{
	struct gtaskqueue_busy tb_marker, *tb_first;

	if (TAILQ_EMPTY(&queue->tq_active))
		return;

	/* Block gtaskqueue_terminate(). */
	queue->tq_callouts++;

	/*
	 * Wait for all currently executing taskqueue threads
	 * to go idle.
	 */
	tb_marker.tb_running = TB_DRAIN_WAITER;
	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);

	/*
	 * Wakeup any other drain waiter that happened to queue up
	 * without any intervening active thread.
	 */
	tb_first = TAILQ_FIRST(&queue->tq_active);
	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
		wakeup(tb_first);

	/* Release gtaskqueue_terminate(). */
	queue->tq_callouts--;
	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
		wakeup_one(queue->tq_threads);
}

void
gtaskqueue_block(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags |= TQ_FLAGS_BLOCKED;
	TQ_UNLOCK(queue);
}

void
gtaskqueue_unblock(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
	if (!STAILQ_EMPTY(&queue->tq_queue))
		queue->tq_enqueue(queue->tq_context);
	TQ_UNLOCK(queue);
}

static void
gtaskqueue_run_locked(struct gtaskqueue *queue)
{
	struct gtaskqueue_busy tb;
	struct gtaskqueue_busy *tb_first;
	struct gtask *gtask;

	KASSERT(queue != NULL, ("tq is NULL"));
	TQ_ASSERT_LOCKED(queue);
	tb.tb_running = NULL;

	while (STAILQ_FIRST(&queue->tq_queue)) {
		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);

		/*
		 * Carefully remove the first task from the queue and
		 * clear its TASK_ENQUEUED flag
		 */
		gtask = STAILQ_FIRST(&queue->tq_queue);
		KASSERT(gtask != NULL, ("task is NULL"));
		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
		gtask->ta_flags &= ~TASK_ENQUEUED;
		tb.tb_running = gtask;
		TQ_UNLOCK(queue);

		KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
		gtask->ta_func(gtask->ta_context);

		TQ_LOCK(queue);
		tb.tb_running = NULL;
		wakeup(gtask);

		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
		tb_first = TAILQ_FIRST(&queue->tq_active);
		if (tb_first != NULL &&
		    tb_first->tb_running == TB_DRAIN_WAITER)
			wakeup(tb_first);
	}
}

static int
task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
{
	struct gtaskqueue_busy *tb;

	TQ_ASSERT_LOCKED(queue);
	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
		if (tb->tb_running == gtask)
			return (1);
	}
	return (0);
}

static int
gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
{

	if (gtask->ta_flags & TASK_ENQUEUED)
		STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
	gtask->ta_flags &= ~TASK_ENQUEUED;
	return (task_is_running(queue, gtask) ? EBUSY : 0);
}

int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int error;

	TQ_LOCK(queue);
	error = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);

	return (error);
}

static void
gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
{
	while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
		TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
}

void
gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
{

	if (!queue->tq_spin)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

	TQ_LOCK(queue);
	gtaskqueue_drain_locked(queue, gtask);
	TQ_UNLOCK(queue);
}

void
gtaskqueue_drain_all(struct gtaskqueue *queue)
{

	if (!queue->tq_spin)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

	TQ_LOCK(queue);
	gtaskqueue_drain_tq_queue(queue);
	gtaskqueue_drain_tq_active(queue);
	TQ_UNLOCK(queue);
}
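
/*
 * Usage sketch (illustrative; "sc" is a hypothetical softc): wait out
 * every queued and running task before tearing down state the task
 * handlers use.
 */
#if 0
	gtaskqueue_drain_all(sc->sc_gq);
	/* ... now safe to free what the handlers referenced ... */
#endif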

static int
_gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
    cpuset_t *mask, const char *name, va_list ap)
{
	char ktname[MAXCOMLEN + 1];
	struct thread *td;
	struct gtaskqueue *tq;
	int i, error;

	if (count <= 0)
		return (EINVAL);

	vsnprintf(ktname, sizeof(ktname), name, ap);
	tq = *tqp;

	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
	    M_NOWAIT | M_ZERO);
	if (tq->tq_threads == NULL) {
		printf("%s: no memory for %s threads\n", __func__, ktname);
		return (ENOMEM);
	}

	for (i = 0; i < count; i++) {
		if (count == 1)
			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
		else
			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
			    &tq->tq_threads[i], RFSTOPPED, 0,
			    "%s_%d", ktname, i);
		if (error) {
			/* should be ok to continue, taskqueue_free will dtrt */
			printf("%s: kthread_add(%s): error %d\n", __func__,
			    ktname, error);
			tq->tq_threads[i] = NULL;		/* paranoid */
		} else
			tq->tq_tcount++;
	}
	for (i = 0; i < count; i++) {
		if (tq->tq_threads[i] == NULL)
			continue;
		td = tq->tq_threads[i];
		if (mask) {
			error = cpuset_setthread(td->td_tid, mask);
			/*
			 * Failing to pin is rarely an actual fatal error;
			 * it'll just affect performance.
			 */
			if (error)
				printf("%s: curthread=%llu: can't pin; "
				    "error=%d\n",
				    __func__,
				    (unsigned long long) td->td_tid,
				    error);
		}
		thread_lock(td);
		sched_prio(td, pri);
		sched_add(td, SRQ_BORING);
		thread_unlock(td);
	}

	return (0);
}

static int
gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
    const char *name, ...)
{
	va_list ap;
	int error;

	va_start(ap, name);
	error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
	va_end(ap);
	return (error);
}

static inline void
gtaskqueue_run_callback(struct gtaskqueue *tq,
    enum taskqueue_callback_type cb_type)
{
	taskqueue_callback_fn tq_callback;

	TQ_ASSERT_UNLOCKED(tq);
	tq_callback = tq->tq_callbacks[cb_type];
	if (tq_callback != NULL)
		tq_callback(tq->tq_cb_contexts[cb_type]);
}

static void
gtaskqueue_thread_loop(void *arg)
{
	struct gtaskqueue **tqp, *tq;

	tqp = arg;
	tq = *tqp;
	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
	TQ_LOCK(tq);
	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
		/* XXX ? */
		gtaskqueue_run_locked(tq);
		/*
		 * Because gtaskqueue_run_locked() can drop tq_mutex, we
		 * need to check if the TQ_FLAGS_ACTIVE flag wasn't removed
		 * in the meantime, which means we missed a wakeup.
		 */
		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
			break;
		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
	}
	gtaskqueue_run_locked(tq);
	/*
	 * This thread is on its way out, so just drop the lock temporarily
	 * in order to call the shutdown callback.  This allows the callback
	 * to look at the taskqueue, even just before it dies.
	 */
	TQ_UNLOCK(tq);
	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
	TQ_LOCK(tq);

	/* rendezvous with thread that asked us to terminate */
	tq->tq_tcount--;
	wakeup_one(tq->tq_threads);
	TQ_UNLOCK(tq);
	kthread_exit();
}

static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue **tqp, *tq;

	tqp = context;
	tq = *tqp;
	wakeup_one(tq);
}

static struct gtaskqueue *
gtaskqueue_create_fast(const char *name, int mflags,
		 taskqueue_enqueue_fn enqueue, void *context)
{
	return _gtaskqueue_create(name, mflags, enqueue, context,
			MTX_SPIN, "fast_taskqueue");
}

struct taskqgroup_cpu {
	LIST_HEAD(, grouptask)	tgc_tasks;
	struct gtaskqueue	*tgc_taskq;
	int	tgc_cnt;
	int	tgc_cpu;
};

struct taskqgroup {
	struct taskqgroup_cpu tqg_queue[MAXCPU];
	struct mtx	tqg_lock;
	const char *	tqg_name;
	int		tqg_adjusting;
	int		tqg_stride;
	int		tqg_cnt;
};

struct taskq_bind_task {
	struct gtask bt_task;
	int	bt_cpuid;
};

static void
taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
{
	struct taskqgroup_cpu *qcpu;

	qcpu = &qgroup->tqg_queue[idx];
	LIST_INIT(&qcpu->tgc_tasks);
	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
	    gtaskqueue_thread_enqueue, &qcpu->tgc_taskq);
	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
	    "%s_%d", qgroup->tqg_name, idx);
	qcpu->tgc_cpu = cpu;
}

static void
taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
{

	gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
}

/*
 * Find the taskq with the fewest tasks that doesn't currently have any
 * other tasks from this uniq identifier.
 */
static int
taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
{
	struct grouptask *n;
	int i, idx, mincnt;
	int strict;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
	if (qgroup->tqg_cnt == 0)
		return (0);
	idx = -1;
	mincnt = INT_MAX;
	/*
	 * Two passes: first scan for the queue with the fewest tasks that
	 * does not already service this uniq id.  If that fails, simply
	 * find the queue with the fewest total tasks.
	 */
	for (strict = 1; mincnt == INT_MAX; strict = 0) {
		for (i = 0; i < qgroup->tqg_cnt; i++) {
			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
				continue;
			if (strict) {
				LIST_FOREACH(n,
				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
					if (n->gt_uniq == uniq)
						break;
				if (n != NULL)
					continue;
			}
			mincnt = qgroup->tqg_queue[i].tgc_cnt;
			idx = i;
		}
	}
	if (idx == -1)
		panic("taskqgroup_find: Failed to pick a qid.");

	return (idx);
}
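
/*
 * Worked example (illustrative): with three queues whose task counts are
 * {2, 1, 1} and "uniq" already serviced by queue 1, the strict pass skips
 * queue 1, considers queues 0 and 2, and picks queue 2 as least loaded.
 * Only if every queue already serviced the uniq id would the second,
 * non-strict pass run and pick the least loaded queue overall.
 */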

/*
 * smp_started is unusable since it is not set for UP kernels or even for
 * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 *
 * So maintain our own flag.  It must be set after all CPUs are started
 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 * simpler for adjustment to pass a flag indicating if it is delayed.
 */

static int tqg_smp_started;

static void
tqg_record_smp_started(void *arg)
{
	tqg_smp_started = 1;
}

SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
	tqg_record_smp_started, NULL);
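
/*
 * Ordering sketch (illustrative; names approximate what the macro in
 * sys/gtaskqueue.h generates): the deferred adjustment registered by
 * TASKQGROUP_DEFINE() runs at SI_SUB_SMP:SI_ORDER_ANY, i.e. after the
 * SI_ORDER_FOURTH hook above has set tqg_smp_started.
 */
#if 0
SYSINIT(taskqgroup_adj_softirq, SI_SUB_SMP, SI_ORDER_ANY,
	taskqgroup_adjust_softirq, NULL);	/* sees tqg_smp_started == 1 */
#endif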

void
taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
    void *uniq, int irq, const char *name)
{
	cpuset_t mask;
	int qid, error;

	gtask->gt_uniq = uniq;
	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
	gtask->gt_irq = irq;
	gtask->gt_cpu = -1;
	mtx_lock(&qgroup->tqg_lock);
	qid = taskqgroup_find(qgroup, uniq);
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	if (irq != -1 && tqg_smp_started) {
		gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
		CPU_ZERO(&mask);
		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
		mtx_unlock(&qgroup->tqg_lock);
		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
		if (error)
			printf("%s: setaffinity failed for %s: %d\n", __func__, gtask->gt_name, error);
	} else
		mtx_unlock(&qgroup->tqg_lock);
}
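
/*
 * Usage sketch (illustrative; the softc, handler, and interrupt number
 * are hypothetical): attach a grouptask to the softirq group, fire it
 * from interrupt context, and detach it on teardown.
 */
#if 0
	GROUPTASK_INIT(&sc->sc_rx_task, 0, my_rx_handler, sc);
	taskqgroup_attach(qgroup_softirq, &sc->sc_rx_task, sc,
	    sc->sc_irq /* or -1 for no interrupt affinity */, "my_rx");

	GROUPTASK_ENQUEUE(&sc->sc_rx_task);	/* e.g. from the ithread */

	taskqgroup_detach(qgroup_softirq, &sc->sc_rx_task);
#endif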

static void
taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	cpuset_t mask;
	int qid, cpu, error;

	mtx_lock(&qgroup->tqg_lock);
	qid = taskqgroup_find(qgroup, gtask->gt_uniq);
	cpu = qgroup->tqg_queue[qid].tgc_cpu;
	if (gtask->gt_irq != -1) {
		mtx_unlock(&qgroup->tqg_lock);

		CPU_ZERO(&mask);
		CPU_SET(cpu, &mask);
		error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
		mtx_lock(&qgroup->tqg_lock);
		if (error)
			printf("%s: %s setaffinity failed: %d\n", __func__, gtask->gt_name, error);
	}
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	mtx_unlock(&qgroup->tqg_lock);
}

int
taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
	void *uniq, int cpu, int irq, const char *name)
{
	cpuset_t mask;
	int i, qid, error;

	qid = -1;
	gtask->gt_uniq = uniq;
	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
	gtask->gt_irq = irq;
	gtask->gt_cpu = cpu;
	mtx_lock(&qgroup->tqg_lock);
	if (tqg_smp_started) {
		for (i = 0; i < qgroup->tqg_cnt; i++)
			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
				qid = i;
				break;
			}
		if (qid == -1) {
			mtx_unlock(&qgroup->tqg_lock);
			printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
			return (EINVAL);
		}
	} else
		qid = 0;
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	cpu = qgroup->tqg_queue[qid].tgc_cpu;
	mtx_unlock(&qgroup->tqg_lock);

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	if (irq != -1 && tqg_smp_started) {
		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
		if (error)
			printf("%s: setaffinity failed: %d\n", __func__, error);
	}
	return (0);
}
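
/*
 * Usage sketch (illustrative): as above, but pin the task to a CPU the
 * caller chooses instead of letting taskqgroup_find() pick a queue.
 */
#if 0
	(void)taskqgroup_attach_cpu(qgroup_softirq, &sc->sc_tx_task, sc,
	    2 /* cpu */, sc->sc_irq, "my_tx");
#endif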

static int
taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	cpuset_t mask;
	int i, qid, irq, cpu, error;

	qid = -1;
	irq = gtask->gt_irq;
	cpu = gtask->gt_cpu;
	MPASS(tqg_smp_started);
	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++)
		if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
			qid = i;
			break;
		}
	if (qid == -1) {
		mtx_unlock(&qgroup->tqg_lock);
		printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
		return (EINVAL);
	}
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	mtx_unlock(&qgroup->tqg_lock);

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	if (irq != -1) {
		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
		if (error)
			printf("%s: setaffinity failed: %d\n", __func__, error);
	}
	return (0);
}

void
taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	int i;

	grouptask_block(gtask);
	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++)
		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
			break;
	if (i == qgroup->tqg_cnt)
		panic("taskqgroup_detach: task %s not in group\n", gtask->gt_name);
	qgroup->tqg_queue[i].tgc_cnt--;
	LIST_REMOVE(gtask, gt_list);
	mtx_unlock(&qgroup->tqg_lock);
	gtask->gt_taskqueue = NULL;
	gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
}

static void
taskqgroup_binder(void *ctx)
{
	struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
	cpuset_t mask;
	int error;

	CPU_ZERO(&mask);
	CPU_SET(gtask->bt_cpuid, &mask);
	error = cpuset_setthread(curthread->td_tid, &mask);
	thread_lock(curthread);
	sched_bind(curthread, gtask->bt_cpuid);
	thread_unlock(curthread);

	if (error)
		printf("%s: setaffinity failed: %d\n", __func__,
		    error);
	free(gtask, M_DEVBUF);
}

static void
taskqgroup_bind(struct taskqgroup *qgroup)
{
	struct taskq_bind_task *gtask;
	int i;

	/*
	 * Bind taskqueue threads to specific CPUs, if they have been assigned
	 * one.
	 */
	if (qgroup->tqg_cnt == 1)
		return;

	for (i = 0; i < qgroup->tqg_cnt; i++) {
		gtask = malloc(sizeof(*gtask), M_DEVBUF, M_WAITOK);
		GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
		gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
		grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
		    &gtask->bt_task);
	}
}

static void
taskqgroup_config_init(void *arg)
{
	struct taskqgroup *qgroup = qgroup_config;
	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);

	LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
	    grouptask, gt_list);
	qgroup->tqg_queue[0].tgc_cnt = 0;
	taskqgroup_cpu_create(qgroup, 0, 0);

	qgroup->tqg_cnt = 1;
	qgroup->tqg_stride = 1;
}

SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
	taskqgroup_config_init, NULL);

static int
_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
	struct grouptask *gtask;
	int i, k, old_cnt, old_cpu, cpu;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);

	if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
		printf("%s: failed cnt: %d stride: %d "
		    "mp_ncpus: %d tqg_smp_started: %d\n",
		    __func__, cnt, stride, mp_ncpus, tqg_smp_started);
		return (EINVAL);
	}
	if (qgroup->tqg_adjusting) {
		printf("%s failed: adjusting\n", __func__);
		return (EBUSY);
	}
	qgroup->tqg_adjusting = 1;
	old_cnt = qgroup->tqg_cnt;
	old_cpu = 0;
	if (old_cnt < cnt)
		old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
	mtx_unlock(&qgroup->tqg_lock);
	/*
	 * Set up queue for tasks added before boot.
	 */
	if (old_cnt == 0) {
		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
		    grouptask, gt_list);
		qgroup->tqg_queue[0].tgc_cnt = 0;
	}

	/*
	 * If new taskq threads have been added.
	 */
	cpu = old_cpu;
	for (i = old_cnt; i < cnt; i++) {
		taskqgroup_cpu_create(qgroup, i, cpu);

		for (k = 0; k < stride; k++)
			cpu = CPU_NEXT(cpu);
	}
	mtx_lock(&qgroup->tqg_lock);
	qgroup->tqg_cnt = cnt;
	qgroup->tqg_stride = stride;

	/*
	 * Adjust drivers to use new taskqs.
	 */
	for (i = 0; i < old_cnt; i++) {
		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
			LIST_REMOVE(gtask, gt_list);
			qgroup->tqg_queue[i].tgc_cnt--;
			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
		}
	}
	mtx_unlock(&qgroup->tqg_lock);

	while ((gtask = LIST_FIRST(&gtask_head))) {
		LIST_REMOVE(gtask, gt_list);
		if (gtask->gt_cpu == -1)
			taskqgroup_attach_deferred(qgroup, gtask);
		else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
			taskqgroup_attach_deferred(qgroup, gtask);
	}

#ifdef INVARIANTS
	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++) {
		MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
			MPASS(gtask->gt_taskqueue != NULL);
	}
	mtx_unlock(&qgroup->tqg_lock);
#endif
	/*
	 * If taskq thread count has been reduced.
	 */
	for (i = cnt; i < old_cnt; i++)
		taskqgroup_cpu_remove(qgroup, i);

	taskqgroup_bind(qgroup);

	mtx_lock(&qgroup->tqg_lock);
	qgroup->tqg_adjusting = 0;

	return (0);
}

int
taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
	int error;

	mtx_lock(&qgroup->tqg_lock);
	error = _taskqgroup_adjust(qgroup, cnt, stride);
	mtx_unlock(&qgroup->tqg_lock);

	return (error);
}
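
/*
 * Usage sketch (illustrative): grow a group to one queue per CPU with
 * unit stride once the CPUs are up; this mirrors what the SYSINIT
 * generated by TASKQGROUP_DEFINE(softirq, mp_ncpus, 1) above arranges.
 */
#if 0
	(void)taskqgroup_adjust(qgroup_softirq, mp_ncpus, 1);
#endif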

struct taskqgroup *
taskqgroup_create(const char *name)
{
	struct taskqgroup *qgroup;

	qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
	qgroup->tqg_name = name;
	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);

	return (qgroup);
}

void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}

void
taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
	const char *name)
{

	GROUPTASK_INIT(gtask, 0, fn, ctx);
	taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
}

void
taskqgroup_config_gtask_deinit(struct grouptask *gtask)
{
	taskqgroup_detach(qgroup_config, gtask);
}
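
/*
 * Usage sketch (illustrative; "sc" and the handler name are hypothetical):
 * a config grouptask runs on the single-threaded "config" group and is
 * handy for deferred administrative work.
 */
#if 0
	static struct grouptask my_cfg_task;

	taskqgroup_config_gtask_init(sc, &my_cfg_task, my_cfg_handler,
	    "my_cfg");
	GROUPTASK_ENQUEUE(&my_cfg_task);
	/* ... */
	taskqgroup_config_gtask_deinit(&my_cfg_task);
#endif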