xref: /freebsd/sys/kern/subr_gtaskqueue.c (revision e12ff891366cf94db4bfe4c2c810b26a5531053d)
1 /*-
2  * Copyright (c) 2000 Doug Rabson
3  * Copyright (c) 2014 Jeff Roberson
4  * Copyright (c) 2016 Matthew Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/cpuset.h>
36 #include <sys/kernel.h>
37 #include <sys/kthread.h>
38 #include <sys/libkern.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sched.h>
45 #include <sys/smp.h>
46 #include <sys/gtaskqueue.h>
47 #include <sys/unistd.h>
48 #include <machine/stdarg.h>
49 
/*
 * Malloc type used for the queue, queue-name, thread-array and group
 * allocations made in this file.
 */
static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");

/* Forward declarations of file-local helpers that are defined further down. */
static void	gtaskqueue_thread_enqueue(void *);
static void	gtaskqueue_thread_loop(void *arg);
static int	task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
static void	gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);

/*
 * Instantiate the "softirq" and "config" task queue groups.
 * NOTE(review): the two numeric arguments are presumably the initial queue
 * count and CPU stride -- confirm against TASKQGROUP_DEFINE in
 * <sys/gtaskqueue.h>.
 */
TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
TASKQGROUP_DEFINE(config, 1, 1);
58 
/*
 * One entry on a queue's tq_active list.  gtaskqueue_run_locked() links one
 * in for the task it is currently executing, and
 * gtaskqueue_drain_tq_active() links one in as a marker whose tb_running
 * is TB_DRAIN_WAITER.
 */
struct gtaskqueue_busy {
	struct gtask	*tb_running;	/* running task, NULL, or TB_DRAIN_WAITER */
	TAILQ_ENTRY(gtaskqueue_busy) tb_link;	/* linkage on tq_active */
};
63 
/*
 * Sentinel stored in tb_running to mark a tq_active entry that belongs to a
 * drain waiter rather than to a running task.
 */
static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;

/* Signature of the hook stored in tq_enqueue; it is called with tq_context. */
typedef void (*gtaskqueue_enqueue_fn)(void *context);
67 
/*
 * A single group task queue: a list of pending tasks, the bookkeeping for
 * tasks that are currently executing, and the worker threads that run them.
 */
struct gtaskqueue {
	STAILQ_HEAD(, gtask)	tq_queue;	/* pending tasks; appended at tail, run from head */
	gtaskqueue_enqueue_fn	tq_enqueue;	/* hook invoked after a task has been queued */
	void			*tq_context;	/* argument handed to tq_enqueue */
	char			*tq_name;	/* malloc'ed name, also used as the mutex name */
	TAILQ_HEAD(, gtaskqueue_busy) tq_active;	/* running tasks and drain markers */
	struct mtx		tq_mutex;	/* protects the queue; spin or default per tq_spin */
	struct thread		**tq_threads;	/* worker thread array; also the terminate wait channel */
	int			tq_tcount;	/* number of worker threads still alive */
	int			tq_spin;	/* non-zero when tq_mutex is a spin mutex */
	int			tq_flags;	/* TQ_FLAGS_* bits */
	int			tq_callouts;	/* while > 0, gtaskqueue_terminate() keeps waiting */
	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];	/* per-type thread callbacks */
	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];	/* argument for each callback */
};
83 
/* Set at creation; cleared by gtaskqueue_free() to make the worker threads exit. */
#define	TQ_FLAGS_ACTIVE		(1 << 0)
/* Set by gtaskqueue_block(); while set, queueing a task does not call tq_enqueue. */
#define	TQ_FLAGS_BLOCKED	(1 << 1)
/* Set at creation when the enqueue hook is gtaskqueue_thread_enqueue(). */
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	(1 << 2)

/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define	DT_CALLOUT_ARMED	(1 << 0)
89 
/*
 * Take the queue mutex with the primitive that matches how the queue was
 * created: mtx_lock() for a default mutex, mtx_lock_spin() for a spin mutex.
 */
#define	TQ_LOCK(tq)							\
	do {								\
		if (!(tq)->tq_spin)					\
			mtx_lock(&(tq)->tq_mutex);			\
		else							\
			mtx_lock_spin(&(tq)->tq_mutex);			\
	} while (0)
/* Assert that the calling thread owns the queue mutex. */
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)
98 
/*
 * Release the queue mutex with the primitive that matches how the queue was
 * created: mtx_unlock() for a default mutex, mtx_unlock_spin() for a spin
 * mutex.
 */
#define	TQ_UNLOCK(tq)							\
	do {								\
		if (!(tq)->tq_spin)					\
			mtx_unlock(&(tq)->tq_mutex);			\
		else							\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
	} while (0)
/* Assert that the calling thread does not own the queue mutex. */
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
107 
#ifdef INVARIANTS
/*
 * Print the address, flags, priority, handler and handler argument of a
 * gtask.  Callers in this file use it right before panicking on a NULL queue.
 */
static void
gtask_dump(struct gtask *gtask)
{
	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	    gtask,
	    gtask->ta_flags,
	    gtask->ta_priority,
	    gtask->ta_func,
	    gtask->ta_context);
}
#endif
116 
117 static __inline int
118 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
119     int t)
120 {
121 	if (tq->tq_spin)
122 		return (msleep_spin(p, m, wm, t));
123 	return (msleep(p, m, pri, wm, t));
124 }
125 
126 static struct gtaskqueue *
127 _gtaskqueue_create(const char *name, int mflags,
128 		 taskqueue_enqueue_fn enqueue, void *context,
129 		 int mtxflags, const char *mtxname __unused)
130 {
131 	struct gtaskqueue *queue;
132 	char *tq_name;
133 
134 	tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
135 	if (!tq_name)
136 		return (NULL);
137 
138 	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
139 
140 	queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
141 	if (!queue) {
142 		free(tq_name, M_GTASKQUEUE);
143 		return (NULL);
144 	}
145 
146 	STAILQ_INIT(&queue->tq_queue);
147 	TAILQ_INIT(&queue->tq_active);
148 	queue->tq_enqueue = enqueue;
149 	queue->tq_context = context;
150 	queue->tq_name = tq_name;
151 	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
152 	queue->tq_flags |= TQ_FLAGS_ACTIVE;
153 	if (enqueue == gtaskqueue_thread_enqueue)
154 		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
155 	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
156 
157 	return (queue);
158 }
159 
160 
161 /*
162  * Signal a taskqueue thread to terminate.
163  */
164 static void
165 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
166 {
167 
168 	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
169 		wakeup(tq);
170 		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
171 	}
172 }
173 
174 static void
175 gtaskqueue_free(struct gtaskqueue *queue)
176 {
177 
178 	TQ_LOCK(queue);
179 	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
180 	gtaskqueue_terminate(queue->tq_threads, queue);
181 	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
182 	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
183 	mtx_destroy(&queue->tq_mutex);
184 	free(queue->tq_threads, M_GTASKQUEUE);
185 	free(queue->tq_name, M_GTASKQUEUE);
186 	free(queue, M_GTASKQUEUE);
187 }
188 
189 /*
190  * Wait for all to complete, then prevent it from being enqueued
191  */
192 void
193 grouptask_block(struct grouptask *grouptask)
194 {
195 	struct gtaskqueue *queue = grouptask->gt_taskqueue;
196 	struct gtask *gtask = &grouptask->gt_task;
197 
198 #ifdef INVARIANTS
199 	if (queue == NULL) {
200 		gtask_dump(gtask);
201 		panic("queue == NULL");
202 	}
203 #endif
204 	TQ_LOCK(queue);
205 	gtask->ta_flags |= TASK_NOENQUEUE;
206   	gtaskqueue_drain_locked(queue, gtask);
207 	TQ_UNLOCK(queue);
208 }
209 
210 void
211 grouptask_unblock(struct grouptask *grouptask)
212 {
213 	struct gtaskqueue *queue = grouptask->gt_taskqueue;
214 	struct gtask *gtask = &grouptask->gt_task;
215 
216 #ifdef INVARIANTS
217 	if (queue == NULL) {
218 		gtask_dump(gtask);
219 		panic("queue == NULL");
220 	}
221 #endif
222 	TQ_LOCK(queue);
223 	gtask->ta_flags &= ~TASK_NOENQUEUE;
224 	TQ_UNLOCK(queue);
225 }
226 
227 int
228 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
229 {
230 #ifdef INVARIANTS
231 	if (queue == NULL) {
232 		gtask_dump(gtask);
233 		panic("queue == NULL");
234 	}
235 #endif
236 	TQ_LOCK(queue);
237 	if (gtask->ta_flags & TASK_ENQUEUED) {
238 		TQ_UNLOCK(queue);
239 		return (0);
240 	}
241 	if (gtask->ta_flags & TASK_NOENQUEUE) {
242 		TQ_UNLOCK(queue);
243 		return (EAGAIN);
244 	}
245 	STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
246 	gtask->ta_flags |= TASK_ENQUEUED;
247 	TQ_UNLOCK(queue);
248 	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
249 		queue->tq_enqueue(queue->tq_context);
250 	return (0);
251 }
252 
/*
 * Task handler that does nothing.  Used as the handler of the barrier task
 * that gtaskqueue_drain_tq_queue() inserts.
 */
static void
gtaskqueue_task_nop_fn(void *context)
{
}
257 
258 /*
259  * Block until all currently queued tasks in this taskqueue
260  * have begun execution.  Tasks queued during execution of
261  * this function are ignored.
262  */
263 static void
264 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
265 {
266 	struct gtask t_barrier;
267 
268 	if (STAILQ_EMPTY(&queue->tq_queue))
269 		return;
270 
271 	/*
272 	 * Enqueue our barrier after all current tasks, but with
273 	 * the highest priority so that newly queued tasks cannot
274 	 * pass it.  Because of the high priority, we can not use
275 	 * taskqueue_enqueue_locked directly (which drops the lock
276 	 * anyway) so just insert it at tail while we have the
277 	 * queue lock.
278 	 */
279 	GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
280 	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
281 	t_barrier.ta_flags |= TASK_ENQUEUED;
282 
283 	/*
284 	 * Once the barrier has executed, all previously queued tasks
285 	 * have completed or are currently executing.
286 	 */
287 	while (t_barrier.ta_flags & TASK_ENQUEUED)
288 		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
289 }
290 
/*
 * Block until all currently executing tasks for this taskqueue
 * complete.  Tasks that begin execution during the execution
 * of this function are ignored.
 *
 * Implemented by appending a marker entry to tq_active and sleeping until
 * the marker has become the first entry.  The caller holds the queue lock
 * (see gtaskqueue_drain_all()); TQ_SLEEP() is handed that lock for the
 * duration of each sleep.
 */
static void
gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
{
	struct gtaskqueue_busy tb_marker, *tb_first;

	/* Nothing is executing, so there is nothing to wait for. */
	if (TAILQ_EMPTY(&queue->tq_active))
		return;

	/* Hold off gtaskqueue_terminate(), which loops while tq_callouts > 0. */
	queue->tq_callouts++;

	/*
	 * Wait for all currently executing taskqueue threads
	 * to go idle.
	 */
	tb_marker.tb_running = TB_DRAIN_WAITER;
	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);

	/*
	 * Wakeup any other drain waiter that happened to queue up
	 * without any intervening active thread.
	 */
	tb_first = TAILQ_FIRST(&queue->tq_active);
	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
		wakeup(tb_first);

	/*
	 * Release gtaskqueue_terminate(): it sleeps on tq_threads, so wake
	 * it if the queue has been deactivated while we were draining.
	 */
	queue->tq_callouts--;
	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
		wakeup_one(queue->tq_threads);
}
330 
331 void
332 gtaskqueue_block(struct gtaskqueue *queue)
333 {
334 
335 	TQ_LOCK(queue);
336 	queue->tq_flags |= TQ_FLAGS_BLOCKED;
337 	TQ_UNLOCK(queue);
338 }
339 
340 void
341 gtaskqueue_unblock(struct gtaskqueue *queue)
342 {
343 
344 	TQ_LOCK(queue);
345 	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
346 	if (!STAILQ_EMPTY(&queue->tq_queue))
347 		queue->tq_enqueue(queue->tq_context);
348 	TQ_UNLOCK(queue);
349 }
350 
/*
 * Run every pending task on the queue, one at a time.
 *
 * Must be called with the queue lock held; the lock is dropped around each
 * task handler and held again on return.  While a handler runs, the
 * stack-allocated 'tb' is linked on tq_active with tb_running pointing at
 * the task, which is what task_is_running() and the drain helpers look at.
 */
static void
gtaskqueue_run_locked(struct gtaskqueue *queue)
{
	struct gtaskqueue_busy tb;
	struct gtaskqueue_busy *tb_first;
	struct gtask *gtask;

	KASSERT(queue != NULL, ("tq is NULL"));
	TQ_ASSERT_LOCKED(queue);
	tb.tb_running = NULL;

	while (STAILQ_FIRST(&queue->tq_queue)) {
		/* Publish this thread as busy before the task is dequeued. */
		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);

		/*
		 * Carefully remove the first task from the queue and
		 * clear its TASK_ENQUEUED flag
		 */
		gtask = STAILQ_FIRST(&queue->tq_queue);
		KASSERT(gtask != NULL, ("task is NULL"));
		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
		gtask->ta_flags &= ~TASK_ENQUEUED;
		tb.tb_running = gtask;
		TQ_UNLOCK(queue);

		/* The handler runs without the queue lock. */
		KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
		gtask->ta_func(gtask->ta_context);

		TQ_LOCK(queue);
		tb.tb_running = NULL;
		/* Wake anyone sleeping on this task (the drain helpers do). */
		wakeup(gtask);

		/*
		 * Leave the active list; if a drain marker is now at the
		 * head, wake the waiter that owns it.
		 */
		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
		tb_first = TAILQ_FIRST(&queue->tq_active);
		if (tb_first != NULL &&
		    tb_first->tb_running == TB_DRAIN_WAITER)
			wakeup(tb_first);
	}
}
390 
391 static int
392 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
393 {
394 	struct gtaskqueue_busy *tb;
395 
396 	TQ_ASSERT_LOCKED(queue);
397 	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
398 		if (tb->tb_running == gtask)
399 			return (1);
400 	}
401 	return (0);
402 }
403 
404 static int
405 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
406 {
407 
408 	if (gtask->ta_flags & TASK_ENQUEUED)
409 		STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
410 	gtask->ta_flags &= ~TASK_ENQUEUED;
411 	return (task_is_running(queue, gtask) ? EBUSY : 0);
412 }
413 
/*
 * Locked wrapper around gtaskqueue_cancel_locked(): cancel a pending task.
 * Returns EBUSY if the task is currently running, 0 otherwise.
 */
int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int rv;

	TQ_LOCK(queue);
	rv = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);
	return (rv);
}
425 
426 static void
427 gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
428 {
429 	while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
430 		TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
431 }
432 
433 void
434 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
435 {
436 
437 	if (!queue->tq_spin)
438 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
439 
440 	TQ_LOCK(queue);
441 	gtaskqueue_drain_locked(queue, gtask);
442 	TQ_UNLOCK(queue);
443 }
444 
445 void
446 gtaskqueue_drain_all(struct gtaskqueue *queue)
447 {
448 
449 	if (!queue->tq_spin)
450 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
451 
452 	TQ_LOCK(queue);
453 	gtaskqueue_drain_tq_queue(queue);
454 	gtaskqueue_drain_tq_active(queue);
455 	TQ_UNLOCK(queue);
456 }
457 
458 static int
459 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
460     cpuset_t *mask, const char *name, va_list ap)
461 {
462 	char ktname[MAXCOMLEN + 1];
463 	struct thread *td;
464 	struct gtaskqueue *tq;
465 	int i, error;
466 
467 	if (count <= 0)
468 		return (EINVAL);
469 
470 	vsnprintf(ktname, sizeof(ktname), name, ap);
471 	tq = *tqp;
472 
473 	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
474 	    M_NOWAIT | M_ZERO);
475 	if (tq->tq_threads == NULL) {
476 		printf("%s: no memory for %s threads\n", __func__, ktname);
477 		return (ENOMEM);
478 	}
479 
480 	for (i = 0; i < count; i++) {
481 		if (count == 1)
482 			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
483 			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
484 		else
485 			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
486 			    &tq->tq_threads[i], RFSTOPPED, 0,
487 			    "%s_%d", ktname, i);
488 		if (error) {
489 			/* should be ok to continue, taskqueue_free will dtrt */
490 			printf("%s: kthread_add(%s): error %d", __func__,
491 			    ktname, error);
492 			tq->tq_threads[i] = NULL;		/* paranoid */
493 		} else
494 			tq->tq_tcount++;
495 	}
496 	for (i = 0; i < count; i++) {
497 		if (tq->tq_threads[i] == NULL)
498 			continue;
499 		td = tq->tq_threads[i];
500 		if (mask) {
501 			error = cpuset_setthread(td->td_tid, mask);
502 			/*
503 			 * Failing to pin is rarely an actual fatal error;
504 			 * it'll just affect performance.
505 			 */
506 			if (error)
507 				printf("%s: curthread=%llu: can't pin; "
508 				    "error=%d\n",
509 				    __func__,
510 				    (unsigned long long) td->td_tid,
511 				    error);
512 		}
513 		thread_lock(td);
514 		sched_prio(td, pri);
515 		sched_add(td, SRQ_BORING);
516 		thread_unlock(td);
517 	}
518 
519 	return (0);
520 }
521 
522 static int
523 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
524     const char *name, ...)
525 {
526 	va_list ap;
527 	int error;
528 
529 	va_start(ap, name);
530 	error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
531 	va_end(ap);
532 	return (error);
533 }
534 
535 static inline void
536 gtaskqueue_run_callback(struct gtaskqueue *tq,
537     enum taskqueue_callback_type cb_type)
538 {
539 	taskqueue_callback_fn tq_callback;
540 
541 	TQ_ASSERT_UNLOCKED(tq);
542 	tq_callback = tq->tq_callbacks[cb_type];
543 	if (tq_callback != NULL)
544 		tq_callback(tq->tq_cb_contexts[cb_type]);
545 }
546 
/*
 * Body of every worker thread.  'arg' is the address of the queue pointer.
 *
 * Runs the INIT callback, then alternates between running all pending
 * tasks and sleeping on the queue address until TQ_FLAGS_ACTIVE is cleared.
 * On the way out it runs any remaining tasks and the SHUTDOWN callback,
 * decrements tq_tcount, wakes the thread waiting in gtaskqueue_terminate()
 * and exits.
 */
static void
gtaskqueue_thread_loop(void *arg)
{
	struct gtaskqueue **tqp, *tq;

	tqp = arg;
	tq = *tqp;
	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
	TQ_LOCK(tq);
	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
		/* XXX ? */
		gtaskqueue_run_locked(tq);
		/*
		 * Because taskqueue_run() can drop tq_mutex, we need to
		 * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
		 * meantime, which means we missed a wakeup.
		 */
		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
			break;
		/* Woken through the queue address by enqueue or terminate. */
		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
	}
	/* Run whatever was still queued when the queue was deactivated. */
	gtaskqueue_run_locked(tq);
	/*
	 * This thread is on its way out, so just drop the lock temporarily
	 * in order to call the shutdown callback.  This allows the callback
	 * to look at the taskqueue, even just before it dies.
	 */
	TQ_UNLOCK(tq);
	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
	TQ_LOCK(tq);

	/* rendezvous with thread that asked us to terminate */
	tq->tq_tcount--;
	wakeup_one(tq->tq_threads);
	TQ_UNLOCK(tq);
	kthread_exit();
}
584 
/*
 * Enqueue hook for thread-backed queues.  'context' is the address of the
 * queue pointer; one thread sleeping on the queue address is woken.
 */
static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue **tqp;

	tqp = context;
	wakeup_one(*tqp);
}
594 
595 
596 static struct gtaskqueue *
597 gtaskqueue_create_fast(const char *name, int mflags,
598 		 taskqueue_enqueue_fn enqueue, void *context)
599 {
600 	return _gtaskqueue_create(name, mflags, enqueue, context,
601 			MTX_SPIN, "fast_taskqueue");
602 }
603 
604 
/* Per-queue state inside a taskqgroup: one queue plus the tasks attached to it. */
struct taskqgroup_cpu {
	LIST_HEAD(, grouptask)	tgc_tasks;	/* grouptasks attached to this queue */
	struct gtaskqueue	*tgc_taskq;	/* the queue itself */
	int	tgc_cnt;	/* number of entries on tgc_tasks */
	int	tgc_cpu;	/* CPU id this queue is associated with */
};
611 
/* A group of task queues among which grouptasks are distributed. */
struct taskqgroup {
	struct taskqgroup_cpu tqg_queue[MAXCPU];	/* queue slots; the first tqg_cnt are in use */
	struct mtx	tqg_lock;	/* protects the slots and the counters below */
	const char *	tqg_name;	/* group name; used to build worker thread names */
	int		tqg_adjusting;	/* non-zero while _taskqgroup_adjust() is in progress */
	int		tqg_stride;	/* CPU stride recorded by the last adjust */
	int		tqg_cnt;	/* number of queues currently in the group */
};
620 
/*
 * One-shot task allocated by taskqgroup_bind() and freed by
 * taskqgroup_binder(); it binds the thread that runs it to a CPU.
 */
struct taskq_bind_task {
	struct gtask bt_task;	/* the task that is enqueued */
	int	bt_cpuid;	/* CPU to bind the executing thread to */
};
625 
626 static void
627 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
628 {
629 	struct taskqgroup_cpu *qcpu;
630 
631 	qcpu = &qgroup->tqg_queue[idx];
632 	LIST_INIT(&qcpu->tgc_tasks);
633 	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
634 	    taskqueue_thread_enqueue, &qcpu->tgc_taskq);
635 	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
636 	    "%s_%d", qgroup->tqg_name, idx);
637 	qcpu->tgc_cpu = cpu;
638 }
639 
640 static void
641 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
642 {
643 
644 	gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
645 }
646 
647 /*
648  * Find the taskq with least # of tasks that doesn't currently have any
649  * other queues from the uniq identifier.
650  */
651 static int
652 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
653 {
654 	struct grouptask *n;
655 	int i, idx, mincnt;
656 	int strict;
657 
658 	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
659 	if (qgroup->tqg_cnt == 0)
660 		return (0);
661 	idx = -1;
662 	mincnt = INT_MAX;
663 	/*
664 	 * Two passes;  First scan for a queue with the least tasks that
665 	 * does not already service this uniq id.  If that fails simply find
666 	 * the queue with the least total tasks;
667 	 */
668 	for (strict = 1; mincnt == INT_MAX; strict = 0) {
669 		for (i = 0; i < qgroup->tqg_cnt; i++) {
670 			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
671 				continue;
672 			if (strict) {
673 				LIST_FOREACH(n,
674 				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
675 					if (n->gt_uniq == uniq)
676 						break;
677 				if (n != NULL)
678 					continue;
679 			}
680 			mincnt = qgroup->tqg_queue[i].tgc_cnt;
681 			idx = i;
682 		}
683 	}
684 	if (idx == -1)
685 		panic("%s: failed to pick a qid.", __func__);
686 
687 	return (idx);
688 }
689 
/*
 * smp_started is unusable since it is not set for UP kernels or even for
 * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 *
 * So maintain our own flag.  It must be set after all CPUs are started
 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 * simpler for adjustment to pass a flag indicating if it is delayed.
 */

/* Non-zero once the SYSINIT below has run. */
static int tqg_smp_started;

/* SYSINIT handler: record that the SI_SUB_SMP stage has been reached. */
static void
tqg_record_smp_started(void *arg)
{
	tqg_smp_started = 1;
}

SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
	tqg_record_smp_started, NULL);
714 
715 void
716 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
717     void *uniq, device_t dev, struct resource *irq, const char *name)
718 {
719 	int cpu, qid, error;
720 
721 	gtask->gt_uniq = uniq;
722 	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
723 	gtask->gt_dev = dev;
724 	gtask->gt_irq = irq;
725 	gtask->gt_cpu = -1;
726 	mtx_lock(&qgroup->tqg_lock);
727 	qid = taskqgroup_find(qgroup, uniq);
728 	qgroup->tqg_queue[qid].tgc_cnt++;
729 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
730 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
731 	if (dev != NULL && irq != NULL && tqg_smp_started) {
732 		cpu = qgroup->tqg_queue[qid].tgc_cpu;
733 		gtask->gt_cpu = cpu;
734 		mtx_unlock(&qgroup->tqg_lock);
735 		error = bus_bind_intr(dev, irq, cpu);
736 		if (error)
737 			printf("%s: binding interrupt failed for %s: %d\n",
738 			    __func__, gtask->gt_name, error);
739 	} else
740 		mtx_unlock(&qgroup->tqg_lock);
741 }
742 
743 static void
744 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
745 {
746 	int qid, cpu, error;
747 
748 	mtx_lock(&qgroup->tqg_lock);
749 	qid = taskqgroup_find(qgroup, gtask->gt_uniq);
750 	cpu = qgroup->tqg_queue[qid].tgc_cpu;
751 	if (gtask->gt_dev != NULL && gtask->gt_irq != NULL) {
752 		mtx_unlock(&qgroup->tqg_lock);
753 		error = bus_bind_intr(gtask->gt_dev, gtask->gt_irq, cpu);
754 		mtx_lock(&qgroup->tqg_lock);
755 		if (error)
756 			printf("%s: binding interrupt failed for %s: %d\n",
757 			    __func__, gtask->gt_name, error);
758 
759 	}
760 	qgroup->tqg_queue[qid].tgc_cnt++;
761 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
762 	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
763 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
764 	mtx_unlock(&qgroup->tqg_lock);
765 }
766 
767 int
768 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
769     void *uniq, int cpu, device_t dev, struct resource *irq, const char *name)
770 {
771 	int i, qid, error;
772 
773 	qid = -1;
774 	gtask->gt_uniq = uniq;
775 	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
776 	gtask->gt_dev = dev;
777 	gtask->gt_irq = irq;
778 	gtask->gt_cpu = cpu;
779 	mtx_lock(&qgroup->tqg_lock);
780 	if (tqg_smp_started) {
781 		for (i = 0; i < qgroup->tqg_cnt; i++)
782 			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
783 				qid = i;
784 				break;
785 			}
786 		if (qid == -1) {
787 			mtx_unlock(&qgroup->tqg_lock);
788 			printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
789 			return (EINVAL);
790 		}
791 	} else
792 		qid = 0;
793 	qgroup->tqg_queue[qid].tgc_cnt++;
794 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
795 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
796 	cpu = qgroup->tqg_queue[qid].tgc_cpu;
797 	mtx_unlock(&qgroup->tqg_lock);
798 
799 	if (dev != NULL && irq != NULL && tqg_smp_started) {
800 		error = bus_bind_intr(dev, irq, cpu);
801 		if (error)
802 			printf("%s: binding interrupt failed for %s: %d\n",
803 			    __func__, gtask->gt_name, error);
804 	}
805 	return (0);
806 }
807 
808 static int
809 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
810 {
811 	device_t dev;
812 	struct resource *irq;
813 	int cpu, error, i, qid;
814 
815 	qid = -1;
816 	dev = gtask->gt_dev;
817 	irq = gtask->gt_irq;
818 	cpu = gtask->gt_cpu;
819 	MPASS(tqg_smp_started);
820 	mtx_lock(&qgroup->tqg_lock);
821 	for (i = 0; i < qgroup->tqg_cnt; i++)
822 		if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
823 			qid = i;
824 			break;
825 		}
826 	if (qid == -1) {
827 		mtx_unlock(&qgroup->tqg_lock);
828 		printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
829 		return (EINVAL);
830 	}
831 	qgroup->tqg_queue[qid].tgc_cnt++;
832 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
833 	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
834 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
835 	mtx_unlock(&qgroup->tqg_lock);
836 
837 	if (dev != NULL && irq != NULL) {
838 		error = bus_bind_intr(dev, irq, cpu);
839 		if (error)
840 			printf("%s: binding interrupt failed for %s: %d\n",
841 			    __func__, gtask->gt_name, error);
842 	}
843 	return (0);
844 }
845 
846 void
847 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
848 {
849 	int i;
850 
851 	grouptask_block(gtask);
852 	mtx_lock(&qgroup->tqg_lock);
853 	for (i = 0; i < qgroup->tqg_cnt; i++)
854 		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
855 			break;
856 	if (i == qgroup->tqg_cnt)
857 		panic("%s: task %s not in group", __func__, gtask->gt_name);
858 	qgroup->tqg_queue[i].tgc_cnt--;
859 	LIST_REMOVE(gtask, gt_list);
860 	mtx_unlock(&qgroup->tqg_lock);
861 	gtask->gt_taskqueue = NULL;
862 	gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
863 }
864 
865 static void
866 taskqgroup_binder(void *ctx)
867 {
868 	struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
869 	cpuset_t mask;
870 	int error;
871 
872 	CPU_ZERO(&mask);
873 	CPU_SET(gtask->bt_cpuid, &mask);
874 	error = cpuset_setthread(curthread->td_tid, &mask);
875 	thread_lock(curthread);
876 	sched_bind(curthread, gtask->bt_cpuid);
877 	thread_unlock(curthread);
878 
879 	if (error)
880 		printf("%s: binding curthread failed: %d\n", __func__, error);
881 	free(gtask, M_DEVBUF);
882 }
883 
884 static void
885 taskqgroup_bind(struct taskqgroup *qgroup)
886 {
887 	struct taskq_bind_task *gtask;
888 	int i;
889 
890 	/*
891 	 * Bind taskqueue threads to specific CPUs, if they have been assigned
892 	 * one.
893 	 */
894 	if (qgroup->tqg_cnt == 1)
895 		return;
896 
897 	for (i = 0; i < qgroup->tqg_cnt; i++) {
898 		gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
899 		GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
900 		gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
901 		grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
902 		    &gtask->bt_task);
903 	}
904 }
905 
/*
 * SYSINIT handler that brings up the "config" group with a single queue on
 * CPU 0 and a stride of 1.
 *
 * The task list of slot 0 is swapped out to a local head and its count
 * reset before taskqgroup_cpu_create() is called, which re-initialises that
 * list.
 * NOTE(review): tasks moved onto gtask_head are not re-attached anywhere in
 * this function -- confirm that nothing can attach to the config group
 * before this SYSINIT runs.
 */
static void
taskqgroup_config_init(void *arg)
{
	struct taskqgroup *qgroup = qgroup_config;
	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);

	LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
	    grouptask, gt_list);
	qgroup->tqg_queue[0].tgc_cnt = 0;
	taskqgroup_cpu_create(qgroup, 0, 0);

	qgroup->tqg_cnt = 1;
	qgroup->tqg_stride = 1;
}

SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
	taskqgroup_config_init, NULL);
923 
924 static int
925 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
926 {
927 	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
928 	struct grouptask *gtask;
929 	int i, k, old_cnt, old_cpu, cpu;
930 
931 	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
932 
933 	if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
934 		printf("%s: failed cnt: %d stride: %d "
935 		    "mp_ncpus: %d tqg_smp_started: %d\n",
936 		    __func__, cnt, stride, mp_ncpus, tqg_smp_started);
937 		return (EINVAL);
938 	}
939 	if (qgroup->tqg_adjusting) {
940 		printf("%s failed: adjusting\n", __func__);
941 		return (EBUSY);
942 	}
943 	qgroup->tqg_adjusting = 1;
944 	old_cnt = qgroup->tqg_cnt;
945 	old_cpu = 0;
946 	if (old_cnt < cnt)
947 		old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
948 	mtx_unlock(&qgroup->tqg_lock);
949 	/*
950 	 * Set up queue for tasks added before boot.
951 	 */
952 	if (old_cnt == 0) {
953 		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
954 		    grouptask, gt_list);
955 		qgroup->tqg_queue[0].tgc_cnt = 0;
956 	}
957 
958 	/*
959 	 * If new taskq threads have been added.
960 	 */
961 	cpu = old_cpu;
962 	for (i = old_cnt; i < cnt; i++) {
963 		taskqgroup_cpu_create(qgroup, i, cpu);
964 
965 		for (k = 0; k < stride; k++)
966 			cpu = CPU_NEXT(cpu);
967 	}
968 	mtx_lock(&qgroup->tqg_lock);
969 	qgroup->tqg_cnt = cnt;
970 	qgroup->tqg_stride = stride;
971 
972 	/*
973 	 * Adjust drivers to use new taskqs.
974 	 */
975 	for (i = 0; i < old_cnt; i++) {
976 		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
977 			LIST_REMOVE(gtask, gt_list);
978 			qgroup->tqg_queue[i].tgc_cnt--;
979 			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
980 		}
981 	}
982 	mtx_unlock(&qgroup->tqg_lock);
983 
984 	while ((gtask = LIST_FIRST(&gtask_head))) {
985 		LIST_REMOVE(gtask, gt_list);
986 		if (gtask->gt_cpu == -1)
987 			taskqgroup_attach_deferred(qgroup, gtask);
988 		else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
989 			taskqgroup_attach_deferred(qgroup, gtask);
990 	}
991 
992 #ifdef INVARIANTS
993 	mtx_lock(&qgroup->tqg_lock);
994 	for (i = 0; i < qgroup->tqg_cnt; i++) {
995 		MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
996 		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
997 			MPASS(gtask->gt_taskqueue != NULL);
998 	}
999 	mtx_unlock(&qgroup->tqg_lock);
1000 #endif
1001 	/*
1002 	 * If taskq thread count has been reduced.
1003 	 */
1004 	for (i = cnt; i < old_cnt; i++)
1005 		taskqgroup_cpu_remove(qgroup, i);
1006 
1007 	taskqgroup_bind(qgroup);
1008 
1009 	mtx_lock(&qgroup->tqg_lock);
1010 	qgroup->tqg_adjusting = 0;
1011 
1012 	return (0);
1013 }
1014 
1015 int
1016 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
1017 {
1018 	int error;
1019 
1020 	mtx_lock(&qgroup->tqg_lock);
1021 	error = _taskqgroup_adjust(qgroup, cnt, stride);
1022 	mtx_unlock(&qgroup->tqg_lock);
1023 
1024 	return (error);
1025 }
1026 
1027 struct taskqgroup *
1028 taskqgroup_create(const char *name)
1029 {
1030 	struct taskqgroup *qgroup;
1031 
1032 	qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
1033 	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
1034 	qgroup->tqg_name = name;
1035 	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
1036 
1037 	return (qgroup);
1038 }
1039 
/*
 * Currently a no-op: nothing allocated by taskqgroup_create() or by later
 * adjustments is released here.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}
1045 
/*
 * Initialise 'gtask' to run fn(ctx) and attach it to the "config" group.
 * The task's own address is used as its uniq identifier, and no device or
 * interrupt resource is associated with it.
 */
void
taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
    const char *name)
{

	GROUPTASK_INIT(gtask, 0, fn, ctx);
	taskqgroup_attach(qgroup_config, gtask, gtask, NULL, NULL, name);
}
1054 
/* Detach 'gtask' from the "config" group; see taskqgroup_detach(). */
void
taskqgroup_config_gtask_deinit(struct grouptask *gtask)
{

	taskqgroup_detach(qgroup_config, gtask);
}
1061