xref: /freebsd/sys/kern/subr_gtaskqueue.c (revision 84b3c4547afa496b37e86c5e649e99237c8afc6e)
1 /*-
2  * Copyright (c) 2000 Doug Rabson
3  * Copyright (c) 2014 Jeff Roberson
4  * Copyright (c) 2016 Matthew Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/cpuset.h>
36 #include <sys/kernel.h>
37 #include <sys/kthread.h>
38 #include <sys/libkern.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/epoch.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/gtaskqueue.h>
48 #include <sys/unistd.h>
49 #include <machine/stdarg.h>
50 
51 static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
52 static void	gtaskqueue_thread_enqueue(void *);
53 static void	gtaskqueue_thread_loop(void *arg);
54 static int	task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
55 static void	gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);
56 
57 TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
58 TASKQGROUP_DEFINE(config, 1, 1);
59 
60 struct gtaskqueue_busy {
61 	struct gtask		*tb_running;
62 	u_int			 tb_seq;
63 	LIST_ENTRY(gtaskqueue_busy) tb_link;
64 };
65 
66 typedef void (*gtaskqueue_enqueue_fn)(void *context);
67 
68 struct gtaskqueue {
69 	STAILQ_HEAD(, gtask)	tq_queue;
70 	LIST_HEAD(, gtaskqueue_busy) tq_active;
71 	u_int			tq_seq;
72 	int			tq_callouts;
73 	struct mtx_padalign	tq_mutex;
74 	gtaskqueue_enqueue_fn	tq_enqueue;
75 	void			*tq_context;
76 	char			*tq_name;
77 	struct thread		**tq_threads;
78 	int			tq_tcount;
79 	int			tq_spin;
80 	int			tq_flags;
81 	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
82 	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
83 };
84 
/* Bits stored in gtaskqueue.tq_flags. */
#define	TQ_FLAGS_ACTIVE			0x01	/* set at create, cleared by free */
#define	TQ_FLAGS_BLOCKED		0x02	/* set by gtaskqueue_block() */
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	0x04	/* enqueue is gtaskqueue_thread_enqueue */

#define	DT_CALLOUT_ARMED		0x01
90 
/*
 * Lock helpers.  The queue mutex is either a spin or a default mutex, so the
 * matching primitive is selected at run time from tq_spin.
 */
#define	TQ_LOCK(tq)							\
	do {								\
		if (!(tq)->tq_spin)					\
			mtx_lock(&(tq)->tq_mutex);			\
		else							\
			mtx_lock_spin(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)

#define	TQ_UNLOCK(tq)							\
	do {								\
		if (!(tq)->tq_spin)					\
			mtx_unlock(&(tq)->tq_mutex);			\
		else							\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
	} while (0)
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
108 
#ifdef INVARIANTS
/*
 * Print the fields of a gtask.  Called just before the "queue == NULL"
 * panics so the offending task can be identified.
 */
static void
gtask_dump(struct gtask *gtask)
{

	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	    gtask,
	    gtask->ta_flags,
	    gtask->ta_priority,
	    gtask->ta_func,
	    gtask->ta_context);
}
#endif /* INVARIANTS */
117 
118 static __inline int
119 TQ_SLEEP(struct gtaskqueue *tq, void *p, const char *wm)
120 {
121 	if (tq->tq_spin)
122 		return (msleep_spin(p, (struct mtx *)&tq->tq_mutex, wm, 0));
123 	return (msleep(p, &tq->tq_mutex, 0, wm, 0));
124 }
125 
126 static struct gtaskqueue *
127 _gtaskqueue_create(const char *name, int mflags,
128 		 taskqueue_enqueue_fn enqueue, void *context,
129 		 int mtxflags, const char *mtxname __unused)
130 {
131 	struct gtaskqueue *queue;
132 	char *tq_name;
133 
134 	tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
135 	if (!tq_name)
136 		return (NULL);
137 
138 	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
139 
140 	queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
141 	if (!queue) {
142 		free(tq_name, M_GTASKQUEUE);
143 		return (NULL);
144 	}
145 
146 	STAILQ_INIT(&queue->tq_queue);
147 	LIST_INIT(&queue->tq_active);
148 	queue->tq_enqueue = enqueue;
149 	queue->tq_context = context;
150 	queue->tq_name = tq_name;
151 	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
152 	queue->tq_flags |= TQ_FLAGS_ACTIVE;
153 	if (enqueue == gtaskqueue_thread_enqueue)
154 		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
155 	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
156 
157 	return (queue);
158 }
159 
160 /*
161  * Signal a taskqueue thread to terminate.
162  */
163 static void
164 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
165 {
166 
167 	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
168 		wakeup(tq);
169 		TQ_SLEEP(tq, pp, "gtq_destroy");
170 	}
171 }
172 
173 static void
174 gtaskqueue_free(struct gtaskqueue *queue)
175 {
176 
177 	TQ_LOCK(queue);
178 	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
179 	gtaskqueue_terminate(queue->tq_threads, queue);
180 	KASSERT(LIST_EMPTY(&queue->tq_active), ("Tasks still running?"));
181 	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
182 	mtx_destroy(&queue->tq_mutex);
183 	free(queue->tq_threads, M_GTASKQUEUE);
184 	free(queue->tq_name, M_GTASKQUEUE);
185 	free(queue, M_GTASKQUEUE);
186 }
187 
188 /*
189  * Wait for all to complete, then prevent it from being enqueued
190  */
191 void
192 grouptask_block(struct grouptask *grouptask)
193 {
194 	struct gtaskqueue *queue = grouptask->gt_taskqueue;
195 	struct gtask *gtask = &grouptask->gt_task;
196 
197 #ifdef INVARIANTS
198 	if (queue == NULL) {
199 		gtask_dump(gtask);
200 		panic("queue == NULL");
201 	}
202 #endif
203 	TQ_LOCK(queue);
204 	gtask->ta_flags |= TASK_NOENQUEUE;
205   	gtaskqueue_drain_locked(queue, gtask);
206 	TQ_UNLOCK(queue);
207 }
208 
209 void
210 grouptask_unblock(struct grouptask *grouptask)
211 {
212 	struct gtaskqueue *queue = grouptask->gt_taskqueue;
213 	struct gtask *gtask = &grouptask->gt_task;
214 
215 #ifdef INVARIANTS
216 	if (queue == NULL) {
217 		gtask_dump(gtask);
218 		panic("queue == NULL");
219 	}
220 #endif
221 	TQ_LOCK(queue);
222 	gtask->ta_flags &= ~TASK_NOENQUEUE;
223 	TQ_UNLOCK(queue);
224 }
225 
226 int
227 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
228 {
229 #ifdef INVARIANTS
230 	if (queue == NULL) {
231 		gtask_dump(gtask);
232 		panic("queue == NULL");
233 	}
234 #endif
235 	TQ_LOCK(queue);
236 	if (gtask->ta_flags & TASK_ENQUEUED) {
237 		TQ_UNLOCK(queue);
238 		return (0);
239 	}
240 	if (gtask->ta_flags & TASK_NOENQUEUE) {
241 		TQ_UNLOCK(queue);
242 		return (EAGAIN);
243 	}
244 	STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
245 	gtask->ta_flags |= TASK_ENQUEUED;
246 	TQ_UNLOCK(queue);
247 	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
248 		queue->tq_enqueue(queue->tq_context);
249 	return (0);
250 }
251 
/* Task body that does nothing; used for the drain barrier task. */
static void
gtaskqueue_task_nop_fn(void *context)
{

}
256 
257 /*
258  * Block until all currently queued tasks in this taskqueue
259  * have begun execution.  Tasks queued during execution of
260  * this function are ignored.
261  */
262 static void
263 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
264 {
265 	struct gtask t_barrier;
266 
267 	if (STAILQ_EMPTY(&queue->tq_queue))
268 		return;
269 
270 	/*
271 	 * Enqueue our barrier after all current tasks, but with
272 	 * the highest priority so that newly queued tasks cannot
273 	 * pass it.  Because of the high priority, we can not use
274 	 * taskqueue_enqueue_locked directly (which drops the lock
275 	 * anyway) so just insert it at tail while we have the
276 	 * queue lock.
277 	 */
278 	GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
279 	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
280 	t_barrier.ta_flags |= TASK_ENQUEUED;
281 
282 	/*
283 	 * Once the barrier has executed, all previously queued tasks
284 	 * have completed or are currently executing.
285 	 */
286 	while (t_barrier.ta_flags & TASK_ENQUEUED)
287 		TQ_SLEEP(queue, &t_barrier, "gtq_qdrain");
288 }
289 
290 /*
291  * Block until all currently executing tasks for this taskqueue
292  * complete.  Tasks that begin execution during the execution
293  * of this function are ignored.
294  */
295 static void
296 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
297 {
298 	struct gtaskqueue_busy *tb;
299 	u_int seq;
300 
301 	if (LIST_EMPTY(&queue->tq_active))
302 		return;
303 
304 	/* Block taskq_terminate().*/
305 	queue->tq_callouts++;
306 
307 	/* Wait for any active task with sequence from the past. */
308 	seq = queue->tq_seq;
309 restart:
310 	LIST_FOREACH(tb, &queue->tq_active, tb_link) {
311 		if ((int)(tb->tb_seq - seq) <= 0) {
312 			TQ_SLEEP(queue, tb->tb_running, "gtq_adrain");
313 			goto restart;
314 		}
315 	}
316 
317 	/* Release taskqueue_terminate(). */
318 	queue->tq_callouts--;
319 	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
320 		wakeup_one(queue->tq_threads);
321 }
322 
323 void
324 gtaskqueue_block(struct gtaskqueue *queue)
325 {
326 
327 	TQ_LOCK(queue);
328 	queue->tq_flags |= TQ_FLAGS_BLOCKED;
329 	TQ_UNLOCK(queue);
330 }
331 
332 void
333 gtaskqueue_unblock(struct gtaskqueue *queue)
334 {
335 
336 	TQ_LOCK(queue);
337 	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
338 	if (!STAILQ_EMPTY(&queue->tq_queue))
339 		queue->tq_enqueue(queue->tq_context);
340 	TQ_UNLOCK(queue);
341 }
342 
343 static void
344 gtaskqueue_run_locked(struct gtaskqueue *queue)
345 {
346 	struct epoch_tracker et;
347 	struct gtaskqueue_busy tb;
348 	struct gtask *gtask;
349 	bool in_net_epoch;
350 
351 	KASSERT(queue != NULL, ("tq is NULL"));
352 	TQ_ASSERT_LOCKED(queue);
353 	tb.tb_running = NULL;
354 	LIST_INSERT_HEAD(&queue->tq_active, &tb, tb_link);
355 	in_net_epoch = false;
356 
357 	while ((gtask = STAILQ_FIRST(&queue->tq_queue)) != NULL) {
358 		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
359 		gtask->ta_flags &= ~TASK_ENQUEUED;
360 		tb.tb_running = gtask;
361 		tb.tb_seq = ++queue->tq_seq;
362 		TQ_UNLOCK(queue);
363 
364 		KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
365 		if (!in_net_epoch && TASK_IS_NET(gtask)) {
366 			in_net_epoch = true;
367 			NET_EPOCH_ENTER(et);
368 		} else if (in_net_epoch && !TASK_IS_NET(gtask)) {
369 			NET_EPOCH_EXIT(et);
370 			in_net_epoch = false;
371 		}
372 		gtask->ta_func(gtask->ta_context);
373 
374 		TQ_LOCK(queue);
375 		wakeup(gtask);
376 	}
377 	if (in_net_epoch)
378 		NET_EPOCH_EXIT(et);
379 	LIST_REMOVE(&tb, tb_link);
380 }
381 
382 static int
383 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
384 {
385 	struct gtaskqueue_busy *tb;
386 
387 	TQ_ASSERT_LOCKED(queue);
388 	LIST_FOREACH(tb, &queue->tq_active, tb_link) {
389 		if (tb->tb_running == gtask)
390 			return (1);
391 	}
392 	return (0);
393 }
394 
395 static int
396 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
397 {
398 
399 	if (gtask->ta_flags & TASK_ENQUEUED)
400 		STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
401 	gtask->ta_flags &= ~TASK_ENQUEUED;
402 	return (task_is_running(queue, gtask) ? EBUSY : 0);
403 }
404 
/*
 * Locked wrapper around gtaskqueue_cancel_locked(); returns its result
 * (0, or EBUSY when the task is currently running).
 */
int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int rv;

	TQ_LOCK(queue);
	rv = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);
	return (rv);
}
416 
417 static void
418 gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
419 {
420 	while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
421 		TQ_SLEEP(queue, gtask, "gtq_drain");
422 }
423 
424 void
425 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
426 {
427 
428 	if (!queue->tq_spin)
429 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
430 
431 	TQ_LOCK(queue);
432 	gtaskqueue_drain_locked(queue, gtask);
433 	TQ_UNLOCK(queue);
434 }
435 
436 void
437 gtaskqueue_drain_all(struct gtaskqueue *queue)
438 {
439 
440 	if (!queue->tq_spin)
441 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
442 
443 	TQ_LOCK(queue);
444 	gtaskqueue_drain_tq_queue(queue);
445 	gtaskqueue_drain_tq_active(queue);
446 	TQ_UNLOCK(queue);
447 }
448 
449 static int
450 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
451     cpuset_t *mask, const char *name, va_list ap)
452 {
453 	char ktname[MAXCOMLEN + 1];
454 	struct thread *td;
455 	struct gtaskqueue *tq;
456 	int i, error;
457 
458 	if (count <= 0)
459 		return (EINVAL);
460 
461 	vsnprintf(ktname, sizeof(ktname), name, ap);
462 	tq = *tqp;
463 
464 	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
465 	    M_NOWAIT | M_ZERO);
466 	if (tq->tq_threads == NULL) {
467 		printf("%s: no memory for %s threads\n", __func__, ktname);
468 		return (ENOMEM);
469 	}
470 
471 	for (i = 0; i < count; i++) {
472 		if (count == 1)
473 			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
474 			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
475 		else
476 			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
477 			    &tq->tq_threads[i], RFSTOPPED, 0,
478 			    "%s_%d", ktname, i);
479 		if (error) {
480 			/* should be ok to continue, taskqueue_free will dtrt */
481 			printf("%s: kthread_add(%s): error %d", __func__,
482 			    ktname, error);
483 			tq->tq_threads[i] = NULL;		/* paranoid */
484 		} else
485 			tq->tq_tcount++;
486 	}
487 	for (i = 0; i < count; i++) {
488 		if (tq->tq_threads[i] == NULL)
489 			continue;
490 		td = tq->tq_threads[i];
491 		if (mask) {
492 			error = cpuset_setthread(td->td_tid, mask);
493 			/*
494 			 * Failing to pin is rarely an actual fatal error;
495 			 * it'll just affect performance.
496 			 */
497 			if (error)
498 				printf("%s: curthread=%llu: can't pin; "
499 				    "error=%d\n",
500 				    __func__,
501 				    (unsigned long long) td->td_tid,
502 				    error);
503 		}
504 		thread_lock(td);
505 		sched_prio(td, pri);
506 		sched_add(td, SRQ_BORING);
507 	}
508 
509 	return (0);
510 }
511 
512 static int
513 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
514     const char *name, ...)
515 {
516 	va_list ap;
517 	int error;
518 
519 	va_start(ap, name);
520 	error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
521 	va_end(ap);
522 	return (error);
523 }
524 
525 static inline void
526 gtaskqueue_run_callback(struct gtaskqueue *tq,
527     enum taskqueue_callback_type cb_type)
528 {
529 	taskqueue_callback_fn tq_callback;
530 
531 	TQ_ASSERT_UNLOCKED(tq);
532 	tq_callback = tq->tq_callbacks[cb_type];
533 	if (tq_callback != NULL)
534 		tq_callback(tq->tq_cb_contexts[cb_type]);
535 }
536 
537 static void
538 gtaskqueue_thread_loop(void *arg)
539 {
540 	struct gtaskqueue **tqp, *tq;
541 
542 	tqp = arg;
543 	tq = *tqp;
544 	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
545 	TQ_LOCK(tq);
546 	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
547 		/* XXX ? */
548 		gtaskqueue_run_locked(tq);
549 		/*
550 		 * Because taskqueue_run() can drop tq_mutex, we need to
551 		 * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
552 		 * meantime, which means we missed a wakeup.
553 		 */
554 		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
555 			break;
556 		TQ_SLEEP(tq, tq, "-");
557 	}
558 	gtaskqueue_run_locked(tq);
559 	/*
560 	 * This thread is on its way out, so just drop the lock temporarily
561 	 * in order to call the shutdown callback.  This allows the callback
562 	 * to look at the taskqueue, even just before it dies.
563 	 */
564 	TQ_UNLOCK(tq);
565 	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
566 	TQ_LOCK(tq);
567 
568 	/* rendezvous with thread that asked us to terminate */
569 	tq->tq_tcount--;
570 	wakeup_one(tq->tq_threads);
571 	TQ_UNLOCK(tq);
572 	kthread_exit();
573 }
574 
/*
 * Enqueue callback for thread-backed queues.  'context' is the address of
 * the queue pointer; a wakeup is issued on the queue itself, which is the
 * channel the worker threads sleep on.
 */
static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue **tqp;

	tqp = context;
	wakeup_any(*tqp);
}
584 
585 static struct gtaskqueue *
586 gtaskqueue_create_fast(const char *name, int mflags,
587 		 taskqueue_enqueue_fn enqueue, void *context)
588 {
589 	return _gtaskqueue_create(name, mflags, enqueue, context,
590 			MTX_SPIN, "fast_taskqueue");
591 }
592 
593 struct taskqgroup_cpu {
594 	LIST_HEAD(, grouptask)	tgc_tasks;
595 	struct gtaskqueue	*tgc_taskq;
596 	int	tgc_cnt;
597 	int	tgc_cpu;
598 };
599 
600 struct taskqgroup {
601 	struct taskqgroup_cpu tqg_queue[MAXCPU];
602 	struct mtx	tqg_lock;
603 	const char *	tqg_name;
604 	int		tqg_adjusting;
605 	int		tqg_stride;
606 	int		tqg_cnt;
607 };
608 
609 struct taskq_bind_task {
610 	struct gtask bt_task;
611 	int	bt_cpuid;
612 };
613 
614 static void
615 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
616 {
617 	struct taskqgroup_cpu *qcpu;
618 
619 	qcpu = &qgroup->tqg_queue[idx];
620 	LIST_INIT(&qcpu->tgc_tasks);
621 	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
622 	    taskqueue_thread_enqueue, &qcpu->tgc_taskq);
623 	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
624 	    "%s_%d", qgroup->tqg_name, idx);
625 	qcpu->tgc_cpu = cpu;
626 }
627 
628 static void
629 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
630 {
631 
632 	gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
633 }
634 
635 /*
636  * Find the taskq with least # of tasks that doesn't currently have any
637  * other queues from the uniq identifier.
638  */
639 static int
640 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
641 {
642 	struct grouptask *n;
643 	int i, idx, mincnt;
644 	int strict;
645 
646 	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
647 	if (qgroup->tqg_cnt == 0)
648 		return (0);
649 	idx = -1;
650 	mincnt = INT_MAX;
651 	/*
652 	 * Two passes;  First scan for a queue with the least tasks that
653 	 * does not already service this uniq id.  If that fails simply find
654 	 * the queue with the least total tasks;
655 	 */
656 	for (strict = 1; mincnt == INT_MAX; strict = 0) {
657 		for (i = 0; i < qgroup->tqg_cnt; i++) {
658 			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
659 				continue;
660 			if (strict) {
661 				LIST_FOREACH(n,
662 				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
663 					if (n->gt_uniq == uniq)
664 						break;
665 				if (n != NULL)
666 					continue;
667 			}
668 			mincnt = qgroup->tqg_queue[i].tgc_cnt;
669 			idx = i;
670 		}
671 	}
672 	if (idx == -1)
673 		panic("%s: failed to pick a qid.", __func__);
674 
675 	return (idx);
676 }
677 
678 /*
679  * smp_started is unusable since it is not set for UP kernels or even for
680  * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need
 * to synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
683  * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
684  *
685  * So maintain our own flag.  It must be set after all CPUs are started
686  * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
687  * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
688  * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
689  * simpler for adjustment to pass a flag indicating if it is delayed.
690  */
691 
/* Non-zero once the SYSINIT below has run. */
static int tqg_smp_started;

/* SYSINIT hook: record that the SI_SUB_SMP stage has been reached. */
static void
tqg_record_smp_started(void *arg)
{

	tqg_smp_started = 1;
}

SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
    tqg_record_smp_started, NULL);
702 
703 void
704 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
705     void *uniq, device_t dev, struct resource *irq, const char *name)
706 {
707 	int cpu, qid, error;
708 
709 	gtask->gt_uniq = uniq;
710 	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
711 	gtask->gt_dev = dev;
712 	gtask->gt_irq = irq;
713 	gtask->gt_cpu = -1;
714 	mtx_lock(&qgroup->tqg_lock);
715 	qid = taskqgroup_find(qgroup, uniq);
716 	qgroup->tqg_queue[qid].tgc_cnt++;
717 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
718 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
719 	if (dev != NULL && irq != NULL && tqg_smp_started) {
720 		cpu = qgroup->tqg_queue[qid].tgc_cpu;
721 		gtask->gt_cpu = cpu;
722 		mtx_unlock(&qgroup->tqg_lock);
723 		error = bus_bind_intr(dev, irq, cpu);
724 		if (error)
725 			printf("%s: binding interrupt failed for %s: %d\n",
726 			    __func__, gtask->gt_name, error);
727 	} else
728 		mtx_unlock(&qgroup->tqg_lock);
729 }
730 
731 static void
732 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
733 {
734 	int qid, cpu, error;
735 
736 	mtx_lock(&qgroup->tqg_lock);
737 	qid = taskqgroup_find(qgroup, gtask->gt_uniq);
738 	cpu = qgroup->tqg_queue[qid].tgc_cpu;
739 	if (gtask->gt_dev != NULL && gtask->gt_irq != NULL) {
740 		mtx_unlock(&qgroup->tqg_lock);
741 		error = bus_bind_intr(gtask->gt_dev, gtask->gt_irq, cpu);
742 		mtx_lock(&qgroup->tqg_lock);
743 		if (error)
744 			printf("%s: binding interrupt failed for %s: %d\n",
745 			    __func__, gtask->gt_name, error);
746 
747 	}
748 	qgroup->tqg_queue[qid].tgc_cnt++;
749 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
750 	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
751 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
752 	mtx_unlock(&qgroup->tqg_lock);
753 }
754 
755 int
756 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
757     void *uniq, int cpu, device_t dev, struct resource *irq, const char *name)
758 {
759 	int i, qid, error;
760 
761 	qid = -1;
762 	gtask->gt_uniq = uniq;
763 	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
764 	gtask->gt_dev = dev;
765 	gtask->gt_irq = irq;
766 	gtask->gt_cpu = cpu;
767 	mtx_lock(&qgroup->tqg_lock);
768 	if (tqg_smp_started) {
769 		for (i = 0; i < qgroup->tqg_cnt; i++)
770 			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
771 				qid = i;
772 				break;
773 			}
774 		if (qid == -1) {
775 			mtx_unlock(&qgroup->tqg_lock);
776 			printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
777 			return (EINVAL);
778 		}
779 	} else
780 		qid = 0;
781 	qgroup->tqg_queue[qid].tgc_cnt++;
782 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
783 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
784 	cpu = qgroup->tqg_queue[qid].tgc_cpu;
785 	mtx_unlock(&qgroup->tqg_lock);
786 
787 	if (dev != NULL && irq != NULL && tqg_smp_started) {
788 		error = bus_bind_intr(dev, irq, cpu);
789 		if (error)
790 			printf("%s: binding interrupt failed for %s: %d\n",
791 			    __func__, gtask->gt_name, error);
792 	}
793 	return (0);
794 }
795 
796 static int
797 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
798 {
799 	device_t dev;
800 	struct resource *irq;
801 	int cpu, error, i, qid;
802 
803 	qid = -1;
804 	dev = gtask->gt_dev;
805 	irq = gtask->gt_irq;
806 	cpu = gtask->gt_cpu;
807 	MPASS(tqg_smp_started);
808 	mtx_lock(&qgroup->tqg_lock);
809 	for (i = 0; i < qgroup->tqg_cnt; i++)
810 		if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
811 			qid = i;
812 			break;
813 		}
814 	if (qid == -1) {
815 		mtx_unlock(&qgroup->tqg_lock);
816 		printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
817 		return (EINVAL);
818 	}
819 	qgroup->tqg_queue[qid].tgc_cnt++;
820 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
821 	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
822 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
823 	mtx_unlock(&qgroup->tqg_lock);
824 
825 	if (dev != NULL && irq != NULL) {
826 		error = bus_bind_intr(dev, irq, cpu);
827 		if (error)
828 			printf("%s: binding interrupt failed for %s: %d\n",
829 			    __func__, gtask->gt_name, error);
830 	}
831 	return (0);
832 }
833 
834 void
835 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
836 {
837 	int i;
838 
839 	grouptask_block(gtask);
840 	mtx_lock(&qgroup->tqg_lock);
841 	for (i = 0; i < qgroup->tqg_cnt; i++)
842 		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
843 			break;
844 	if (i == qgroup->tqg_cnt)
845 		panic("%s: task %s not in group", __func__, gtask->gt_name);
846 	qgroup->tqg_queue[i].tgc_cnt--;
847 	LIST_REMOVE(gtask, gt_list);
848 	mtx_unlock(&qgroup->tqg_lock);
849 	gtask->gt_taskqueue = NULL;
850 	gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
851 }
852 
853 static void
854 taskqgroup_binder(void *ctx)
855 {
856 	struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
857 	cpuset_t mask;
858 	int error;
859 
860 	CPU_ZERO(&mask);
861 	CPU_SET(gtask->bt_cpuid, &mask);
862 	error = cpuset_setthread(curthread->td_tid, &mask);
863 	thread_lock(curthread);
864 	sched_bind(curthread, gtask->bt_cpuid);
865 	thread_unlock(curthread);
866 
867 	if (error)
868 		printf("%s: binding curthread failed: %d\n", __func__, error);
869 	free(gtask, M_DEVBUF);
870 }
871 
872 static void
873 taskqgroup_bind(struct taskqgroup *qgroup)
874 {
875 	struct taskq_bind_task *gtask;
876 	int i;
877 
878 	/*
879 	 * Bind taskqueue threads to specific CPUs, if they have been assigned
880 	 * one.
881 	 */
882 	if (qgroup->tqg_cnt == 1)
883 		return;
884 
885 	for (i = 0; i < qgroup->tqg_cnt; i++) {
886 		gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
887 		GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
888 		gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
889 		grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
890 		    &gtask->bt_task);
891 	}
892 }
893 
894 static void
895 taskqgroup_config_init(void *arg)
896 {
897 	struct taskqgroup *qgroup = qgroup_config;
898 	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
899 
900 	LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
901 	    grouptask, gt_list);
902 	qgroup->tqg_queue[0].tgc_cnt = 0;
903 	taskqgroup_cpu_create(qgroup, 0, 0);
904 
905 	qgroup->tqg_cnt = 1;
906 	qgroup->tqg_stride = 1;
907 }
908 
909 SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
910 	taskqgroup_config_init, NULL);
911 
912 static int
913 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
914 {
915 	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
916 	struct grouptask *gtask;
917 	int i, k, old_cnt, old_cpu, cpu;
918 
919 	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
920 
921 	if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
922 		printf("%s: failed cnt: %d stride: %d "
923 		    "mp_ncpus: %d tqg_smp_started: %d\n",
924 		    __func__, cnt, stride, mp_ncpus, tqg_smp_started);
925 		return (EINVAL);
926 	}
927 	if (qgroup->tqg_adjusting) {
928 		printf("%s failed: adjusting\n", __func__);
929 		return (EBUSY);
930 	}
931 	qgroup->tqg_adjusting = 1;
932 	old_cnt = qgroup->tqg_cnt;
933 	old_cpu = 0;
934 	if (old_cnt < cnt)
935 		old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
936 	mtx_unlock(&qgroup->tqg_lock);
937 	/*
938 	 * Set up queue for tasks added before boot.
939 	 */
940 	if (old_cnt == 0) {
941 		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
942 		    grouptask, gt_list);
943 		qgroup->tqg_queue[0].tgc_cnt = 0;
944 	}
945 
946 	/*
947 	 * If new taskq threads have been added.
948 	 */
949 	cpu = old_cpu;
950 	for (i = old_cnt; i < cnt; i++) {
951 		taskqgroup_cpu_create(qgroup, i, cpu);
952 
953 		for (k = 0; k < stride; k++)
954 			cpu = CPU_NEXT(cpu);
955 	}
956 	mtx_lock(&qgroup->tqg_lock);
957 	qgroup->tqg_cnt = cnt;
958 	qgroup->tqg_stride = stride;
959 
960 	/*
961 	 * Adjust drivers to use new taskqs.
962 	 */
963 	for (i = 0; i < old_cnt; i++) {
964 		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
965 			LIST_REMOVE(gtask, gt_list);
966 			qgroup->tqg_queue[i].tgc_cnt--;
967 			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
968 		}
969 	}
970 	mtx_unlock(&qgroup->tqg_lock);
971 
972 	while ((gtask = LIST_FIRST(&gtask_head))) {
973 		LIST_REMOVE(gtask, gt_list);
974 		if (gtask->gt_cpu == -1)
975 			taskqgroup_attach_deferred(qgroup, gtask);
976 		else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
977 			taskqgroup_attach_deferred(qgroup, gtask);
978 	}
979 
980 #ifdef INVARIANTS
981 	mtx_lock(&qgroup->tqg_lock);
982 	for (i = 0; i < qgroup->tqg_cnt; i++) {
983 		MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
984 		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
985 			MPASS(gtask->gt_taskqueue != NULL);
986 	}
987 	mtx_unlock(&qgroup->tqg_lock);
988 #endif
989 	/*
990 	 * If taskq thread count has been reduced.
991 	 */
992 	for (i = cnt; i < old_cnt; i++)
993 		taskqgroup_cpu_remove(qgroup, i);
994 
995 	taskqgroup_bind(qgroup);
996 
997 	mtx_lock(&qgroup->tqg_lock);
998 	qgroup->tqg_adjusting = 0;
999 
1000 	return (0);
1001 }
1002 
1003 int
1004 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
1005 {
1006 	int error;
1007 
1008 	mtx_lock(&qgroup->tqg_lock);
1009 	error = _taskqgroup_adjust(qgroup, cnt, stride);
1010 	mtx_unlock(&qgroup->tqg_lock);
1011 
1012 	return (error);
1013 }
1014 
1015 struct taskqgroup *
1016 taskqgroup_create(const char *name)
1017 {
1018 	struct taskqgroup *qgroup;
1019 
1020 	qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
1021 	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
1022 	qgroup->tqg_name = name;
1023 	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
1024 
1025 	return (qgroup);
1026 }
1027 
/*
 * Currently a no-op.
 * NOTE(review): nothing allocated by taskqgroup_create() is released here.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}
1033 
1034 void
1035 taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
1036     const char *name)
1037 {
1038 
1039 	GROUPTASK_INIT(gtask, 0, fn, ctx);
1040 	taskqgroup_attach(qgroup_config, gtask, gtask, NULL, NULL, name);
1041 }
1042 
1043 void
1044 taskqgroup_config_gtask_deinit(struct grouptask *gtask)
1045 {
1046 
1047 	taskqgroup_detach(qgroup_config, gtask);
1048 }
1049