/*-
 * Copyright (c) 2000 Doug Rabson
 * Copyright (c) 2014 Jeff Roberson
 * Copyright (c) 2016 Matthew Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/gtaskqueue.h>
#include <sys/unistd.h>
#include <machine/stdarg.h>

static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
static void	gtaskqueue_thread_enqueue(void *);
static void	gtaskqueue_thread_loop(void *arg);

struct gtaskqueue_busy {
	struct gtask	*tb_running;
	TAILQ_ENTRY(gtaskqueue_busy) tb_link;
};

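/*
 * Sentinel value stored in tb_running to mark a drain waiter parked on
 * the tq_active list; see gtaskqueue_drain_tq_active() below.
 */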
static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;

struct gtaskqueue {
	STAILQ_HEAD(, gtask)	tq_queue;
	gtaskqueue_enqueue_fn	tq_enqueue;
	void			*tq_context;
	char			*tq_name;
	TAILQ_HEAD(, gtaskqueue_busy) tq_active;
	struct mtx		tq_mutex;
	struct thread		**tq_threads;
	int			tq_tcount;
	int			tq_spin;
	int			tq_flags;
	int			tq_callouts;
	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
};

#define	TQ_FLAGS_ACTIVE		(1 << 0)
#define	TQ_FLAGS_BLOCKED	(1 << 1)
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	(1 << 2)

#define	DT_CALLOUT_ARMED	(1 << 0)

#define	TQ_LOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_lock_spin(&(tq)->tq_mutex);			\
		else							\
			mtx_lock(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)

#define	TQ_UNLOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
		else							\
			mtx_unlock(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
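
/*
 * TQ_LOCK()/TQ_UNLOCK() dispatch on tq_spin so that "fast" queues created
 * with MTX_SPIN, which may be driven from contexts that cannot sleep,
 * share these code paths with ordinary sleep-mutex queues.
 */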

#ifdef INVARIANTS
static void
gtask_dump(struct gtask *gtask)
{
	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	       gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context);
}
#endif

static __inline int
TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
    int t)
{
	if (tq->tq_spin)
		return (msleep_spin(p, m, wm, t));
	return (msleep(p, m, pri, wm, t));
}

static struct gtaskqueue *
_gtaskqueue_create(const char *name, int mflags,
		 taskqueue_enqueue_fn enqueue, void *context,
		 int mtxflags, const char *mtxname __unused)
{
	struct gtaskqueue *queue;
	char *tq_name;

	tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
	if (tq_name == NULL)
		return (NULL);

	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");

	queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
	if (queue == NULL) {
		/* Don't leak the queue name on allocation failure. */
		free(tq_name, M_GTASKQUEUE);
		return (NULL);
	}

	STAILQ_INIT(&queue->tq_queue);
	TAILQ_INIT(&queue->tq_active);
	queue->tq_enqueue = enqueue;
	queue->tq_context = context;
	queue->tq_name = tq_name;
	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
	queue->tq_flags |= TQ_FLAGS_ACTIVE;
	if (enqueue == gtaskqueue_thread_enqueue)
		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);

	return (queue);
}
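
/*
 * Usage sketch (illustrative only; "foo_tq" is a hypothetical name):
 * taskqgroup_cpu_create() below follows this pattern, creating a
 * spin-mutex queue whose enqueue hook wakes a dedicated thread:
 *
 *	static struct gtaskqueue *foo_tq;
 *
 *	foo_tq = gtaskqueue_create_fast("foo", M_WAITOK,
 *	    gtaskqueue_thread_enqueue, &foo_tq);
 *	gtaskqueue_start_threads(&foo_tq, 1, PI_SOFT, "foo taskq");
 *
 * Passing gtaskqueue_thread_enqueue is what makes _gtaskqueue_create()
 * set TQ_FLAGS_UNLOCKED_ENQUEUE above.
 */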

/*
 * Signal a taskqueue thread to terminate.
 */
static void
gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
{

	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
		wakeup(tq);
		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
	}
}

static void
gtaskqueue_free(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
	gtaskqueue_terminate(queue->tq_threads, queue);
	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
	mtx_destroy(&queue->tq_mutex);
	free(queue->tq_threads, M_GTASKQUEUE);
	free(queue->tq_name, M_GTASKQUEUE);
	free(queue, M_GTASKQUEUE);
}

int
grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
{
#ifdef INVARIANTS
	if (queue == NULL) {
		gtask_dump(gtask);
		panic("queue == NULL");
	}
#endif
	TQ_LOCK(queue);
	if (gtask->ta_flags & TASK_ENQUEUED) {
		TQ_UNLOCK(queue);
		return (0);
	}
	STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
	gtask->ta_flags |= TASK_ENQUEUED;
	TQ_UNLOCK(queue);
	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
		queue->tq_enqueue(queue->tq_context);
	return (0);
}
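
/*
 * Enqueue sketch (illustrative; "foo_handler" and "sc" are hypothetical):
 * a task is initialized once with GTASK_INIT() from <sys/gtaskqueue.h>
 * (task, flags, priority, func, context) and may then be enqueued
 * repeatedly; a task still marked TASK_ENQUEUED is not queued twice:
 *
 *	struct gtask foo_task;
 *
 *	GTASK_INIT(&foo_task, 0, 0, foo_handler, sc);
 *	grouptaskqueue_enqueue(foo_tq, &foo_task);
 */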

static void
gtaskqueue_task_nop_fn(void *context)
{
}

/*
 * Block until all currently queued tasks in this taskqueue
 * have begun execution.  Tasks queued during execution of
 * this function are ignored.
 */
static void
gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
{
	struct gtask t_barrier;

	if (STAILQ_EMPTY(&queue->tq_queue))
		return;

	/*
	 * Enqueue our barrier after all current tasks, but with
	 * the highest priority so that newly queued tasks cannot
	 * pass it.  Because of the high priority, we can not use
	 * taskqueue_enqueue_locked directly (which drops the lock
	 * anyway) so just insert it at tail while we have the
	 * queue lock.
	 */
	GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
	t_barrier.ta_flags |= TASK_ENQUEUED;

	/*
	 * Once the barrier has executed, all previously queued tasks
	 * have completed or are currently executing.
	 */
	while (t_barrier.ta_flags & TASK_ENQUEUED)
		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
}

/*
 * Block until all currently executing tasks for this taskqueue
 * complete.  Tasks that begin execution during the execution
 * of this function are ignored.
 */
static void
gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
{
	struct gtaskqueue_busy tb_marker, *tb_first;

	if (TAILQ_EMPTY(&queue->tq_active))
		return;

	/* Block gtaskqueue_terminate(). */
	queue->tq_callouts++;

	/*
	 * Wait for all currently executing taskqueue threads
	 * to go idle.
	 */
	tb_marker.tb_running = TB_DRAIN_WAITER;
	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);

	/*
	 * Wakeup any other drain waiter that happened to queue up
	 * without any intervening active thread.
	 */
	tb_first = TAILQ_FIRST(&queue->tq_active);
	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
		wakeup(tb_first);

	/* Release gtaskqueue_terminate(). */
	queue->tq_callouts--;
	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
		wakeup_one(queue->tq_threads);
}

void
gtaskqueue_block(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags |= TQ_FLAGS_BLOCKED;
	TQ_UNLOCK(queue);
}

void
gtaskqueue_unblock(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
	if (!STAILQ_EMPTY(&queue->tq_queue))
		queue->tq_enqueue(queue->tq_context);
	TQ_UNLOCK(queue);
}

static void
gtaskqueue_run_locked(struct gtaskqueue *queue)
{
	struct gtaskqueue_busy tb;
	struct gtaskqueue_busy *tb_first;
	struct gtask *gtask;

	KASSERT(queue != NULL, ("tq is NULL"));
	TQ_ASSERT_LOCKED(queue);
	tb.tb_running = NULL;

	while (STAILQ_FIRST(&queue->tq_queue)) {
		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);

		/*
		 * Carefully remove the first task from the queue and
		 * clear its TASK_ENQUEUED flag
		 */
		gtask = STAILQ_FIRST(&queue->tq_queue);
		KASSERT(gtask != NULL, ("task is NULL"));
		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
		gtask->ta_flags &= ~TASK_ENQUEUED;
		tb.tb_running = gtask;
		TQ_UNLOCK(queue);

		KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
		gtask->ta_func(gtask->ta_context);

		TQ_LOCK(queue);
		tb.tb_running = NULL;
		wakeup(gtask);

		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
		tb_first = TAILQ_FIRST(&queue->tq_active);
		if (tb_first != NULL &&
		    tb_first->tb_running == TB_DRAIN_WAITER)
			wakeup(tb_first);
	}
}

static int
task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
{
	struct gtaskqueue_busy *tb;

	TQ_ASSERT_LOCKED(queue);
	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
		if (tb->tb_running == gtask)
			return (1);
	}
	return (0);
}

static int
gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
{

	if (gtask->ta_flags & TASK_ENQUEUED)
		STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
	gtask->ta_flags &= ~TASK_ENQUEUED;
	return (task_is_running(queue, gtask) ? EBUSY : 0);
}

int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int error;

	TQ_LOCK(queue);
	error = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);

	return (error);
}

void
gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
{

	if (!queue->tq_spin)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

	TQ_LOCK(queue);
	while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
		TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
	TQ_UNLOCK(queue);
}

void
gtaskqueue_drain_all(struct gtaskqueue *queue)
{

	if (!queue->tq_spin)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

	TQ_LOCK(queue);
	gtaskqueue_drain_tq_queue(queue);
	gtaskqueue_drain_tq_active(queue);
	TQ_UNLOCK(queue);
}
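
/*
 * Teardown sketch (illustrative; "foo_tq", "foo_task", and "sc" are
 * hypothetical): before freeing a task's context, a consumer cancels any
 * pending instance and then drains a running one, so nothing can touch
 * freed memory afterwards:
 *
 *	gtaskqueue_cancel(foo_tq, &foo_task);
 *	gtaskqueue_drain(foo_tq, &foo_task);
 *	free(sc, M_DEVBUF);
 *
 * gtaskqueue_drain_all() instead waits for everything queued or running
 * at the time of the call, which suits tearing down a whole queue rather
 * than one task.
 */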

static int
_gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
    cpuset_t *mask, const char *name, va_list ap)
{
	char ktname[MAXCOMLEN + 1];
	struct thread *td;
	struct gtaskqueue *tq;
	int i, error;

	if (count <= 0)
		return (EINVAL);

	vsnprintf(ktname, sizeof(ktname), name, ap);
	tq = *tqp;

	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
	    M_NOWAIT | M_ZERO);
	if (tq->tq_threads == NULL) {
		printf("%s: no memory for %s threads\n", __func__, ktname);
		return (ENOMEM);
	}

	for (i = 0; i < count; i++) {
		if (count == 1)
			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
		else
			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
			    &tq->tq_threads[i], RFSTOPPED, 0,
			    "%s_%d", ktname, i);
		if (error) {
			/* should be ok to continue, gtaskqueue_free will dtrt */
			printf("%s: kthread_add(%s): error %d\n", __func__,
			    ktname, error);
			tq->tq_threads[i] = NULL;		/* paranoid */
		} else
			tq->tq_tcount++;
	}
	for (i = 0; i < count; i++) {
		if (tq->tq_threads[i] == NULL)
			continue;
		td = tq->tq_threads[i];
		if (mask) {
			error = cpuset_setthread(td->td_tid, mask);
			/*
			 * Failing to pin is rarely an actual fatal error;
			 * it'll just affect performance.
			 */
			if (error)
				printf("%s: thread=%llu: can't pin; "
				    "error=%d\n",
				    __func__,
				    (unsigned long long) td->td_tid,
				    error);
		}
		thread_lock(td);
		sched_prio(td, pri);
		sched_add(td, SRQ_BORING);
		thread_unlock(td);
	}

	return (0);
}

static int
gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
    const char *name, ...)
{
	va_list ap;
	int error;

	va_start(ap, name);
	error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
	va_end(ap);
	return (error);
}

static inline void
gtaskqueue_run_callback(struct gtaskqueue *tq,
    enum taskqueue_callback_type cb_type)
{
	taskqueue_callback_fn tq_callback;

	TQ_ASSERT_UNLOCKED(tq);
	tq_callback = tq->tq_callbacks[cb_type];
	if (tq_callback != NULL)
		tq_callback(tq->tq_cb_contexts[cb_type]);
}

static void
gtaskqueue_thread_loop(void *arg)
{
	struct gtaskqueue **tqp, *tq;

	tqp = arg;
	tq = *tqp;
	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
	TQ_LOCK(tq);
	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
		/* XXX ? */
		gtaskqueue_run_locked(tq);
		/*
		 * Because gtaskqueue_run_locked() can drop tq_mutex, we
		 * need to check if TQ_FLAGS_ACTIVE was not cleared in the
		 * meantime, which would mean we missed a wakeup.
		 */
		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
			break;
		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
	}
	gtaskqueue_run_locked(tq);
	/*
	 * This thread is on its way out, so just drop the lock temporarily
	 * in order to call the shutdown callback.  This allows the callback
	 * to look at the taskqueue, even just before it dies.
	 */
	TQ_UNLOCK(tq);
	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
	TQ_LOCK(tq);

	/* rendezvous with thread that asked us to terminate */
	tq->tq_tcount--;
	wakeup_one(tq->tq_threads);
	TQ_UNLOCK(tq);
	kthread_exit();
}

static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue **tqp, *tq;

	tqp = context;
	tq = *tqp;
	wakeup_one(tq);
}

static struct gtaskqueue *
gtaskqueue_create_fast(const char *name, int mflags,
		 taskqueue_enqueue_fn enqueue, void *context)
{
	return (_gtaskqueue_create(name, mflags, enqueue, context,
	    MTX_SPIN, "fast_taskqueue"));
}

struct taskqgroup_cpu {
	LIST_HEAD(, grouptask)	tgc_tasks;
	struct gtaskqueue	*tgc_taskq;
	int	tgc_cnt;
	int	tgc_cpu;
};

struct taskqgroup {
	struct taskqgroup_cpu tqg_queue[MAXCPU];
	struct mtx	tqg_lock;
	char *		tqg_name;
	int		tqg_adjusting;
	int		tqg_stride;
	int		tqg_cnt;
};

struct taskq_bind_task {
	struct gtask bt_task;
	int	bt_cpuid;
};

static void
taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
{
	struct taskqgroup_cpu *qcpu;

	qcpu = &qgroup->tqg_queue[idx];
	LIST_INIT(&qcpu->tgc_tasks);
	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
	    gtaskqueue_thread_enqueue, &qcpu->tgc_taskq);
	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
	    "%s_%d", qgroup->tqg_name, idx);
	qcpu->tgc_cpu = cpu;
}

static void
taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
{

	gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
}

/*
 * Find the taskq with the fewest tasks that is not already servicing
 * the given uniq identifier.
 */
static int
taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
{
	struct grouptask *n;
	int i, idx, mincnt;
	int strict;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
	if (qgroup->tqg_cnt == 0)
		return (0);
	idx = -1;
	mincnt = INT_MAX;
	/*
	 * Two passes: first scan for a queue with the fewest tasks that
	 * does not already service this uniq id.  If that fails, simply
	 * pick the queue with the fewest total tasks.
	 */
	for (strict = 1; mincnt == INT_MAX; strict = 0) {
		for (i = 0; i < qgroup->tqg_cnt; i++) {
			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
				continue;
			if (strict) {
				LIST_FOREACH(n,
				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
					if (n->gt_uniq == uniq)
						break;
				if (n != NULL)
					continue;
			}
			mincnt = qgroup->tqg_queue[i].tgc_cnt;
			idx = i;
		}
	}
	if (idx == -1)
		panic("taskqgroup_find: Failed to pick a qid.");

	return (idx);
}

/*
 * smp_started is unusable since it is not set for UP kernels or even for
 * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 *
 * So maintain our own flag.  It must be set after all CPUs are started
 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 * simpler for adjustment to pass a flag indicating if it is delayed.
 */

static int tqg_smp_started;

static void
tqg_record_smp_started(void *arg)
{
	tqg_smp_started = 1;
}

SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
	tqg_record_smp_started, NULL);

void
taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
    void *uniq, int irq, char *name)
{
	cpuset_t mask;
	int qid;

	gtask->gt_uniq = uniq;
	gtask->gt_name = name;
	gtask->gt_irq = irq;
	gtask->gt_cpu = -1;
	mtx_lock(&qgroup->tqg_lock);
	qid = taskqgroup_find(qgroup, uniq);
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	if (irq != -1 && tqg_smp_started) {
		gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
		CPU_ZERO(&mask);
		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
		mtx_unlock(&qgroup->tqg_lock);
		intr_setaffinity(irq, &mask);
	} else
		mtx_unlock(&qgroup->tqg_lock);
}

static void
taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	cpuset_t mask;
	int qid, cpu;

	mtx_lock(&qgroup->tqg_lock);
	qid = taskqgroup_find(qgroup, gtask->gt_uniq);
	cpu = qgroup->tqg_queue[qid].tgc_cpu;
	if (gtask->gt_irq != -1) {
		mtx_unlock(&qgroup->tqg_lock);

		CPU_ZERO(&mask);
		CPU_SET(cpu, &mask);
		intr_setaffinity(gtask->gt_irq, &mask);

		mtx_lock(&qgroup->tqg_lock);
	}
	qgroup->tqg_queue[qid].tgc_cnt++;

	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
			 gt_list);
	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	mtx_unlock(&qgroup->tqg_lock);
}

int
taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
	void *uniq, int cpu, int irq, char *name)
{
	cpuset_t mask;
	int i, qid;

	qid = -1;
	gtask->gt_uniq = uniq;
	gtask->gt_name = name;
	gtask->gt_irq = irq;
	gtask->gt_cpu = cpu;
	mtx_lock(&qgroup->tqg_lock);
	if (tqg_smp_started) {
		for (i = 0; i < qgroup->tqg_cnt; i++)
			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
				qid = i;
				break;
			}
		if (qid == -1) {
			mtx_unlock(&qgroup->tqg_lock);
			return (EINVAL);
		}
	} else
		qid = 0;
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	cpu = qgroup->tqg_queue[qid].tgc_cpu;
	mtx_unlock(&qgroup->tqg_lock);

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	if (irq != -1 && tqg_smp_started)
		intr_setaffinity(irq, &mask);
	return (0);
}

static int
taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	cpuset_t mask;
	int i, qid, irq, cpu;

	qid = -1;
	irq = gtask->gt_irq;
	cpu = gtask->gt_cpu;
	MPASS(tqg_smp_started);
	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++)
		if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
			qid = i;
			break;
		}
	if (qid == -1) {
		mtx_unlock(&qgroup->tqg_lock);
		return (EINVAL);
	}
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	mtx_unlock(&qgroup->tqg_lock);

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	if (irq != -1)
		intr_setaffinity(irq, &mask);
	return (0);
}

void
taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	int i;

	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++)
		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
			break;
	if (i == qgroup->tqg_cnt)
		panic("taskqgroup_detach: task not in group");
	qgroup->tqg_queue[i].tgc_cnt--;
	LIST_REMOVE(gtask, gt_list);
	mtx_unlock(&qgroup->tqg_lock);
	gtask->gt_taskqueue = NULL;
}

static void
taskqgroup_binder(void *ctx)
{
	struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
	cpuset_t mask;
	int error;

	CPU_ZERO(&mask);
	CPU_SET(gtask->bt_cpuid, &mask);
	error = cpuset_setthread(curthread->td_tid, &mask);
	thread_lock(curthread);
	sched_bind(curthread, gtask->bt_cpuid);
	thread_unlock(curthread);

	if (error)
		printf("taskqgroup_binder: setaffinity failed: %d\n",
		    error);
	free(gtask, M_DEVBUF);
}

static void
taskqgroup_bind(struct taskqgroup *qgroup)
{
	struct taskq_bind_task *gtask;
	int i;

	/*
	 * Bind taskqueue threads to specific CPUs, if they have been assigned
	 * one.
	 */
	if (qgroup->tqg_cnt == 1)
		return;

	for (i = 0; i < qgroup->tqg_cnt; i++) {
		gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
		GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
		gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
		grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
		    &gtask->bt_task);
	}
}

static int
_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
	struct grouptask *gtask;
	int i, k, old_cnt, old_cpu, cpu;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);

	if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
		printf("%s: failed cnt: %d stride: %d "
		    "mp_ncpus: %d tqg_smp_started: %d\n",
		    __func__, cnt, stride, mp_ncpus, tqg_smp_started);
		return (EINVAL);
	}
	if (qgroup->tqg_adjusting) {
		printf("taskqgroup_adjust failed: adjusting\n");
		return (EBUSY);
	}
	qgroup->tqg_adjusting = 1;
	old_cnt = qgroup->tqg_cnt;
	old_cpu = 0;
	if (old_cnt < cnt)
		old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
	mtx_unlock(&qgroup->tqg_lock);
	/*
	 * Set up queue for tasks added before boot.
	 */
	if (old_cnt == 0) {
		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
		    grouptask, gt_list);
		qgroup->tqg_queue[0].tgc_cnt = 0;
	}

	/*
	 * If new taskq threads have been added.
	 */
	cpu = old_cpu;
	for (i = old_cnt; i < cnt; i++) {
		taskqgroup_cpu_create(qgroup, i, cpu);

		for (k = 0; k < stride; k++)
			cpu = CPU_NEXT(cpu);
	}
	mtx_lock(&qgroup->tqg_lock);
	qgroup->tqg_cnt = cnt;
	qgroup->tqg_stride = stride;

	/*
	 * Adjust drivers to use new taskqs.
	 */
	for (i = 0; i < old_cnt; i++) {
		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
			LIST_REMOVE(gtask, gt_list);
			qgroup->tqg_queue[i].tgc_cnt--;
			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
		}
	}
	mtx_unlock(&qgroup->tqg_lock);

	while ((gtask = LIST_FIRST(&gtask_head))) {
		LIST_REMOVE(gtask, gt_list);
		if (gtask->gt_cpu == -1)
			taskqgroup_attach_deferred(qgroup, gtask);
		else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
			taskqgroup_attach_deferred(qgroup, gtask);
	}

#ifdef INVARIANTS
	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++) {
		MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
			MPASS(gtask->gt_taskqueue != NULL);
	}
	mtx_unlock(&qgroup->tqg_lock);
#endif
	/*
	 * If taskq thread count has been reduced.
	 */
	for (i = cnt; i < old_cnt; i++)
		taskqgroup_cpu_remove(qgroup, i);

	taskqgroup_bind(qgroup);

	mtx_lock(&qgroup->tqg_lock);
	qgroup->tqg_adjusting = 0;

	return (0);
}

int
taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
	int error;

	mtx_lock(&qgroup->tqg_lock);
	error = _taskqgroup_adjust(qgroup, cnt, stride);
	mtx_unlock(&qgroup->tqg_lock);

	return (error);
}

struct taskqgroup *
taskqgroup_create(char *name)
{
	struct taskqgroup *qgroup;

	qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
	qgroup->tqg_name = name;
	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);

	return (qgroup);
}
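
/*
 * Lifecycle sketch (illustrative; "qgroup_foo" and "sc" are hypothetical):
 * a group is created early, sized once all CPUs are running (see
 * tqg_smp_started above), and tasks are then attached and spread across
 * the per-CPU queues:
 *
 *	struct taskqgroup *qgroup_foo;
 *
 *	qgroup_foo = taskqgroup_create("foo");
 *	taskqgroup_adjust(qgroup_foo, mp_ncpus, 1);
 *	taskqgroup_attach(qgroup_foo, &sc->sc_gtask, sc, -1, "foo task");
 *
 * Tasks attached before tqg_smp_started is set land on queue 0 and are
 * redistributed by _taskqgroup_adjust().
 */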

void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}
964