xref: /freebsd/sys/kern/subr_taskqueue.c (revision f4bf2442a03f9b72cfe6d051766b650a4721f3d8)
1 /*-
2  * Copyright (c) 2000 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/bus.h>
33 #include <sys/cpuset.h>
34 #include <sys/interrupt.h>
35 #include <sys/kernel.h>
36 #include <sys/kthread.h>
37 #include <sys/libkern.h>
38 #include <sys/limits.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/proc.h>
43 #include <sys/sched.h>
44 #include <sys/smp.h>
45 #include <sys/taskqueue.h>
46 #include <sys/unistd.h>
47 #include <machine/stdarg.h>
48 
49 static MALLOC_DEFINE(M_TASKQUEUE, "taskqueue", "Task Queues");
50 static void	*taskqueue_giant_ih;
51 static void	*taskqueue_ih;
52 static void	 taskqueue_fast_enqueue(void *);
53 static void	 taskqueue_swi_enqueue(void *);
54 static void	 taskqueue_swi_giant_enqueue(void *);
55 
56 struct taskqueue_busy {
57 	struct task	*tb_running;
58 	TAILQ_ENTRY(taskqueue_busy) tb_link;
59 };
60 
/*
 * Sentinel stored in tb_running to mark a taskqueue_busy entry that belongs
 * to a drain waiter rather than to a thread executing a task.
 */
struct task * const TB_DRAIN_WAITER = (struct task *)0x1;
62 
63 struct taskqueue {
64 	STAILQ_HEAD(, task)	tq_queue;
65 	taskqueue_enqueue_fn	tq_enqueue;
66 	void			*tq_context;
67 	char			*tq_name;
68 	TAILQ_HEAD(, taskqueue_busy) tq_active;
69 	struct mtx		tq_mutex;
70 	struct thread		**tq_threads;
71 	struct thread		*tq_curthread;
72 	int			tq_tcount;
73 	int			tq_spin;
74 	int			tq_flags;
75 	int			tq_callouts;
76 	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
77 	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
78 };
79 
/* taskqueue.tq_flags bits. */
/* Set at creation, cleared by taskqueue_free(); workers exit once clear. */
#define	TQ_FLAGS_ACTIVE		(1 << 0)
/* While set, queueing a task does not call the tq_enqueue hook. */
#define	TQ_FLAGS_BLOCKED	(1 << 1)
/* The tq_enqueue hook is called after the queue lock has been dropped. */
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	(1 << 2)

/* timeout_task.f bit: the callout is armed and counted in tq_callouts. */
#define	DT_CALLOUT_ARMED	(1 << 0)
85 
/*
 * Acquire the queue mutex, picking the spin or default-mutex primitive
 * according to how the queue was created.
 */
#define	TQ_LOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_lock_spin(&(tq)->tq_mutex);			\
		else							\
			mtx_lock(&(tq)->tq_mutex);			\
	} while (0)
/* Assert that the calling thread owns the queue mutex. */
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)

/* Release the queue mutex; counterpart of TQ_LOCK(). */
#define	TQ_UNLOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
		else							\
			mtx_unlock(&(tq)->tq_mutex);			\
	} while (0)
/* Assert that the calling thread does not own the queue mutex. */
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
103 
104 void
105 _timeout_task_init(struct taskqueue *queue, struct timeout_task *timeout_task,
106     int priority, task_fn_t func, void *context)
107 {
108 
109 	TASK_INIT(&timeout_task->t, priority, func, context);
110 	callout_init_mtx(&timeout_task->c, &queue->tq_mutex,
111 	    CALLOUT_RETURNUNLOCKED);
112 	timeout_task->q = queue;
113 	timeout_task->f = 0;
114 }
115 
116 static __inline int
117 TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
118     int t)
119 {
120 	if (tq->tq_spin)
121 		return (msleep_spin(p, m, wm, t));
122 	return (msleep(p, m, pri, wm, t));
123 }
124 
125 static struct taskqueue *
126 _taskqueue_create(const char *name, int mflags,
127 		 taskqueue_enqueue_fn enqueue, void *context,
128 		 int mtxflags, const char *mtxname __unused)
129 {
130 	struct taskqueue *queue;
131 	char *tq_name;
132 
133 	tq_name = malloc(TASKQUEUE_NAMELEN, M_TASKQUEUE, mflags | M_ZERO);
134 	if (!tq_name)
135 		return (NULL);
136 
137 	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
138 
139 	queue = malloc(sizeof(struct taskqueue), M_TASKQUEUE, mflags | M_ZERO);
140 	if (!queue)
141 		return (NULL);
142 
143 	STAILQ_INIT(&queue->tq_queue);
144 	TAILQ_INIT(&queue->tq_active);
145 	queue->tq_enqueue = enqueue;
146 	queue->tq_context = context;
147 	queue->tq_name = tq_name;
148 	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
149 	queue->tq_flags |= TQ_FLAGS_ACTIVE;
150 	if (enqueue == taskqueue_fast_enqueue ||
151 	    enqueue == taskqueue_swi_enqueue ||
152 	    enqueue == taskqueue_swi_giant_enqueue ||
153 	    enqueue == taskqueue_thread_enqueue)
154 		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
155 	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
156 
157 	return (queue);
158 }
159 
160 struct taskqueue *
161 taskqueue_create(const char *name, int mflags,
162 		 taskqueue_enqueue_fn enqueue, void *context)
163 {
164 
165 	return _taskqueue_create(name, mflags, enqueue, context,
166 			MTX_DEF, name);
167 }
168 
169 void
170 taskqueue_set_callback(struct taskqueue *queue,
171     enum taskqueue_callback_type cb_type, taskqueue_callback_fn callback,
172     void *context)
173 {
174 
175 	KASSERT(((cb_type >= TASKQUEUE_CALLBACK_TYPE_MIN) &&
176 	    (cb_type <= TASKQUEUE_CALLBACK_TYPE_MAX)),
177 	    ("Callback type %d not valid, must be %d-%d", cb_type,
178 	    TASKQUEUE_CALLBACK_TYPE_MIN, TASKQUEUE_CALLBACK_TYPE_MAX));
179 	KASSERT((queue->tq_callbacks[cb_type] == NULL),
180 	    ("Re-initialization of taskqueue callback?"));
181 
182 	queue->tq_callbacks[cb_type] = callback;
183 	queue->tq_cb_contexts[cb_type] = context;
184 }
185 
186 /*
187  * Signal a taskqueue thread to terminate.
188  */
189 static void
190 taskqueue_terminate(struct thread **pp, struct taskqueue *tq)
191 {
192 
193 	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
194 		wakeup(tq);
195 		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
196 	}
197 }
198 
199 void
200 taskqueue_free(struct taskqueue *queue)
201 {
202 
203 	TQ_LOCK(queue);
204 	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
205 	taskqueue_terminate(queue->tq_threads, queue);
206 	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
207 	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
208 	mtx_destroy(&queue->tq_mutex);
209 	free(queue->tq_threads, M_TASKQUEUE);
210 	free(queue->tq_name, M_TASKQUEUE);
211 	free(queue, M_TASKQUEUE);
212 }
213 
214 static int
215 taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
216 {
217 	struct task *ins;
218 	struct task *prev;
219 
220 	KASSERT(task->ta_func != NULL, ("enqueueing task with NULL func"));
221 	/*
222 	 * Count multiple enqueues.
223 	 */
224 	if (task->ta_pending) {
225 		if (task->ta_pending < UCHAR_MAX)
226 			task->ta_pending++;
227 		TQ_UNLOCK(queue);
228 		return (0);
229 	}
230 
231 	/*
232 	 * Optimise the case when all tasks have the same priority.
233 	 */
234 	prev = STAILQ_LAST(&queue->tq_queue, task, ta_link);
235 	if (!prev || prev->ta_priority >= task->ta_priority) {
236 		STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
237 	} else {
238 		prev = NULL;
239 		for (ins = STAILQ_FIRST(&queue->tq_queue); ins;
240 		     prev = ins, ins = STAILQ_NEXT(ins, ta_link))
241 			if (ins->ta_priority < task->ta_priority)
242 				break;
243 
244 		if (prev)
245 			STAILQ_INSERT_AFTER(&queue->tq_queue, prev, task, ta_link);
246 		else
247 			STAILQ_INSERT_HEAD(&queue->tq_queue, task, ta_link);
248 	}
249 
250 	task->ta_pending = 1;
251 	if ((queue->tq_flags & TQ_FLAGS_UNLOCKED_ENQUEUE) != 0)
252 		TQ_UNLOCK(queue);
253 	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
254 		queue->tq_enqueue(queue->tq_context);
255 	if ((queue->tq_flags & TQ_FLAGS_UNLOCKED_ENQUEUE) == 0)
256 		TQ_UNLOCK(queue);
257 
258 	/* Return with lock released. */
259 	return (0);
260 }
261 
262 int
263 grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task)
264 {
265 	TQ_LOCK(queue);
266 	if (task->ta_pending) {
267 		TQ_UNLOCK(queue);
268 		return (0);
269 	}
270 	STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
271 	task->ta_pending = 1;
272 	TQ_UNLOCK(queue);
273 	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
274 		queue->tq_enqueue(queue->tq_context);
275 	return (0);
276 }
277 
/*
 * Public entry point for queueing a task: take the queue lock and hand off
 * to taskqueue_enqueue_locked(), which releases the lock before returning.
 */
int
taskqueue_enqueue(struct taskqueue *queue, struct task *task)
{

	TQ_LOCK(queue);
	/* The callee drops the lock; nothing to unlock here. */
	return (taskqueue_enqueue_locked(queue, task));
}
289 
290 static void
291 taskqueue_timeout_func(void *arg)
292 {
293 	struct taskqueue *queue;
294 	struct timeout_task *timeout_task;
295 
296 	timeout_task = arg;
297 	queue = timeout_task->q;
298 	KASSERT((timeout_task->f & DT_CALLOUT_ARMED) != 0, ("Stray timeout"));
299 	timeout_task->f &= ~DT_CALLOUT_ARMED;
300 	queue->tq_callouts--;
301 	taskqueue_enqueue_locked(timeout_task->q, &timeout_task->t);
302 	/* The lock is released inside. */
303 }
304 
305 int
306 taskqueue_enqueue_timeout(struct taskqueue *queue,
307     struct timeout_task *timeout_task, int ticks)
308 {
309 	int res;
310 
311 	TQ_LOCK(queue);
312 	KASSERT(timeout_task->q == NULL || timeout_task->q == queue,
313 	    ("Migrated queue"));
314 	KASSERT(!queue->tq_spin, ("Timeout for spin-queue"));
315 	timeout_task->q = queue;
316 	res = timeout_task->t.ta_pending;
317 	if (ticks == 0) {
318 		taskqueue_enqueue_locked(queue, &timeout_task->t);
319 		/* The lock is released inside. */
320 	} else {
321 		if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) {
322 			res++;
323 		} else {
324 			queue->tq_callouts++;
325 			timeout_task->f |= DT_CALLOUT_ARMED;
326 			if (ticks < 0)
327 				ticks = -ticks; /* Ignore overflow. */
328 		}
329 		if (ticks > 0) {
330 			callout_reset(&timeout_task->c, ticks,
331 			    taskqueue_timeout_func, timeout_task);
332 		}
333 		TQ_UNLOCK(queue);
334 	}
335 	return (res);
336 }
337 
/*
 * Task handler that does nothing; used for the barrier task that
 * taskqueue_drain_tq_queue() inserts.
 */
static void
taskqueue_task_nop_fn(void *context, int pending)
{
	/* Intentionally empty. */
}
342 
343 /*
344  * Block until all currently queued tasks in this taskqueue
345  * have begun execution.  Tasks queued during execution of
346  * this function are ignored.
347  */
348 static void
349 taskqueue_drain_tq_queue(struct taskqueue *queue)
350 {
351 	struct task t_barrier;
352 
353 	if (STAILQ_EMPTY(&queue->tq_queue))
354 		return;
355 
356 	/*
357 	 * Enqueue our barrier after all current tasks, but with
358 	 * the highest priority so that newly queued tasks cannot
359 	 * pass it.  Because of the high priority, we can not use
360 	 * taskqueue_enqueue_locked directly (which drops the lock
361 	 * anyway) so just insert it at tail while we have the
362 	 * queue lock.
363 	 */
364 	TASK_INIT(&t_barrier, USHRT_MAX, taskqueue_task_nop_fn, &t_barrier);
365 	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
366 	t_barrier.ta_pending = 1;
367 
368 	/*
369 	 * Once the barrier has executed, all previously queued tasks
370 	 * have completed or are currently executing.
371 	 */
372 	while (t_barrier.ta_pending != 0)
373 		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
374 }
375 
376 /*
377  * Block until all currently executing tasks for this taskqueue
378  * complete.  Tasks that begin execution during the execution
379  * of this function are ignored.
380  */
381 static void
382 taskqueue_drain_tq_active(struct taskqueue *queue)
383 {
384 	struct taskqueue_busy tb_marker, *tb_first;
385 
386 	if (TAILQ_EMPTY(&queue->tq_active))
387 		return;
388 
389 	/* Block taskq_terminate().*/
390 	queue->tq_callouts++;
391 
392 	/*
393 	 * Wait for all currently executing taskqueue threads
394 	 * to go idle.
395 	 */
396 	tb_marker.tb_running = TB_DRAIN_WAITER;
397 	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
398 	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
399 		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
400 	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
401 
402 	/*
403 	 * Wakeup any other drain waiter that happened to queue up
404 	 * without any intervening active thread.
405 	 */
406 	tb_first = TAILQ_FIRST(&queue->tq_active);
407 	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
408 		wakeup(tb_first);
409 
410 	/* Release taskqueue_terminate(). */
411 	queue->tq_callouts--;
412 	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
413 		wakeup_one(queue->tq_threads);
414 }
415 
416 void
417 taskqueue_block(struct taskqueue *queue)
418 {
419 
420 	TQ_LOCK(queue);
421 	queue->tq_flags |= TQ_FLAGS_BLOCKED;
422 	TQ_UNLOCK(queue);
423 }
424 
425 void
426 taskqueue_unblock(struct taskqueue *queue)
427 {
428 
429 	TQ_LOCK(queue);
430 	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
431 	if (!STAILQ_EMPTY(&queue->tq_queue))
432 		queue->tq_enqueue(queue->tq_context);
433 	TQ_UNLOCK(queue);
434 }
435 
436 static void
437 taskqueue_run_locked(struct taskqueue *queue)
438 {
439 	struct taskqueue_busy tb;
440 	struct taskqueue_busy *tb_first;
441 	struct task *task;
442 	int pending;
443 
444 	KASSERT(queue != NULL, ("tq is NULL"));
445 	TQ_ASSERT_LOCKED(queue);
446 	tb.tb_running = NULL;
447 
448 	while (STAILQ_FIRST(&queue->tq_queue)) {
449 		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
450 
451 		/*
452 		 * Carefully remove the first task from the queue and
453 		 * zero its pending count.
454 		 */
455 		task = STAILQ_FIRST(&queue->tq_queue);
456 		KASSERT(task != NULL, ("task is NULL"));
457 		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
458 		pending = task->ta_pending;
459 		task->ta_pending = 0;
460 		tb.tb_running = task;
461 		TQ_UNLOCK(queue);
462 
463 		KASSERT(task->ta_func != NULL, ("task->ta_func is NULL"));
464 		task->ta_func(task->ta_context, pending);
465 
466 		TQ_LOCK(queue);
467 		tb.tb_running = NULL;
468 		if ((task->ta_flags & TASK_SKIP_WAKEUP) == 0)
469 			wakeup(task);
470 
471 		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
472 		tb_first = TAILQ_FIRST(&queue->tq_active);
473 		if (tb_first != NULL &&
474 		    tb_first->tb_running == TB_DRAIN_WAITER)
475 			wakeup(tb_first);
476 	}
477 }
478 
479 void
480 taskqueue_run(struct taskqueue *queue)
481 {
482 
483 	TQ_LOCK(queue);
484 	queue->tq_curthread = curthread;
485 	taskqueue_run_locked(queue);
486 	queue->tq_curthread = NULL;
487 	TQ_UNLOCK(queue);
488 }
489 
490 static int
491 task_is_running(struct taskqueue *queue, struct task *task)
492 {
493 	struct taskqueue_busy *tb;
494 
495 	TQ_ASSERT_LOCKED(queue);
496 	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
497 		if (tb->tb_running == task)
498 			return (1);
499 	}
500 	return (0);
501 }
502 
503 static int
504 taskqueue_cancel_locked(struct taskqueue *queue, struct task *task,
505     u_int *pendp)
506 {
507 
508 	if (task->ta_pending > 0)
509 		STAILQ_REMOVE(&queue->tq_queue, task, task, ta_link);
510 	if (pendp != NULL)
511 		*pendp = task->ta_pending;
512 	task->ta_pending = 0;
513 	return (task_is_running(queue, task) ? EBUSY : 0);
514 }
515 
516 int
517 taskqueue_cancel(struct taskqueue *queue, struct task *task, u_int *pendp)
518 {
519 	int error;
520 
521 	TQ_LOCK(queue);
522 	error = taskqueue_cancel_locked(queue, task, pendp);
523 	TQ_UNLOCK(queue);
524 
525 	return (error);
526 }
527 
528 int
529 taskqueue_cancel_timeout(struct taskqueue *queue,
530     struct timeout_task *timeout_task, u_int *pendp)
531 {
532 	u_int pending, pending1;
533 	int error;
534 
535 	TQ_LOCK(queue);
536 	pending = !!(callout_stop(&timeout_task->c) > 0);
537 	error = taskqueue_cancel_locked(queue, &timeout_task->t, &pending1);
538 	if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) {
539 		timeout_task->f &= ~DT_CALLOUT_ARMED;
540 		queue->tq_callouts--;
541 	}
542 	TQ_UNLOCK(queue);
543 
544 	if (pendp != NULL)
545 		*pendp = pending + pending1;
546 	return (error);
547 }
548 
549 void
550 taskqueue_drain(struct taskqueue *queue, struct task *task)
551 {
552 
553 	if (!queue->tq_spin)
554 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
555 
556 	TQ_LOCK(queue);
557 	while (task->ta_pending != 0 || task_is_running(queue, task))
558 		TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0);
559 	TQ_UNLOCK(queue);
560 }
561 
562 void
563 taskqueue_drain_all(struct taskqueue *queue)
564 {
565 
566 	if (!queue->tq_spin)
567 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
568 
569 	TQ_LOCK(queue);
570 	taskqueue_drain_tq_queue(queue);
571 	taskqueue_drain_tq_active(queue);
572 	TQ_UNLOCK(queue);
573 }
574 
575 void
576 taskqueue_drain_timeout(struct taskqueue *queue,
577     struct timeout_task *timeout_task)
578 {
579 
580 	callout_drain(&timeout_task->c);
581 	taskqueue_drain(queue, &timeout_task->t);
582 }
583 
584 static void
585 taskqueue_swi_enqueue(void *context)
586 {
587 	swi_sched(taskqueue_ih, 0);
588 }
589 
590 static void
591 taskqueue_swi_run(void *dummy)
592 {
593 	taskqueue_run(taskqueue_swi);
594 }
595 
596 static void
597 taskqueue_swi_giant_enqueue(void *context)
598 {
599 	swi_sched(taskqueue_giant_ih, 0);
600 }
601 
602 static void
603 taskqueue_swi_giant_run(void *dummy)
604 {
605 	taskqueue_run(taskqueue_swi_giant);
606 }
607 
608 static int
609 _taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
610     cpuset_t *mask, const char *name, va_list ap)
611 {
612 	char ktname[MAXCOMLEN + 1];
613 	struct thread *td;
614 	struct taskqueue *tq;
615 	int i, error;
616 
617 	if (count <= 0)
618 		return (EINVAL);
619 
620 	vsnprintf(ktname, sizeof(ktname), name, ap);
621 	tq = *tqp;
622 
623 	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_TASKQUEUE,
624 	    M_NOWAIT | M_ZERO);
625 	if (tq->tq_threads == NULL) {
626 		printf("%s: no memory for %s threads\n", __func__, ktname);
627 		return (ENOMEM);
628 	}
629 
630 	for (i = 0; i < count; i++) {
631 		if (count == 1)
632 			error = kthread_add(taskqueue_thread_loop, tqp, NULL,
633 			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
634 		else
635 			error = kthread_add(taskqueue_thread_loop, tqp, NULL,
636 			    &tq->tq_threads[i], RFSTOPPED, 0,
637 			    "%s_%d", ktname, i);
638 		if (error) {
639 			/* should be ok to continue, taskqueue_free will dtrt */
640 			printf("%s: kthread_add(%s): error %d", __func__,
641 			    ktname, error);
642 			tq->tq_threads[i] = NULL;		/* paranoid */
643 		} else
644 			tq->tq_tcount++;
645 	}
646 	for (i = 0; i < count; i++) {
647 		if (tq->tq_threads[i] == NULL)
648 			continue;
649 		td = tq->tq_threads[i];
650 		if (mask) {
651 			error = cpuset_setthread(td->td_tid, mask);
652 			/*
653 			 * Failing to pin is rarely an actual fatal error;
654 			 * it'll just affect performance.
655 			 */
656 			if (error)
657 				printf("%s: curthread=%llu: can't pin; "
658 				    "error=%d\n",
659 				    __func__,
660 				    (unsigned long long) td->td_tid,
661 				    error);
662 		}
663 		thread_lock(td);
664 		sched_prio(td, pri);
665 		sched_add(td, SRQ_BORING);
666 		thread_unlock(td);
667 	}
668 
669 	return (0);
670 }
671 
672 int
673 taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
674     const char *name, ...)
675 {
676 	va_list ap;
677 	int error;
678 
679 	va_start(ap, name);
680 	error = _taskqueue_start_threads(tqp, count, pri, NULL, name, ap);
681 	va_end(ap);
682 	return (error);
683 }
684 
685 int
686 taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count, int pri,
687     cpuset_t *mask, const char *name, ...)
688 {
689 	va_list ap;
690 	int error;
691 
692 	va_start(ap, name);
693 	error = _taskqueue_start_threads(tqp, count, pri, mask, name, ap);
694 	va_end(ap);
695 	return (error);
696 }
697 
698 static inline void
699 taskqueue_run_callback(struct taskqueue *tq,
700     enum taskqueue_callback_type cb_type)
701 {
702 	taskqueue_callback_fn tq_callback;
703 
704 	TQ_ASSERT_UNLOCKED(tq);
705 	tq_callback = tq->tq_callbacks[cb_type];
706 	if (tq_callback != NULL)
707 		tq_callback(tq->tq_cb_contexts[cb_type]);
708 }
709 
710 void
711 taskqueue_thread_loop(void *arg)
712 {
713 	struct taskqueue **tqp, *tq;
714 
715 	tqp = arg;
716 	tq = *tqp;
717 	taskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
718 	TQ_LOCK(tq);
719 	tq->tq_curthread = curthread;
720 	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
721 		/* XXX ? */
722 		taskqueue_run_locked(tq);
723 		/*
724 		 * Because taskqueue_run() can drop tq_mutex, we need to
725 		 * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
726 		 * meantime, which means we missed a wakeup.
727 		 */
728 		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
729 			break;
730 		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
731 	}
732 	taskqueue_run_locked(tq);
733 	tq->tq_curthread = NULL;
734 	/*
735 	 * This thread is on its way out, so just drop the lock temporarily
736 	 * in order to call the shutdown callback.  This allows the callback
737 	 * to look at the taskqueue, even just before it dies.
738 	 */
739 	TQ_UNLOCK(tq);
740 	taskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
741 	TQ_LOCK(tq);
742 
743 	/* rendezvous with thread that asked us to terminate */
744 	tq->tq_tcount--;
745 	wakeup_one(tq->tq_threads);
746 	TQ_UNLOCK(tq);
747 	kthread_exit();
748 }
749 
750 void
751 taskqueue_thread_enqueue(void *context)
752 {
753 	struct taskqueue **tqp, *tq;
754 
755 	tqp = context;
756 	tq = *tqp;
757 	if (tq->tq_curthread != curthread)
758 		wakeup_one(tq);
759 }
760 
761 TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL,
762 		 swi_add(NULL, "task queue", taskqueue_swi_run, NULL, SWI_TQ,
763 		     INTR_MPSAFE, &taskqueue_ih));
764 
765 TASKQUEUE_DEFINE(swi_giant, taskqueue_swi_giant_enqueue, NULL,
766 		 swi_add(NULL, "Giant taskq", taskqueue_swi_giant_run,
767 		     NULL, SWI_TQ_GIANT, 0, &taskqueue_giant_ih));
768 
769 TASKQUEUE_DEFINE_THREAD(thread);
770 
771 struct taskqueue *
772 taskqueue_create_fast(const char *name, int mflags,
773 		 taskqueue_enqueue_fn enqueue, void *context)
774 {
775 	return _taskqueue_create(name, mflags, enqueue, context,
776 			MTX_SPIN, "fast_taskqueue");
777 }
778 
/* Software-interrupt cookie of the "fast" queue; set by swi_add() below. */
static void	*taskqueue_fast_ih;

/*
 * Enqueue hook of the "fast" queue: schedule the software interrupt that
 * was registered with taskqueue_fast_ih.  `context' is unused.
 */
static void
taskqueue_fast_enqueue(void *context)
{

	swi_sched(taskqueue_fast_ih, 0);
}
786 
787 static void
788 taskqueue_fast_run(void *dummy)
789 {
790 	taskqueue_run(taskqueue_fast);
791 }
792 
793 TASKQUEUE_FAST_DEFINE(fast, taskqueue_fast_enqueue, NULL,
794 	swi_add(NULL, "fast taskq", taskqueue_fast_run, NULL,
795 	SWI_TQ_FAST, INTR_MPSAFE, &taskqueue_fast_ih));
796 
797 int
798 taskqueue_member(struct taskqueue *queue, struct thread *td)
799 {
800 	int i, j, ret = 0;
801 
802 	for (i = 0, j = 0; ; i++) {
803 		if (queue->tq_threads[i] == NULL)
804 			continue;
805 		if (queue->tq_threads[i] == td) {
806 			ret = 1;
807 			break;
808 		}
809 		if (++j >= queue->tq_tcount)
810 			break;
811 	}
812 	return (ret);
813 }
814 
815 struct taskqgroup_cpu {
816 	LIST_HEAD(, grouptask)	tgc_tasks;
817 	struct taskqueue	*tgc_taskq;
818 	int	tgc_cnt;
819 	int	tgc_cpu;
820 };
821 
822 struct taskqgroup {
823 	struct taskqgroup_cpu tqg_queue[MAXCPU];
824 	struct mtx	tqg_lock;
825 	char *		tqg_name;
826 	int		tqg_adjusting;
827 	int		tqg_stride;
828 	int		tqg_cnt;
829 };
830 
831 struct taskq_bind_task {
832 	struct task bt_task;
833 	int	bt_cpuid;
834 };
835 
836 static void
837 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx)
838 {
839 	struct taskqgroup_cpu *qcpu;
840 
841 	qcpu = &qgroup->tqg_queue[idx];
842 	LIST_INIT(&qcpu->tgc_tasks);
843 	qcpu->tgc_taskq = taskqueue_create_fast(NULL, M_WAITOK,
844 	    taskqueue_thread_enqueue, &qcpu->tgc_taskq);
845 	taskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
846 	    "%s_%d", qgroup->tqg_name, idx);
847 	qcpu->tgc_cpu = idx * qgroup->tqg_stride;
848 }
849 
850 static void
851 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
852 {
853 
854 	taskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
855 }
856 
857 /*
858  * Find the taskq with least # of tasks that doesn't currently have any
859  * other queues from the uniq identifier.
860  */
861 static int
862 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
863 {
864 	struct grouptask *n;
865 	int i, idx, mincnt;
866 	int strict;
867 
868 	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
869 	if (qgroup->tqg_cnt == 0)
870 		return (0);
871 	idx = -1;
872 	mincnt = INT_MAX;
873 	/*
874 	 * Two passes;  First scan for a queue with the least tasks that
875 	 * does not already service this uniq id.  If that fails simply find
876 	 * the queue with the least total tasks;
877 	 */
878 	for (strict = 1; mincnt == INT_MAX; strict = 0) {
879 		for (i = 0; i < qgroup->tqg_cnt; i++) {
880 			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
881 				continue;
882 			if (strict) {
883 				LIST_FOREACH(n,
884 				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
885 					if (n->gt_uniq == uniq)
886 						break;
887 				if (n != NULL)
888 					continue;
889 			}
890 			mincnt = qgroup->tqg_queue[i].tgc_cnt;
891 			idx = i;
892 		}
893 	}
894 	if (idx == -1)
895 		panic("taskqgroup_find: Failed to pick a qid.");
896 
897 	return (idx);
898 }
899 
900 void
901 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
902     void *uniq, int irq, char *name)
903 {
904 	cpuset_t mask;
905 	int qid;
906 
907 	gtask->gt_uniq = uniq;
908 	gtask->gt_name = name;
909 	gtask->gt_irq = irq;
910 	gtask->gt_cpu = -1;
911 	mtx_lock(&qgroup->tqg_lock);
912 	qid = taskqgroup_find(qgroup, uniq);
913 	qgroup->tqg_queue[qid].tgc_cnt++;
914 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
915 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
916 	if (irq != -1 && smp_started) {
917 		CPU_ZERO(&mask);
918 		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
919 		mtx_unlock(&qgroup->tqg_lock);
920 		intr_setaffinity(irq, &mask);
921 	} else
922 		mtx_unlock(&qgroup->tqg_lock);
923 }
924 
925 int
926 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
927 	void *uniq, int cpu, int irq, char *name)
928 {
929 	cpuset_t mask;
930 	int i, qid;
931 
932 	qid = -1;
933 	gtask->gt_uniq = uniq;
934 	gtask->gt_name = name;
935 	gtask->gt_irq = irq;
936 	gtask->gt_cpu = cpu;
937 	mtx_lock(&qgroup->tqg_lock);
938 	if (smp_started) {
939 		for (i = 0; i < qgroup->tqg_cnt; i++)
940 			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
941 				qid = i;
942 				break;
943 			}
944 		if (qid == -1) {
945 			mtx_unlock(&qgroup->tqg_lock);
946 			return (EINVAL);
947 		}
948 	} else
949 		qid = 0;
950 	qgroup->tqg_queue[qid].tgc_cnt++;
951 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
952 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
953 	if (irq != -1 && smp_started) {
954 		CPU_ZERO(&mask);
955 		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
956 		mtx_unlock(&qgroup->tqg_lock);
957 		intr_setaffinity(irq, &mask);
958 	} else
959 		mtx_unlock(&qgroup->tqg_lock);
960 	return (0);
961 }
962 
963 void
964 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
965 {
966 	int i;
967 
968 	mtx_lock(&qgroup->tqg_lock);
969 	for (i = 0; i < qgroup->tqg_cnt; i++)
970 		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
971 			break;
972 	if (i == qgroup->tqg_cnt)
973 		panic("taskqgroup_detach: task not in group\n");
974 	qgroup->tqg_queue[i].tgc_cnt--;
975 	LIST_REMOVE(gtask, gt_list);
976 	mtx_unlock(&qgroup->tqg_lock);
977 	gtask->gt_taskqueue = NULL;
978 }
979 
980 static void
981 taskqgroup_binder(void *ctx, int pending)
982 {
983 	struct taskq_bind_task *task = (struct taskq_bind_task *)ctx;
984 	cpuset_t mask;
985 	int error;
986 
987 	CPU_ZERO(&mask);
988 	CPU_SET(task->bt_cpuid, &mask);
989 	error = cpuset_setthread(curthread->td_tid, &mask);
990 	thread_lock(curthread);
991 	sched_bind(curthread, task->bt_cpuid);
992 	thread_unlock(curthread);
993 
994 	if (error)
995 		printf("taskqgroup_binder: setaffinity failed: %d\n",
996 		    error);
997 	free(task, M_DEVBUF);
998 }
999 
1000 static void
1001 taskqgroup_bind(struct taskqgroup *qgroup)
1002 {
1003 	struct taskq_bind_task *task;
1004 	int i;
1005 
1006 	/*
1007 	 * Bind taskqueue threads to specific CPUs, if they have been assigned
1008 	 * one.
1009 	 */
1010 	for (i = 0; i < qgroup->tqg_cnt; i++) {
1011 		task = malloc(sizeof (*task), M_DEVBUF, M_NOWAIT);
1012 		TASK_INIT(&task->bt_task, 0, taskqgroup_binder, task);
1013 		task->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
1014 		taskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
1015 		    &task->bt_task);
1016 	}
1017 }
1018 
1019 static int
1020 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
1021 {
1022 	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
1023 	cpuset_t mask;
1024 	struct grouptask *gtask;
1025 	int i, old_cnt, qid;
1026 
1027 	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
1028 
1029 	if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) {
1030 		printf("taskqgroup_adjust failed cnt: %d stride: %d mp_ncpus: %d smp_started: %d\n",
1031 			   cnt, stride, mp_ncpus, smp_started);
1032 		return (EINVAL);
1033 	}
1034 	if (qgroup->tqg_adjusting) {
1035 		printf("taskqgroup_adjust failed: adjusting\n");
1036 		return (EBUSY);
1037 	}
1038 	qgroup->tqg_adjusting = 1;
1039 	old_cnt = qgroup->tqg_cnt;
1040 	mtx_unlock(&qgroup->tqg_lock);
1041 	/*
1042 	 * Set up queue for tasks added before boot.
1043 	 */
1044 	if (old_cnt == 0) {
1045 		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
1046 		    grouptask, gt_list);
1047 		qgroup->tqg_queue[0].tgc_cnt = 0;
1048 	}
1049 
1050 	/*
1051 	 * If new taskq threads have been added.
1052 	 */
1053 	for (i = old_cnt; i < cnt; i++)
1054 		taskqgroup_cpu_create(qgroup, i);
1055 	mtx_lock(&qgroup->tqg_lock);
1056 	qgroup->tqg_cnt = cnt;
1057 	qgroup->tqg_stride = stride;
1058 
1059 	/*
1060 	 * Adjust drivers to use new taskqs.
1061 	 */
1062 	for (i = 0; i < old_cnt; i++) {
1063 		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
1064 			LIST_REMOVE(gtask, gt_list);
1065 			qgroup->tqg_queue[i].tgc_cnt--;
1066 			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
1067 		}
1068 	}
1069 
1070 	while ((gtask = LIST_FIRST(&gtask_head))) {
1071 		LIST_REMOVE(gtask, gt_list);
1072 		if (gtask->gt_cpu == -1)
1073 			qid = taskqgroup_find(qgroup, gtask->gt_uniq);
1074 		else {
1075 			for (i = 0; i < qgroup->tqg_cnt; i++)
1076 				if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) {
1077 					qid = i;
1078 					break;
1079 				}
1080 		}
1081 		qgroup->tqg_queue[qid].tgc_cnt++;
1082 		LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
1083 		    gt_list);
1084 		gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
1085 	}
1086 	/*
1087 	 * Set new CPU and IRQ affinity
1088 	 */
1089 	for (i = 0; i < cnt; i++) {
1090 		qgroup->tqg_queue[i].tgc_cpu = i * qgroup->tqg_stride;
1091 		CPU_ZERO(&mask);
1092 		CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask);
1093 		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) {
1094 			if (gtask->gt_irq == -1)
1095 				continue;
1096 			intr_setaffinity(gtask->gt_irq, &mask);
1097 		}
1098 	}
1099 	mtx_unlock(&qgroup->tqg_lock);
1100 
1101 	/*
1102 	 * If taskq thread count has been reduced.
1103 	 */
1104 	for (i = cnt; i < old_cnt; i++)
1105 		taskqgroup_cpu_remove(qgroup, i);
1106 
1107 	mtx_lock(&qgroup->tqg_lock);
1108 	qgroup->tqg_adjusting = 0;
1109 
1110 	taskqgroup_bind(qgroup);
1111 
1112 	return (0);
1113 }
1114 
1115 int
1116 taskqgroup_adjust(struct taskqgroup *qgroup, int cpu, int stride)
1117 {
1118 	int error;
1119 
1120 	mtx_lock(&qgroup->tqg_lock);
1121 	error = _taskqgroup_adjust(qgroup, cpu, stride);
1122 	mtx_unlock(&qgroup->tqg_lock);
1123 
1124 	return (error);
1125 }
1126 
1127 struct taskqgroup *
1128 taskqgroup_create(char *name)
1129 {
1130 	struct taskqgroup *qgroup;
1131 
1132 	qgroup = malloc(sizeof(*qgroup), M_TASKQUEUE, M_WAITOK | M_ZERO);
1133 	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
1134 	qgroup->tqg_name = name;
1135 	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
1136 
1137 	return (qgroup);
1138 }
1139 
/*
 * Placeholder: currently does nothing.  The group's queues, mutex and
 * memory are NOT released by this function.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}
1145