/*-
 * Copyright (c) 2000 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/taskqueue.h>
#include <sys/unistd.h>
#include <machine/stdarg.h>

static MALLOC_DEFINE(M_TASKQUEUE, "taskqueue", "Task Queues");
static void	*taskqueue_giant_ih;
static void	*taskqueue_ih;
static void	 taskqueue_fast_enqueue(void *);
static void	 taskqueue_swi_enqueue(void *);
static void	 taskqueue_swi_giant_enqueue(void *);

struct taskqueue_busy {
	struct task	*tb_running;
	TAILQ_ENTRY(taskqueue_busy) tb_link;
};

struct task * const TB_DRAIN_WAITER = (struct task *)0x1;

struct taskqueue {
	STAILQ_HEAD(, task)	tq_queue;
	taskqueue_enqueue_fn	tq_enqueue;
	void			*tq_context;
	char			*tq_name;
	TAILQ_HEAD(, taskqueue_busy) tq_active;
	struct mtx		tq_mutex;
	struct thread		**tq_threads;
	int			tq_tcount;
	int			tq_spin;
	int			tq_flags;
	int			tq_callouts;
	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
};

#define	TQ_FLAGS_ACTIVE		(1 << 0)
#define	TQ_FLAGS_BLOCKED	(1 << 1)
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	(1 << 2)

#define	DT_CALLOUT_ARMED	(1 << 0)

#define	TQ_LOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_lock_spin(&(tq)->tq_mutex);			\
		else							\
			mtx_lock(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)

#define	TQ_UNLOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
		else							\
			mtx_unlock(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)

void
_timeout_task_init(struct taskqueue *queue, struct timeout_task *timeout_task,
    int priority, task_fn_t func, void *context)
{

	TASK_INIT(&timeout_task->t, priority, func, context);
	callout_init_mtx(&timeout_task->c, &queue->tq_mutex,
	    CALLOUT_RETURNUNLOCKED);
	timeout_task->q = queue;
	timeout_task->f = 0;
}
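
/*
 * Illustrative sketch (not part of this file): consumers normally reach
 * _timeout_task_init() through the TIMEOUT_TASK_INIT() macro from
 * <sys/taskqueue.h>.  "sc" and "my_timeout_fn" are hypothetical driver
 * names.
 *
 *	TIMEOUT_TASK_INIT(sc->sc_tq, &sc->sc_tt, 0, my_timeout_fn, sc);
 */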

static __inline int
TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
    int t)
{
	if (tq->tq_spin)
		return (msleep_spin(p, m, wm, t));
	return (msleep(p, m, pri, wm, t));
}

static struct taskqueue *
_taskqueue_create(const char *name, int mflags,
		 taskqueue_enqueue_fn enqueue, void *context,
		 int mtxflags, const char *mtxname __unused)
{
	struct taskqueue *queue;
	char *tq_name;

	tq_name = malloc(TASKQUEUE_NAMELEN, M_TASKQUEUE, mflags | M_ZERO);
	if (tq_name == NULL)
		return (NULL);

	queue = malloc(sizeof(struct taskqueue), M_TASKQUEUE, mflags | M_ZERO);
	if (queue == NULL) {
		free(tq_name, M_TASKQUEUE);
		return (NULL);
	}

	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");

	STAILQ_INIT(&queue->tq_queue);
	TAILQ_INIT(&queue->tq_active);
	queue->tq_enqueue = enqueue;
	queue->tq_context = context;
	queue->tq_name = tq_name;
	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
	queue->tq_flags |= TQ_FLAGS_ACTIVE;
	if (enqueue == taskqueue_fast_enqueue ||
	    enqueue == taskqueue_swi_enqueue ||
	    enqueue == taskqueue_swi_giant_enqueue ||
	    enqueue == taskqueue_thread_enqueue)
		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);

	return (queue);
}

struct taskqueue *
taskqueue_create(const char *name, int mflags,
		 taskqueue_enqueue_fn enqueue, void *context)
{

	return _taskqueue_create(name, mflags, enqueue, context,
			MTX_DEF, name);
}
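
/*
 * Illustrative sketch (not part of this file): a typical consumer pairs
 * taskqueue_create() with taskqueue_thread_enqueue so that a dedicated
 * kernel thread services the queue.  Note that the queue pointer itself
 * is passed as the enqueue context.  "sc" is a hypothetical softc.
 *
 *	sc->sc_tq = taskqueue_create("mydev_tq", M_WAITOK,
 *	    taskqueue_thread_enqueue, &sc->sc_tq);
 *	taskqueue_start_threads(&sc->sc_tq, 1, PWAIT, "mydev taskq");
 */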

void
taskqueue_set_callback(struct taskqueue *queue,
    enum taskqueue_callback_type cb_type, taskqueue_callback_fn callback,
    void *context)
{

	KASSERT(((cb_type >= TASKQUEUE_CALLBACK_TYPE_MIN) &&
	    (cb_type <= TASKQUEUE_CALLBACK_TYPE_MAX)),
	    ("Callback type %d not valid, must be %d-%d", cb_type,
	    TASKQUEUE_CALLBACK_TYPE_MIN, TASKQUEUE_CALLBACK_TYPE_MAX));
	KASSERT((queue->tq_callbacks[cb_type] == NULL),
	    ("Re-initialization of taskqueue callback?"));

	queue->tq_callbacks[cb_type] = callback;
	queue->tq_cb_contexts[cb_type] = context;
}
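
/*
 * Illustrative sketch: a callback is normally registered after
 * taskqueue_create() but before taskqueue_start_threads(), since the
 * INIT callback fires as each thread starts (see taskqueue_thread_loop()
 * below).  "my_init_cb" is a hypothetical taskqueue_callback_fn.
 *
 *	taskqueue_set_callback(sc->sc_tq, TASKQUEUE_CALLBACK_TYPE_INIT,
 *	    my_init_cb, sc);
 */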

/*
 * Signal taskqueue threads to terminate, and wait until the counts of
 * running threads and armed timeout callouts both drop to zero.
 */
static void
taskqueue_terminate(struct thread **pp, struct taskqueue *tq)
{

	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
		wakeup(tq);
		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
	}
}

void
taskqueue_free(struct taskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
	taskqueue_terminate(queue->tq_threads, queue);
	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
	mtx_destroy(&queue->tq_mutex);
	free(queue->tq_threads, M_TASKQUEUE);
	free(queue->tq_name, M_TASKQUEUE);
	free(queue, M_TASKQUEUE);
}

static int
taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
{
	struct task *ins;
	struct task *prev;

	KASSERT(task->ta_func != NULL, ("enqueueing task with NULL func"));
	/*
	 * Count multiple enqueues.
	 */
	if (task->ta_pending) {
		if (task->ta_pending < USHRT_MAX)
			task->ta_pending++;
		TQ_UNLOCK(queue);
		return (0);
	}

	/*
	 * Optimise the case when all tasks have the same priority.
	 */
	prev = STAILQ_LAST(&queue->tq_queue, task, ta_link);
	if (!prev || prev->ta_priority >= task->ta_priority) {
		STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
	} else {
		prev = NULL;
		for (ins = STAILQ_FIRST(&queue->tq_queue); ins;
		     prev = ins, ins = STAILQ_NEXT(ins, ta_link))
			if (ins->ta_priority < task->ta_priority)
				break;

		if (prev)
			STAILQ_INSERT_AFTER(&queue->tq_queue, prev, task, ta_link);
		else
			STAILQ_INSERT_HEAD(&queue->tq_queue, task, ta_link);
	}

	task->ta_pending = 1;
	if ((queue->tq_flags & TQ_FLAGS_UNLOCKED_ENQUEUE) != 0)
		TQ_UNLOCK(queue);
	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
		queue->tq_enqueue(queue->tq_context);
	if ((queue->tq_flags & TQ_FLAGS_UNLOCKED_ENQUEUE) == 0)
		TQ_UNLOCK(queue);

	/* Return with lock released. */
	return (0);
}

int
grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task)
{
	TQ_LOCK(queue);
	if (task->ta_pending) {
		TQ_UNLOCK(queue);
		return (0);
	}
	STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
	task->ta_pending = 1;
	TQ_UNLOCK(queue);
	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
		queue->tq_enqueue(queue->tq_context);
	return (0);
}

int
taskqueue_enqueue(struct taskqueue *queue, struct task *task)
{
	int res;

	TQ_LOCK(queue);
	res = taskqueue_enqueue_locked(queue, task);
	/* The lock is released inside. */

	return (res);
}
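
/*
 * Illustrative sketch: a task is initialized once and may then be
 * enqueued repeatedly; enqueueing an already-pending task only bumps
 * its pending count (see taskqueue_enqueue_locked() above).
 * "my_task_fn" is a hypothetical handler of type task_fn_t.
 *
 *	TASK_INIT(&sc->sc_task, 0, my_task_fn, sc);
 *	taskqueue_enqueue(sc->sc_tq, &sc->sc_task);
 */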

static void
taskqueue_timeout_func(void *arg)
{
	struct taskqueue *queue;
	struct timeout_task *timeout_task;

	timeout_task = arg;
	queue = timeout_task->q;
	KASSERT((timeout_task->f & DT_CALLOUT_ARMED) != 0, ("Stray timeout"));
	timeout_task->f &= ~DT_CALLOUT_ARMED;
	queue->tq_callouts--;
	taskqueue_enqueue_locked(timeout_task->q, &timeout_task->t);
	/* The lock is released inside. */
}

int
taskqueue_enqueue_timeout(struct taskqueue *queue,
    struct timeout_task *timeout_task, int ticks)
{
	int res;

	TQ_LOCK(queue);
	KASSERT(timeout_task->q == NULL || timeout_task->q == queue,
	    ("Migrated queue"));
	KASSERT(!queue->tq_spin, ("Timeout for spin-queue"));
	timeout_task->q = queue;
	res = timeout_task->t.ta_pending;
	if (ticks == 0) {
		taskqueue_enqueue_locked(queue, &timeout_task->t);
		/* The lock is released inside. */
	} else {
		if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) {
			res++;
		} else {
			queue->tq_callouts++;
			timeout_task->f |= DT_CALLOUT_ARMED;
			if (ticks < 0)
				ticks = -ticks; /* Ignore overflow. */
		}
		if (ticks > 0) {
			callout_reset(&timeout_task->c, ticks,
			    taskqueue_timeout_func, timeout_task);
		}
		TQ_UNLOCK(queue);
	}
	return (res);
}
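
/*
 * Illustrative sketch: schedule the timeout task roughly one second
 * from now; a ticks value of 0 enqueues immediately, as the code above
 * shows.
 *
 *	taskqueue_enqueue_timeout(sc->sc_tq, &sc->sc_tt, hz);
 */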

static void
taskqueue_task_nop_fn(void *context, int pending)
{
}

/*
 * Block until all currently queued tasks in this taskqueue
 * have begun execution.  Tasks queued during execution of
 * this function are ignored.
 */
static void
taskqueue_drain_tq_queue(struct taskqueue *queue)
{
	struct task t_barrier;

	if (STAILQ_EMPTY(&queue->tq_queue))
		return;

	/*
	 * Enqueue our barrier after all current tasks, but with
	 * the highest priority so that newly queued tasks cannot
	 * pass it.  Because of the high priority, we cannot use
	 * taskqueue_enqueue_locked() directly (it drops the lock
	 * anyway), so just insert it at the tail while we hold the
	 * queue lock.
	 */
	TASK_INIT(&t_barrier, USHRT_MAX, taskqueue_task_nop_fn, &t_barrier);
	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
	t_barrier.ta_pending = 1;

	/*
	 * Once the barrier has executed, all previously queued tasks
	 * have completed or are currently executing.
	 */
	while (t_barrier.ta_pending != 0)
		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
}

/*
 * Block until all currently executing tasks for this taskqueue
 * complete.  Tasks that begin execution during the execution
 * of this function are ignored.
 */
static void
taskqueue_drain_tq_active(struct taskqueue *queue)
{
	struct taskqueue_busy tb_marker, *tb_first;

	if (TAILQ_EMPTY(&queue->tq_active))
		return;

	/* Block taskqueue_terminate(). */
	queue->tq_callouts++;

	/*
	 * Wait for all currently executing taskqueue threads
	 * to go idle.
	 */
	tb_marker.tb_running = TB_DRAIN_WAITER;
	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);

	/*
	 * Wake up any other drain waiter that happened to queue up
	 * without any intervening active thread.
	 */
	tb_first = TAILQ_FIRST(&queue->tq_active);
	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
		wakeup(tb_first);

	/* Release taskqueue_terminate(). */
	queue->tq_callouts--;
	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
		wakeup_one(queue->tq_threads);
}

void
taskqueue_block(struct taskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags |= TQ_FLAGS_BLOCKED;
	TQ_UNLOCK(queue);
}

void
taskqueue_unblock(struct taskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
	if (!STAILQ_EMPTY(&queue->tq_queue))
		queue->tq_enqueue(queue->tq_context);
	TQ_UNLOCK(queue);
}

static void
taskqueue_run_locked(struct taskqueue *queue)
{
	struct taskqueue_busy tb;
	struct taskqueue_busy *tb_first;
	struct task *task;
	int pending;

	KASSERT(queue != NULL, ("tq is NULL"));
	TQ_ASSERT_LOCKED(queue);
	tb.tb_running = NULL;

	while (STAILQ_FIRST(&queue->tq_queue)) {
		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);

		/*
		 * Carefully remove the first task from the queue and
		 * zero its pending count.
		 */
		task = STAILQ_FIRST(&queue->tq_queue);
		KASSERT(task != NULL, ("task is NULL"));
		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
		pending = task->ta_pending;
		task->ta_pending = 0;
		tb.tb_running = task;
		TQ_UNLOCK(queue);

		KASSERT(task->ta_func != NULL, ("task->ta_func is NULL"));
		task->ta_func(task->ta_context, pending);

		TQ_LOCK(queue);
		tb.tb_running = NULL;
		wakeup(task);

		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
		tb_first = TAILQ_FIRST(&queue->tq_active);
		if (tb_first != NULL &&
		    tb_first->tb_running == TB_DRAIN_WAITER)
			wakeup(tb_first);
	}
}

void
taskqueue_run(struct taskqueue *queue)
{

	TQ_LOCK(queue);
	taskqueue_run_locked(queue);
	TQ_UNLOCK(queue);
}

static int
task_is_running(struct taskqueue *queue, struct task *task)
{
	struct taskqueue_busy *tb;

	TQ_ASSERT_LOCKED(queue);
	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
		if (tb->tb_running == task)
			return (1);
	}
	return (0);
}

static int
taskqueue_cancel_locked(struct taskqueue *queue, struct task *task,
    u_int *pendp)
{

	if (task->ta_pending > 0)
		STAILQ_REMOVE(&queue->tq_queue, task, task, ta_link);
	if (pendp != NULL)
		*pendp = task->ta_pending;
	task->ta_pending = 0;
	return (task_is_running(queue, task) ? EBUSY : 0);
}

int
taskqueue_cancel(struct taskqueue *queue, struct task *task, u_int *pendp)
{
	int error;

	TQ_LOCK(queue);
	error = taskqueue_cancel_locked(queue, task, pendp);
	TQ_UNLOCK(queue);

	return (error);
}
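
/*
 * Illustrative sketch: EBUSY from taskqueue_cancel() means the task
 * could not be removed because it is currently running, so callers
 * that need a hard stop typically fall back to taskqueue_drain().
 *
 *	if (taskqueue_cancel(sc->sc_tq, &sc->sc_task, NULL) != 0)
 *		taskqueue_drain(sc->sc_tq, &sc->sc_task);
 */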

int
taskqueue_cancel_timeout(struct taskqueue *queue,
    struct timeout_task *timeout_task, u_int *pendp)
{
	u_int pending, pending1;
	int error;

	TQ_LOCK(queue);
	pending = !!(callout_stop(&timeout_task->c) > 0);
	error = taskqueue_cancel_locked(queue, &timeout_task->t, &pending1);
	if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) {
		timeout_task->f &= ~DT_CALLOUT_ARMED;
		queue->tq_callouts--;
	}
	TQ_UNLOCK(queue);

	if (pendp != NULL)
		*pendp = pending + pending1;
	return (error);
}

void
taskqueue_drain(struct taskqueue *queue, struct task *task)
{

	if (!queue->tq_spin)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

	TQ_LOCK(queue);
	while (task->ta_pending != 0 || task_is_running(queue, task))
		TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0);
	TQ_UNLOCK(queue);
}

void
taskqueue_drain_all(struct taskqueue *queue)
{

	if (!queue->tq_spin)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

	TQ_LOCK(queue);
	taskqueue_drain_tq_queue(queue);
	taskqueue_drain_tq_active(queue);
	TQ_UNLOCK(queue);
}

void
taskqueue_drain_timeout(struct taskqueue *queue,
    struct timeout_task *timeout_task)
{

	callout_drain(&timeout_task->c);
	taskqueue_drain(queue, &timeout_task->t);
}
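
/*
 * Illustrative sketch of a detach path: drain everything before the
 * tasks' context is freed, then release the queue itself.
 *
 *	taskqueue_drain_timeout(sc->sc_tq, &sc->sc_tt);
 *	taskqueue_drain_all(sc->sc_tq);
 *	taskqueue_free(sc->sc_tq);
 */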

static void
taskqueue_swi_enqueue(void *context)
{
	swi_sched(taskqueue_ih, 0);
}

static void
taskqueue_swi_run(void *dummy)
{
	taskqueue_run(taskqueue_swi);
}

static void
taskqueue_swi_giant_enqueue(void *context)
{
	swi_sched(taskqueue_giant_ih, 0);
}

static void
taskqueue_swi_giant_run(void *dummy)
{
	taskqueue_run(taskqueue_swi_giant);
}

static int
_taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
    cpuset_t *mask, const char *name, va_list ap)
{
	char ktname[MAXCOMLEN + 1];
	struct thread *td;
	struct taskqueue *tq;
	int i, error;

	if (count <= 0)
		return (EINVAL);

	vsnprintf(ktname, sizeof(ktname), name, ap);
	tq = *tqp;

	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_TASKQUEUE,
	    M_NOWAIT | M_ZERO);
	if (tq->tq_threads == NULL) {
		printf("%s: no memory for %s threads\n", __func__, ktname);
		return (ENOMEM);
	}

	for (i = 0; i < count; i++) {
		if (count == 1)
			error = kthread_add(taskqueue_thread_loop, tqp, NULL,
			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
		else
			error = kthread_add(taskqueue_thread_loop, tqp, NULL,
			    &tq->tq_threads[i], RFSTOPPED, 0,
			    "%s_%d", ktname, i);
		if (error) {
			/*
			 * It should be safe to continue;
			 * taskqueue_free() will do the right thing.
			 */
			printf("%s: kthread_add(%s): error %d\n", __func__,
			    ktname, error);
			tq->tq_threads[i] = NULL;		/* paranoid */
		} else
			tq->tq_tcount++;
	}
	for (i = 0; i < count; i++) {
		if (tq->tq_threads[i] == NULL)
			continue;
		td = tq->tq_threads[i];
		if (mask) {
			error = cpuset_setthread(td->td_tid, mask);
			/*
			 * Failing to pin is rarely an actual fatal error;
			 * it'll just affect performance.
			 */
			if (error)
				printf("%s: curthread=%llu: can't pin; "
				    "error=%d\n",
				    __func__,
				    (unsigned long long) td->td_tid,
				    error);
		}
		thread_lock(td);
		sched_prio(td, pri);
		sched_add(td, SRQ_BORING);
		thread_unlock(td);
	}

	return (0);
}

int
taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
    const char *name, ...)
{
	va_list ap;
	int error;

	va_start(ap, name);
	error = _taskqueue_start_threads(tqp, count, pri, NULL, name, ap);
	va_end(ap);
	return (error);
}
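
/*
 * Illustrative sketch: the name argument is a printf-like format, and
 * with count > 1 each thread gets a "_%d" suffix (see
 * _taskqueue_start_threads() above).  "sc->sc_dev" is a hypothetical
 * device_t.
 *
 *	taskqueue_start_threads(&sc->sc_tq, 4, PWAIT, "%s taskq",
 *	    device_get_nameunit(sc->sc_dev));
 */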

int
taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count, int pri,
    cpuset_t *mask, const char *name, ...)
{
	va_list ap;
	int error;

	va_start(ap, name);
	error = _taskqueue_start_threads(tqp, count, pri, mask, name, ap);
	va_end(ap);
	return (error);
}

static inline void
taskqueue_run_callback(struct taskqueue *tq,
    enum taskqueue_callback_type cb_type)
{
	taskqueue_callback_fn tq_callback;

	TQ_ASSERT_UNLOCKED(tq);
	tq_callback = tq->tq_callbacks[cb_type];
	if (tq_callback != NULL)
		tq_callback(tq->tq_cb_contexts[cb_type]);
}

void
taskqueue_thread_loop(void *arg)
{
	struct taskqueue **tqp, *tq;

	tqp = arg;
	tq = *tqp;
	taskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
	TQ_LOCK(tq);
	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
		/* XXX ? */
		taskqueue_run_locked(tq);
		/*
		 * Because taskqueue_run_locked() can drop tq_mutex, we
		 * need to check whether the TQ_FLAGS_ACTIVE flag was
		 * cleared in the meantime, which would mean we missed a
		 * wakeup.
		 */
		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
			break;
		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
	}
	taskqueue_run_locked(tq);
	/*
	 * This thread is on its way out, so just drop the lock temporarily
	 * in order to call the shutdown callback.  This allows the callback
	 * to look at the taskqueue, even just before it dies.
	 */
	TQ_UNLOCK(tq);
	taskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
	TQ_LOCK(tq);

	/* Rendezvous with the thread that asked us to terminate. */
	tq->tq_tcount--;
	wakeup_one(tq->tq_threads);
	TQ_UNLOCK(tq);
	kthread_exit();
}

void
taskqueue_thread_enqueue(void *context)
{
	struct taskqueue **tqp, *tq;

	tqp = context;
	tq = *tqp;
	wakeup_one(tq);
}

TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL,
		 swi_add(NULL, "task queue", taskqueue_swi_run, NULL, SWI_TQ,
		     INTR_MPSAFE, &taskqueue_ih));

TASKQUEUE_DEFINE(swi_giant, taskqueue_swi_giant_enqueue, NULL,
		 swi_add(NULL, "Giant taskq", taskqueue_swi_giant_run,
		     NULL, SWI_TQ_GIANT, 0, &taskqueue_giant_ih));

TASKQUEUE_DEFINE_THREAD(thread);
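
/*
 * Illustrative sketch: the system queues defined above are available to
 * any kernel consumer.  taskqueue_thread runs in a kernel thread and may
 * sleep; taskqueue_swi runs from a software interrupt and must not.
 *
 *	taskqueue_enqueue(taskqueue_thread, &sc->sc_task);
 */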

struct taskqueue *
taskqueue_create_fast(const char *name, int mflags,
		 taskqueue_enqueue_fn enqueue, void *context)
{
	return _taskqueue_create(name, mflags, enqueue, context,
			MTX_SPIN, "fast_taskqueue");
}

static void	*taskqueue_fast_ih;

static void
taskqueue_fast_enqueue(void *context)
{
	swi_sched(taskqueue_fast_ih, 0);
}

static void
taskqueue_fast_run(void *dummy)
{
	taskqueue_run(taskqueue_fast);
}

TASKQUEUE_FAST_DEFINE(fast, taskqueue_fast_enqueue, NULL,
	swi_add(NULL, "fast taskq", taskqueue_fast_run, NULL,
	SWI_TQ_FAST, INTR_MPSAFE, &taskqueue_fast_ih));

int
taskqueue_member(struct taskqueue *queue, struct thread *td)
{
	int i, j, ret = 0;

	for (i = 0, j = 0; ; i++) {
		if (queue->tq_threads[i] == NULL)
			continue;
		if (queue->tq_threads[i] == td) {
			ret = 1;
			break;
		}
		if (++j >= queue->tq_tcount)
			break;
	}
	return (ret);
}

struct taskqgroup_cpu {
	LIST_HEAD(, grouptask)	tgc_tasks;
	struct taskqueue	*tgc_taskq;
	int	tgc_cnt;
	int	tgc_cpu;
};

struct taskqgroup {
	struct taskqgroup_cpu tqg_queue[MAXCPU];
	struct mtx	tqg_lock;
	char *		tqg_name;
	int		tqg_adjusting;
	int		tqg_stride;
	int		tqg_cnt;
};

struct taskq_bind_task {
	struct task bt_task;
	int	bt_cpuid;
};

static void
taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx)
{
	struct taskqgroup_cpu *qcpu;
	int i, j;

	qcpu = &qgroup->tqg_queue[idx];
	LIST_INIT(&qcpu->tgc_tasks);
	qcpu->tgc_taskq = taskqueue_create_fast(NULL, M_WAITOK,
	    taskqueue_thread_enqueue, &qcpu->tgc_taskq);
	taskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
	    "%s_%d", qgroup->tqg_name, idx);

	for (i = CPU_FIRST(), j = 0; j < idx * qgroup->tqg_stride;
	    j++, i = CPU_NEXT(i)) {
		/*
		 * Empty loop body: walk i forward to the
		 * (idx * qgroup->tqg_stride)'th CPU, wrapping around
		 * the set of present CPUs as needed.
		 */
	}
	qcpu->tgc_cpu = i;
}

static void
taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
{

	taskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
}

/*
 * Find the taskq with the fewest tasks that does not already service
 * this uniq identifier.
 */
static int
taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
{
	struct grouptask *n;
	int i, idx, mincnt;
	int strict;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
	if (qgroup->tqg_cnt == 0)
		return (0);
	idx = -1;
	mincnt = INT_MAX;
	/*
	 * Two passes: first scan for a queue with the fewest tasks that
	 * does not already service this uniq id.  If that fails, simply
	 * pick the queue with the fewest total tasks.
	 */
	for (strict = 1; mincnt == INT_MAX; strict = 0) {
		for (i = 0; i < qgroup->tqg_cnt; i++) {
			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
				continue;
			if (strict) {
				LIST_FOREACH(n,
				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
					if (n->gt_uniq == uniq)
						break;
				if (n != NULL)
					continue;
			}
			mincnt = qgroup->tqg_queue[i].tgc_cnt;
			idx = i;
		}
	}
	if (idx == -1)
		panic("taskqgroup_find: Failed to pick a qid.");

	return (idx);
}

void
taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
    void *uniq, int irq, char *name)
{
	cpuset_t mask;
	int qid;

	gtask->gt_uniq = uniq;
	gtask->gt_name = name;
	gtask->gt_irq = irq;
	gtask->gt_cpu = -1;
	mtx_lock(&qgroup->tqg_lock);
	qid = taskqgroup_find(qgroup, uniq);
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	if (irq != -1 && smp_started) {
		CPU_ZERO(&mask);
		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
		mtx_unlock(&qgroup->tqg_lock);
		intr_setaffinity(irq, &mask);
	} else
		mtx_unlock(&qgroup->tqg_lock);
}
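
/*
 * Illustrative sketch (hypothetical "rxq" names, and assuming a
 * grouptask initializer such as GROUPTASK_INIT() from this era of
 * <sys/taskqueue.h>): a driver attaches one grouptask per receive
 * queue so the group can spread the work across CPUs.
 *
 *	GROUPTASK_INIT(&rxq->gtask, 0, my_rx_fn, rxq);
 *	taskqgroup_attach(qgroup_net, &rxq->gtask, rxq, rxq->irq, "rxq");
 */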

int
taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
	void *uniq, int cpu, int irq, char *name)
{
	cpuset_t mask;
	int i, qid;

	qid = -1;
	gtask->gt_uniq = uniq;
	gtask->gt_name = name;
	gtask->gt_irq = irq;
	gtask->gt_cpu = cpu;
	mtx_lock(&qgroup->tqg_lock);
	if (smp_started) {
		for (i = 0; i < qgroup->tqg_cnt; i++)
			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
				qid = i;
				break;
			}
		if (qid == -1) {
			mtx_unlock(&qgroup->tqg_lock);
			return (EINVAL);
		}
	} else
		qid = 0;
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	if (irq != -1 && smp_started) {
		CPU_ZERO(&mask);
		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
		mtx_unlock(&qgroup->tqg_lock);
		intr_setaffinity(irq, &mask);
	} else
		mtx_unlock(&qgroup->tqg_lock);
	return (0);
}

void
taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	int i;

	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++)
		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
			break;
	if (i == qgroup->tqg_cnt)
		panic("taskqgroup_detach: task not in group\n");
	qgroup->tqg_queue[i].tgc_cnt--;
	LIST_REMOVE(gtask, gt_list);
	mtx_unlock(&qgroup->tqg_lock);
	gtask->gt_taskqueue = NULL;
}

static void
taskqgroup_binder(void *ctx, int pending)
{
	struct taskq_bind_task *task = (struct taskq_bind_task *)ctx;
	cpuset_t mask;
	int error;

	CPU_ZERO(&mask);
	CPU_SET(task->bt_cpuid, &mask);
	error = cpuset_setthread(curthread->td_tid, &mask);
	thread_lock(curthread);
	sched_bind(curthread, task->bt_cpuid);
	thread_unlock(curthread);

	if (error)
		printf("taskqgroup_binder: setaffinity failed: %d\n",
		    error);
	free(task, M_DEVBUF);
}

static void
taskqgroup_bind(struct taskqgroup *qgroup)
{
	struct taskq_bind_task *task;
	int i;

	/*
	 * Bind taskqueue threads to specific CPUs, if they have been assigned
	 * one.
	 */
	for (i = 0; i < qgroup->tqg_cnt; i++) {
		task = malloc(sizeof(*task), M_DEVBUF, M_NOWAIT);
		if (task == NULL)
			continue;	/* M_NOWAIT may fail; skip binding. */
		TASK_INIT(&task->bt_task, 0, taskqgroup_binder, task);
		task->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
		taskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
		    &task->bt_task);
	}
}

static int
_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
	cpuset_t mask;
	struct grouptask *gtask;
	int i, k, old_cnt, qid, cpu;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);

	if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) {
		printf("taskqgroup_adjust failed cnt: %d stride: %d "
		    "mp_ncpus: %d smp_started: %d\n", cnt, stride, mp_ncpus,
		    smp_started);
		return (EINVAL);
	}
	if (qgroup->tqg_adjusting) {
		printf("taskqgroup_adjust failed: adjusting\n");
		return (EBUSY);
	}
	qgroup->tqg_adjusting = 1;
	old_cnt = qgroup->tqg_cnt;
	mtx_unlock(&qgroup->tqg_lock);
	/*
	 * Set up the queue for tasks added before boot.
	 */
	if (old_cnt == 0) {
		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
		    grouptask, gt_list);
		qgroup->tqg_queue[0].tgc_cnt = 0;
	}

	/*
	 * Create any new taskq threads that have been added.
	 */
	for (i = old_cnt; i < cnt; i++)
		taskqgroup_cpu_create(qgroup, i);
	mtx_lock(&qgroup->tqg_lock);
	qgroup->tqg_cnt = cnt;
	qgroup->tqg_stride = stride;

	/*
	 * Adjust drivers to use the new taskqs.
	 */
	for (i = 0; i < old_cnt; i++) {
		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
			LIST_REMOVE(gtask, gt_list);
			qgroup->tqg_queue[i].tgc_cnt--;
			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
		}
	}

	while ((gtask = LIST_FIRST(&gtask_head))) {
		LIST_REMOVE(gtask, gt_list);
		if (gtask->gt_cpu == -1)
			qid = taskqgroup_find(qgroup, gtask->gt_uniq);
		else {
			/*
			 * Fall back to queue 0 if no queue services the
			 * requested CPU; qid would otherwise be used
			 * uninitialized.
			 */
			qid = 0;
			for (i = 0; i < qgroup->tqg_cnt; i++)
				if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) {
					qid = i;
					break;
				}
		}
		qgroup->tqg_queue[qid].tgc_cnt++;
		LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
		    gt_list);
		gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	}
	/*
	 * Set new CPU and IRQ affinity.
	 */
	cpu = CPU_FIRST();
	for (i = 0; i < cnt; i++) {
		qgroup->tqg_queue[i].tgc_cpu = cpu;
		for (k = 0; k < qgroup->tqg_stride; k++)
			cpu = CPU_NEXT(cpu);
		CPU_ZERO(&mask);
		CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask);
		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) {
			if (gtask->gt_irq == -1)
				continue;
			intr_setaffinity(gtask->gt_irq, &mask);
		}
	}
	mtx_unlock(&qgroup->tqg_lock);

	/*
	 * Tear down any taskq threads removed by a reduced count.
	 */
	for (i = cnt; i < old_cnt; i++)
		taskqgroup_cpu_remove(qgroup, i);

	mtx_lock(&qgroup->tqg_lock);
	qgroup->tqg_adjusting = 0;

	taskqgroup_bind(qgroup);

	return (0);
}

int
taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
	int error;

	mtx_lock(&qgroup->tqg_lock);
	error = _taskqgroup_adjust(qgroup, cnt, stride);
	mtx_unlock(&qgroup->tqg_lock);

	return (error);
}
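
/*
 * Illustrative sketch: groups are typically sized once SMP is up, e.g.
 * one queue per CPU with a stride of one.  "qgroup_net" is a
 * hypothetical group created with taskqgroup_create() below.
 *
 *	taskqgroup_adjust(qgroup_net, mp_ncpus, 1);
 */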

struct taskqgroup *
taskqgroup_create(char *name)
{
	struct taskqgroup *qgroup;

	qgroup = malloc(sizeof(*qgroup), M_TASKQUEUE, M_WAITOK | M_ZERO);
	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
	qgroup->tqg_name = name;
	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);

	return (qgroup);
}

void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}
1153