xref: /freebsd/sys/kern/subr_gtaskqueue.c (revision 52f72944b8f5abb2386eae924357dee8aea17d5b)
/*-
 * Copyright (c) 2000 Doug Rabson
 * Copyright (c) 2014 Jeff Roberson
 * Copyright (c) 2016 Matthew Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/gtaskqueue.h>
#include <sys/unistd.h>
#include <machine/stdarg.h>

static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
static void	gtaskqueue_thread_enqueue(void *);
static void	gtaskqueue_thread_loop(void *arg);

TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
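
/*
 * TASKQGROUP_DEFINE() instantiates a task queue group and registers
 * SYSINITs that size it once the CPUs are known, so the "softirq" group
 * above ends up with one queue per CPU (stride 1).  Consumers reach it
 * as qgroup_softirq via TASKQGROUP_DECLARE() in <sys/gtaskqueue.h>.
 */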

struct gtaskqueue_busy {
	struct gtask	*tb_running;
	TAILQ_ENTRY(gtaskqueue_busy) tb_link;
};

/* Sentinel tb_running value identifying a drain waiter on tq_active. */
static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;

struct gtaskqueue {
	STAILQ_HEAD(, gtask)	tq_queue;
	gtaskqueue_enqueue_fn	tq_enqueue;
	void			*tq_context;
	char			*tq_name;
	TAILQ_HEAD(, gtaskqueue_busy) tq_active;
	struct mtx		tq_mutex;
	struct thread		**tq_threads;
	int			tq_tcount;
	int			tq_spin;
	int			tq_flags;
	int			tq_callouts;
	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
};

#define	TQ_FLAGS_ACTIVE		(1 << 0)
#define	TQ_FLAGS_BLOCKED	(1 << 1)
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	(1 << 2)

#define	DT_CALLOUT_ARMED	(1 << 0)

#define	TQ_LOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_lock_spin(&(tq)->tq_mutex);			\
		else							\
			mtx_lock(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)

#define	TQ_UNLOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
		else							\
			mtx_unlock(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)

#ifdef INVARIANTS
static void
gtask_dump(struct gtask *gtask)
{
	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	       gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context);
}
#endif

static __inline int
TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
    int t)
{
	if (tq->tq_spin)
		return (msleep_spin(p, m, wm, t));
	return (msleep(p, m, pri, wm, t));
}

static struct gtaskqueue *
_gtaskqueue_create(const char *name, int mflags,
		 taskqueue_enqueue_fn enqueue, void *context,
		 int mtxflags, const char *mtxname __unused)
{
	struct gtaskqueue *queue;
	char *tq_name;

	tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
	if (!tq_name)
		return (NULL);

	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");

	queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
	if (!queue) {
		free(tq_name, M_GTASKQUEUE);
		return (NULL);
	}

	STAILQ_INIT(&queue->tq_queue);
	TAILQ_INIT(&queue->tq_active);
	queue->tq_enqueue = enqueue;
	queue->tq_context = context;
	queue->tq_name = tq_name;
	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
	queue->tq_flags |= TQ_FLAGS_ACTIVE;
	if (enqueue == gtaskqueue_thread_enqueue)
		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);

	return (queue);
}

/*
 * Signal a taskqueue thread to terminate.
 */
static void
gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
{

	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
		wakeup(tq);
		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
	}
}

static void
gtaskqueue_free(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
	gtaskqueue_terminate(queue->tq_threads, queue);
	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
	mtx_destroy(&queue->tq_mutex);
	free(queue->tq_threads, M_GTASKQUEUE);
	free(queue->tq_name, M_GTASKQUEUE);
	free(queue, M_GTASKQUEUE);
}

int
grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
{
#ifdef INVARIANTS
	if (queue == NULL) {
		gtask_dump(gtask);
		panic("queue == NULL");
	}
#endif
	TQ_LOCK(queue);
	if (gtask->ta_flags & TASK_ENQUEUED) {
		TQ_UNLOCK(queue);
		return (0);
	}
	STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
	gtask->ta_flags |= TASK_ENQUEUED;
	TQ_UNLOCK(queue);
	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
		queue->tq_enqueue(queue->tq_context);
	return (0);
}
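
/*
 * Example usage (an illustrative sketch only; my_handler, my_softc and
 * tq are hypothetical consumer names, not part of this file): a
 * consumer initializes a gtask once and then enqueues it from a hot
 * path.  Because enqueueing an already-enqueued task (TASK_ENQUEUED
 * set) is a cheap no-op above, it is safe to call this repeatedly.
 *
 *	static struct gtask my_task;
 *
 *	static void
 *	my_handler(void *ctx)
 *	{
 *		struct my_softc *sc = ctx;
 *
 *		(void)sc;	deferred work runs in taskqueue context
 *	}
 *
 *	GTASK_INIT(&my_task, 0, 0, my_handler, sc);
 *	grouptaskqueue_enqueue(tq, &my_task);
 */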

static void
gtaskqueue_task_nop_fn(void *context)
{
}

/*
 * Block until all currently queued tasks in this taskqueue
 * have begun execution.  Tasks queued during execution of
 * this function are ignored.
 */
static void
gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
{
	struct gtask t_barrier;

	if (STAILQ_EMPTY(&queue->tq_queue))
		return;

	/*
	 * Enqueue our barrier after all current tasks, but with
	 * the highest priority so that newly queued tasks cannot
	 * pass it.  Because of the high priority, we cannot use
	 * the regular enqueue path (which would drop the lock
	 * anyway), so just insert it at the tail while we have
	 * the queue lock.
	 */
	GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
	t_barrier.ta_flags |= TASK_ENQUEUED;

	/*
	 * Once the barrier has executed, all previously queued tasks
	 * have completed or are currently executing.
	 */
	while (t_barrier.ta_flags & TASK_ENQUEUED)
		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
}

/*
 * Block until all currently executing tasks for this taskqueue
 * complete.  Tasks that begin execution during the execution
 * of this function are ignored.
 */
static void
gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
{
	struct gtaskqueue_busy tb_marker, *tb_first;

	if (TAILQ_EMPTY(&queue->tq_active))
		return;

	/* Block gtaskqueue_terminate(). */
	queue->tq_callouts++;

	/*
	 * Wait for all currently executing taskqueue threads
	 * to go idle.
	 */
	tb_marker.tb_running = TB_DRAIN_WAITER;
	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);

	/*
	 * Wake up any other drain waiter that happened to queue up
	 * without any intervening active thread.
	 */
	tb_first = TAILQ_FIRST(&queue->tq_active);
	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
		wakeup(tb_first);

	/* Release gtaskqueue_terminate(). */
	queue->tq_callouts--;
	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
		wakeup_one(queue->tq_threads);
}

void
gtaskqueue_block(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags |= TQ_FLAGS_BLOCKED;
	TQ_UNLOCK(queue);
}

void
gtaskqueue_unblock(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
	if (!STAILQ_EMPTY(&queue->tq_queue))
		queue->tq_enqueue(queue->tq_context);
	TQ_UNLOCK(queue);
}

static void
gtaskqueue_run_locked(struct gtaskqueue *queue)
{
	struct gtaskqueue_busy tb;
	struct gtaskqueue_busy *tb_first;
	struct gtask *gtask;

	KASSERT(queue != NULL, ("tq is NULL"));
	TQ_ASSERT_LOCKED(queue);
	tb.tb_running = NULL;

	while (STAILQ_FIRST(&queue->tq_queue)) {
		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);

		/*
		 * Carefully remove the first task from the queue and
		 * clear its TASK_ENQUEUED flag.
		 */
		gtask = STAILQ_FIRST(&queue->tq_queue);
		KASSERT(gtask != NULL, ("task is NULL"));
		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
		gtask->ta_flags &= ~TASK_ENQUEUED;
		tb.tb_running = gtask;
		TQ_UNLOCK(queue);

		KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
		gtask->ta_func(gtask->ta_context);

		TQ_LOCK(queue);
		tb.tb_running = NULL;
		wakeup(gtask);

		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
		tb_first = TAILQ_FIRST(&queue->tq_active);
		if (tb_first != NULL &&
		    tb_first->tb_running == TB_DRAIN_WAITER)
			wakeup(tb_first);
	}
}

static int
task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
{
	struct gtaskqueue_busy *tb;

	TQ_ASSERT_LOCKED(queue);
	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
		if (tb->tb_running == gtask)
			return (1);
	}
	return (0);
}

static int
gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
{

	if (gtask->ta_flags & TASK_ENQUEUED)
		STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
	gtask->ta_flags &= ~TASK_ENQUEUED;
	return (task_is_running(queue, gtask) ? EBUSY : 0);
}

int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int error;

	TQ_LOCK(queue);
	error = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);

	return (error);
}

void
gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
{

	if (!queue->tq_spin)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

	TQ_LOCK(queue);
	while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
		TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
	TQ_UNLOCK(queue);
}

void
gtaskqueue_drain_all(struct gtaskqueue *queue)
{

	if (!queue->tq_spin)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);

	TQ_LOCK(queue);
	gtaskqueue_drain_tq_queue(queue);
	gtaskqueue_drain_tq_active(queue);
	TQ_UNLOCK(queue);
}
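
/*
 * Typical teardown pattern (an illustrative sketch; tq, my_task and sc
 * are hypothetical consumer names): make sure a task can no longer be
 * queued or running before freeing the context it operates on.
 *
 *	gtaskqueue_cancel(tq, &my_task);	dequeue it if still pending
 *	gtaskqueue_drain(tq, &my_task);		wait until it is idle
 *	free(sc, M_DEVBUF);			context is now unreferenced
 *
 * gtaskqueue_drain_all() is the big hammer: it waits for everything
 * queued or running on the queue, using the barrier and marker tricks
 * implemented above.
 */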

static int
_gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
    cpuset_t *mask, const char *name, va_list ap)
{
	char ktname[MAXCOMLEN + 1];
	struct thread *td;
	struct gtaskqueue *tq;
	int i, error;

	if (count <= 0)
		return (EINVAL);

	vsnprintf(ktname, sizeof(ktname), name, ap);
	tq = *tqp;

	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
	    M_NOWAIT | M_ZERO);
	if (tq->tq_threads == NULL) {
		printf("%s: no memory for %s threads\n", __func__, ktname);
		return (ENOMEM);
	}

	for (i = 0; i < count; i++) {
		if (count == 1)
			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
		else
			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
			    &tq->tq_threads[i], RFSTOPPED, 0,
			    "%s_%d", ktname, i);
		if (error) {
			/* Should be ok to continue, gtaskqueue_free() will do the right thing. */
			printf("%s: kthread_add(%s): error %d\n", __func__,
			    ktname, error);
			tq->tq_threads[i] = NULL;		/* paranoid */
		} else
			tq->tq_tcount++;
	}
	for (i = 0; i < count; i++) {
		if (tq->tq_threads[i] == NULL)
			continue;
		td = tq->tq_threads[i];
		if (mask) {
			error = cpuset_setthread(td->td_tid, mask);
			/*
			 * Failing to pin is rarely an actual fatal error;
			 * it'll just affect performance.
			 */
			if (error)
				printf("%s: thread=%llu: can't pin; "
				    "error=%d\n",
				    __func__,
				    (unsigned long long) td->td_tid,
				    error);
		}
		thread_lock(td);
		sched_prio(td, pri);
		sched_add(td, SRQ_BORING);
		thread_unlock(td);
	}

	return (0);
}

static int
gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
    const char *name, ...)
{
	va_list ap;
	int error;

	va_start(ap, name);
	error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
	va_end(ap);
	return (error);
}

static inline void
gtaskqueue_run_callback(struct gtaskqueue *tq,
    enum taskqueue_callback_type cb_type)
{
	taskqueue_callback_fn tq_callback;

	TQ_ASSERT_UNLOCKED(tq);
	tq_callback = tq->tq_callbacks[cb_type];
	if (tq_callback != NULL)
		tq_callback(tq->tq_cb_contexts[cb_type]);
}

static void
gtaskqueue_thread_loop(void *arg)
{
	struct gtaskqueue **tqp, *tq;

	tqp = arg;
	tq = *tqp;
	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
	TQ_LOCK(tq);
	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
		gtaskqueue_run_locked(tq);
		/*
		 * Because gtaskqueue_run_locked() can drop tq_mutex, we
		 * need to check whether the TQ_FLAGS_ACTIVE flag was
		 * removed in the meantime, which would mean we missed a
		 * wakeup.
		 */
		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
			break;
		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
	}
	gtaskqueue_run_locked(tq);
	/*
	 * This thread is on its way out, so just drop the lock temporarily
	 * in order to call the shutdown callback.  This allows the callback
	 * to look at the taskqueue, even just before it dies.
	 */
	TQ_UNLOCK(tq);
	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
	TQ_LOCK(tq);

	/* Rendezvous with the thread that asked us to terminate. */
	tq->tq_tcount--;
	wakeup_one(tq->tq_threads);
	TQ_UNLOCK(tq);
	kthread_exit();
}

static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue **tqp, *tq;

	tqp = context;
	tq = *tqp;
	wakeup_one(tq);
}

static struct gtaskqueue *
gtaskqueue_create_fast(const char *name, int mflags,
    taskqueue_enqueue_fn enqueue, void *context)
{
	return (_gtaskqueue_create(name, mflags, enqueue, context,
	    MTX_SPIN, "fast_taskqueue"));
}

struct taskqgroup_cpu {
	LIST_HEAD(, grouptask)	tgc_tasks;
	struct gtaskqueue	*tgc_taskq;
	int	tgc_cnt;
	int	tgc_cpu;
};

struct taskqgroup {
	struct taskqgroup_cpu tqg_queue[MAXCPU];
	struct mtx	tqg_lock;
	char		*tqg_name;
	int		tqg_adjusting;
	int		tqg_stride;
	int		tqg_cnt;
};

struct taskq_bind_task {
	struct gtask bt_task;
	int	bt_cpuid;
};

static void
taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
{
	struct taskqgroup_cpu *qcpu;

	qcpu = &qgroup->tqg_queue[idx];
	LIST_INIT(&qcpu->tgc_tasks);
	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
	    gtaskqueue_thread_enqueue, &qcpu->tgc_taskq);
	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
	    "%s_%d", qgroup->tqg_name, idx);
	qcpu->tgc_cpu = cpu;
}

static void
taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
{

	gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
}

/*
 * Find the taskq with the fewest tasks that does not already service
 * this uniq identifier.
 */
static int
taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
{
	struct grouptask *n;
	int i, idx, mincnt;
	int strict;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
	if (qgroup->tqg_cnt == 0)
		return (0);
	idx = -1;
	mincnt = INT_MAX;
	/*
	 * Two passes: first scan for a queue with the fewest tasks that
	 * does not already service this uniq id.  If that fails, simply
	 * find the queue with the fewest total tasks.
	 */
	for (strict = 1; mincnt == INT_MAX; strict = 0) {
		for (i = 0; i < qgroup->tqg_cnt; i++) {
			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
				continue;
			if (strict) {
				LIST_FOREACH(n,
				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
					if (n->gt_uniq == uniq)
						break;
				if (n != NULL)
					continue;
			}
			mincnt = qgroup->tqg_queue[i].tgc_cnt;
			idx = i;
		}
	}
	if (idx == -1)
		panic("taskqgroup_find: Failed to pick a qid.");

	return (idx);
}

/*
 * smp_started is unusable since it is not set for UP kernels or even for
 * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 *
 * So maintain our own flag.  It must be set after all CPUs are started
 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 * simpler for adjustment to pass a flag indicating if it is delayed.
 */

static int tqg_smp_started;

static void
tqg_record_smp_started(void *arg)
{
	tqg_smp_started = 1;
}

SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
	tqg_record_smp_started, NULL);

void
taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
    void *uniq, int irq, char *name)
{
	cpuset_t mask;
	int qid, error;

	gtask->gt_uniq = uniq;
	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
	gtask->gt_irq = irq;
	gtask->gt_cpu = -1;
	mtx_lock(&qgroup->tqg_lock);
	qid = taskqgroup_find(qgroup, uniq);
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	if (irq != -1 && tqg_smp_started) {
		gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
		CPU_ZERO(&mask);
		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
		mtx_unlock(&qgroup->tqg_lock);
		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
		if (error)
			printf("%s: setaffinity failed for %s: %d\n",
			    __func__, gtask->gt_name, error);
	} else
		mtx_unlock(&qgroup->tqg_lock);
}
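
/*
 * Example attach (an illustrative sketch; sc, my_rx_task and sc_irq are
 * hypothetical driver names, not part of this file): bind a per-queue
 * handler to the shared softirq group and kick it later from the
 * interrupt path.  GROUPTASK_INIT() and GROUPTASK_ENQUEUE() are the
 * grouptask wrappers declared in <sys/gtaskqueue.h>.
 *
 *	GROUPTASK_INIT(&sc->sc_rx_task, 0, my_rx_task, sc);
 *	taskqgroup_attach(qgroup_softirq, &sc->sc_rx_task, sc,
 *	    sc->sc_irq, "my_rx");
 *	...
 *	GROUPTASK_ENQUEUE(&sc->sc_rx_task);
 */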

static void
taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	cpuset_t mask;
	int qid, cpu, error;

	mtx_lock(&qgroup->tqg_lock);
	qid = taskqgroup_find(qgroup, gtask->gt_uniq);
	cpu = qgroup->tqg_queue[qid].tgc_cpu;
	if (gtask->gt_irq != -1) {
		mtx_unlock(&qgroup->tqg_lock);

		CPU_ZERO(&mask);
		CPU_SET(cpu, &mask);
		error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
		mtx_lock(&qgroup->tqg_lock);
		if (error)
			printf("%s: %s setaffinity failed: %d\n",
			    __func__, gtask->gt_name, error);
	}
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	mtx_unlock(&qgroup->tqg_lock);
}

int
taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
    void *uniq, int cpu, int irq, char *name)
{
	cpuset_t mask;
	int i, qid, error;

	qid = -1;
	gtask->gt_uniq = uniq;
	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
	gtask->gt_irq = irq;
	gtask->gt_cpu = cpu;
	mtx_lock(&qgroup->tqg_lock);
	if (tqg_smp_started) {
		for (i = 0; i < qgroup->tqg_cnt; i++)
			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
				qid = i;
				break;
			}
		if (qid == -1) {
			mtx_unlock(&qgroup->tqg_lock);
			printf("%s: qid not found for %s cpu=%d\n",
			    __func__, gtask->gt_name, cpu);
			return (EINVAL);
		}
	} else
		qid = 0;
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	cpu = qgroup->tqg_queue[qid].tgc_cpu;
	mtx_unlock(&qgroup->tqg_lock);

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	if (irq != -1 && tqg_smp_started) {
		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
		if (error)
			printf("%s: setaffinity failed: %d\n", __func__, error);
	}
	return (0);
}

static int
taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	cpuset_t mask;
	int i, qid, irq, cpu, error;

	qid = -1;
	irq = gtask->gt_irq;
	cpu = gtask->gt_cpu;
	MPASS(tqg_smp_started);
	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++)
		if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
			qid = i;
			break;
		}
	if (qid == -1) {
		mtx_unlock(&qgroup->tqg_lock);
		printf("%s: qid not found for %s cpu=%d\n",
		    __func__, gtask->gt_name, cpu);
		return (EINVAL);
	}
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	mtx_unlock(&qgroup->tqg_lock);

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	if (irq != -1) {
		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
		if (error)
			printf("%s: setaffinity failed: %d\n", __func__, error);
	}
	return (0);
}

void
taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	int i;

	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++)
		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
			break;
	if (i == qgroup->tqg_cnt)
		panic("taskqgroup_detach: task %s not in group", gtask->gt_name);
	qgroup->tqg_queue[i].tgc_cnt--;
	LIST_REMOVE(gtask, gt_list);
	mtx_unlock(&qgroup->tqg_lock);
	gtask->gt_taskqueue = NULL;
}

static void
taskqgroup_binder(void *ctx)
{
	struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
	cpuset_t mask;
	int error;

	CPU_ZERO(&mask);
	CPU_SET(gtask->bt_cpuid, &mask);
	error = cpuset_setthread(curthread->td_tid, &mask);
	thread_lock(curthread);
	sched_bind(curthread, gtask->bt_cpuid);
	thread_unlock(curthread);

	if (error)
		printf("%s: setaffinity failed: %d\n", __func__, error);
	free(gtask, M_DEVBUF);
}

static void
taskqgroup_bind(struct taskqgroup *qgroup)
{
	struct taskq_bind_task *gtask;
	int i;

	/*
	 * Bind taskqueue threads to specific CPUs, if they have been assigned
	 * one.
	 */
	if (qgroup->tqg_cnt == 1)
		return;

	for (i = 0; i < qgroup->tqg_cnt; i++) {
		gtask = malloc(sizeof(*gtask), M_DEVBUF, M_WAITOK);
		GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
		gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
		grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
		    &gtask->bt_task);
	}
}

static int
_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
	struct grouptask *gtask;
	int i, k, old_cnt, old_cpu, cpu;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);

	if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
		printf("%s: failed cnt: %d stride: %d "
		    "mp_ncpus: %d tqg_smp_started: %d\n",
		    __func__, cnt, stride, mp_ncpus, tqg_smp_started);
		return (EINVAL);
	}
	if (qgroup->tqg_adjusting) {
		printf("%s failed: adjusting\n", __func__);
		return (EBUSY);
	}
	qgroup->tqg_adjusting = 1;
	old_cnt = qgroup->tqg_cnt;
	old_cpu = 0;
	if (old_cnt < cnt)
		old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
	mtx_unlock(&qgroup->tqg_lock);
	/*
	 * Set up the queue for tasks added before boot.
	 */
	if (old_cnt == 0) {
		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
		    grouptask, gt_list);
		qgroup->tqg_queue[0].tgc_cnt = 0;
	}

	/*
	 * Create new taskq threads if the thread count has grown.
	 */
	cpu = old_cpu;
	for (i = old_cnt; i < cnt; i++) {
		taskqgroup_cpu_create(qgroup, i, cpu);

		for (k = 0; k < stride; k++)
			cpu = CPU_NEXT(cpu);
	}
	mtx_lock(&qgroup->tqg_lock);
	qgroup->tqg_cnt = cnt;
	qgroup->tqg_stride = stride;

	/*
	 * Adjust drivers to use the new taskqs.
	 */
	for (i = 0; i < old_cnt; i++) {
		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
			LIST_REMOVE(gtask, gt_list);
			qgroup->tqg_queue[i].tgc_cnt--;
			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
		}
	}
	mtx_unlock(&qgroup->tqg_lock);

	while ((gtask = LIST_FIRST(&gtask_head))) {
		LIST_REMOVE(gtask, gt_list);
		if (gtask->gt_cpu == -1)
			taskqgroup_attach_deferred(qgroup, gtask);
		else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
			taskqgroup_attach_deferred(qgroup, gtask);
	}

#ifdef INVARIANTS
	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++) {
		MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
			MPASS(gtask->gt_taskqueue != NULL);
	}
	mtx_unlock(&qgroup->tqg_lock);
#endif
	/*
	 * Remove taskq threads if the thread count has shrunk.
	 */
	for (i = cnt; i < old_cnt; i++)
		taskqgroup_cpu_remove(qgroup, i);

	taskqgroup_bind(qgroup);

	mtx_lock(&qgroup->tqg_lock);
	qgroup->tqg_adjusting = 0;

	return (0);
}

int
taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
	int error;

	mtx_lock(&qgroup->tqg_lock);
	error = _taskqgroup_adjust(qgroup, cnt, stride);
	mtx_unlock(&qgroup->tqg_lock);

	return (error);
}
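
/*
 * Example (an illustrative sketch; "my_group" and tqg are hypothetical):
 * a subsystem can maintain a private group rather than share the softirq
 * group.  Adjustment must wait until tqg_smp_started is set, i.e. after
 * the SI_SUB_SMP:SI_ORDER_FOURTH SYSINIT above; TASKQGROUP_DEFINE()
 * arranges that automatically for static groups.
 *
 *	struct taskqgroup *tqg;
 *	int error;
 *
 *	tqg = taskqgroup_create("my_group");
 *	error = taskqgroup_adjust(tqg, mp_ncpus, 1);	one queue per CPU
 */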

struct taskqgroup *
taskqgroup_create(char *name)
{
	struct taskqgroup *qgroup;

	qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
	qgroup->tqg_name = name;
	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);

	return (qgroup);
}

void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}