xref: /freebsd/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c (revision f9fd7337f63698f33239c58c07bf430198235a22)
1 /*
2  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3  *  Copyright (C) 2007 The Regents of the University of California.
4  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
6  *  UCRL-CODE-235197
7  *
8  *  This file is part of the SPL, Solaris Porting Layer.
9  *
10  *  The SPL is free software; you can redistribute it and/or modify it
11  *  under the terms of the GNU General Public License as published by the
12  *  Free Software Foundation; either version 2 of the License, or (at your
13  *  option) any later version.
14  *
15  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
16  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18  *  for more details.
19  *
20  *  You should have received a copy of the GNU General Public License along
21  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
22  *
23  *  Solaris Porting Layer (SPL) Task Queue Implementation.
24  */
25 
26 #include <sys/timer.h>
27 #include <sys/taskq.h>
28 #include <sys/kmem.h>
29 #include <sys/tsd.h>
30 #include <sys/trace_spl.h>
31 
/*
 * Module tunables.  Each one is registered as a module parameter with
 * mode 0644.
 */

/* When non-zero, taskq_thread_create() binds each new thread to a CPU. */
int spl_taskq_thread_bind = 0;
module_param(spl_taskq_thread_bind, int, 0644);
MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");


/*
 * When non-zero, a TASKQ_DYNAMIC taskq is created with a single thread
 * (see taskq_create()) and surplus threads are allowed to exit (see
 * taskq_thread_should_stop()).
 */
int spl_taskq_thread_dynamic = 1;
module_param(spl_taskq_thread_dynamic, int, 0644);
MODULE_PARM_DESC(spl_taskq_thread_dynamic, "Allow dynamic taskq threads");

/*
 * When non-zero, taskq_thread_create() applies the taskq's tq_pri to the
 * new thread via set_user_nice().
 */
int spl_taskq_thread_priority = 1;
module_param(spl_taskq_thread_priority, int, 0644);
MODULE_PARM_DESC(spl_taskq_thread_priority,
	"Allow non-default priority for taskq threads");

/*
 * A worker that has run more than this many tasks back to back without
 * sleeping asks for another thread to be spawned (see taskq_thread()).
 */
int spl_taskq_thread_sequential = 4;
module_param(spl_taskq_thread_sequential, int, 0644);
MODULE_PARM_DESC(spl_taskq_thread_sequential,
	"Create new taskq threads after N sequential tasks");
50 
/* Global system-wide dynamic task queue available for all consumers */
taskq_t *system_taskq;
EXPORT_SYMBOL(system_taskq);
/* Global dynamic task queue for long delay */
taskq_t *system_delay_taskq;
EXPORT_SYMBOL(system_delay_taskq);

/* Private dedicated taskq for creating new taskq threads on demand. */
static taskq_t *dynamic_taskq;
/* Forward declaration; used by taskq_thread_spawn_task() below. */
static taskq_thread_t *taskq_thread_create(taskq_t *);

/* List of all taskqs */
LIST_HEAD(tq_list);
/* Taken for writing by taskq_create() while it appends to tq_list. */
struct rw_semaphore tq_list_sem;
/*
 * Thread-specific-data key under which each worker thread records the
 * taskq it belongs to (see taskq_thread(), taskq_member()).
 */
static uint_t taskq_tsd;
66 
67 static int
68 task_km_flags(uint_t flags)
69 {
70 	if (flags & TQ_NOSLEEP)
71 		return (KM_NOSLEEP);
72 
73 	if (flags & TQ_PUSHPAGE)
74 		return (KM_PUSHPAGE);
75 
76 	return (KM_SLEEP);
77 }
78 
79 /*
80  * taskq_find_by_name - Find the largest instance number of a named taskq.
81  */
82 static int
83 taskq_find_by_name(const char *name)
84 {
85 	struct list_head *tql = NULL;
86 	taskq_t *tq;
87 
88 	list_for_each_prev(tql, &tq_list) {
89 		tq = list_entry(tql, taskq_t, tq_taskqs);
90 		if (strcmp(name, tq->tq_name) == 0)
91 			return (tq->tq_instance);
92 	}
93 	return (-1);
94 }
95 
96 /*
97  * NOTE: Must be called with tq->tq_lock held, returns a list_t which
98  * is not attached to the free, work, or pending taskq lists.
99  */
100 static taskq_ent_t *
101 task_alloc(taskq_t *tq, uint_t flags, unsigned long *irqflags)
102 {
103 	taskq_ent_t *t;
104 	int count = 0;
105 
106 	ASSERT(tq);
107 retry:
108 	/* Acquire taskq_ent_t's from free list if available */
109 	if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) {
110 		t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
111 
112 		ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
113 		ASSERT(!(t->tqent_flags & TQENT_FLAG_CANCEL));
114 		ASSERT(!timer_pending(&t->tqent_timer));
115 
116 		list_del_init(&t->tqent_list);
117 		return (t);
118 	}
119 
120 	/* Free list is empty and memory allocations are prohibited */
121 	if (flags & TQ_NOALLOC)
122 		return (NULL);
123 
124 	/* Hit maximum taskq_ent_t pool size */
125 	if (tq->tq_nalloc >= tq->tq_maxalloc) {
126 		if (flags & TQ_NOSLEEP)
127 			return (NULL);
128 
129 		/*
130 		 * Sleep periodically polling the free list for an available
131 		 * taskq_ent_t. Dispatching with TQ_SLEEP should always succeed
132 		 * but we cannot block forever waiting for an taskq_ent_t to
133 		 * show up in the free list, otherwise a deadlock can happen.
134 		 *
135 		 * Therefore, we need to allocate a new task even if the number
136 		 * of allocated tasks is above tq->tq_maxalloc, but we still
137 		 * end up delaying the task allocation by one second, thereby
138 		 * throttling the task dispatch rate.
139 		 */
140 		spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
141 		schedule_timeout(HZ / 100);
142 		spin_lock_irqsave_nested(&tq->tq_lock, *irqflags,
143 		    tq->tq_lock_class);
144 		if (count < 100) {
145 			count++;
146 			goto retry;
147 		}
148 	}
149 
150 	spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
151 	t = kmem_alloc(sizeof (taskq_ent_t), task_km_flags(flags));
152 	spin_lock_irqsave_nested(&tq->tq_lock, *irqflags, tq->tq_lock_class);
153 
154 	if (t) {
155 		taskq_init_ent(t);
156 		tq->tq_nalloc++;
157 	}
158 
159 	return (t);
160 }
161 
162 /*
163  * NOTE: Must be called with tq->tq_lock held, expects the taskq_ent_t
164  * to already be removed from the free, work, or pending taskq lists.
165  */
166 static void
167 task_free(taskq_t *tq, taskq_ent_t *t)
168 {
169 	ASSERT(tq);
170 	ASSERT(t);
171 	ASSERT(list_empty(&t->tqent_list));
172 	ASSERT(!timer_pending(&t->tqent_timer));
173 
174 	kmem_free(t, sizeof (taskq_ent_t));
175 	tq->tq_nalloc--;
176 }
177 
178 /*
179  * NOTE: Must be called with tq->tq_lock held, either destroys the
180  * taskq_ent_t if too many exist or moves it to the free list for later use.
181  */
182 static void
183 task_done(taskq_t *tq, taskq_ent_t *t)
184 {
185 	ASSERT(tq);
186 	ASSERT(t);
187 
188 	/* Wake tasks blocked in taskq_wait_id() */
189 	wake_up_all(&t->tqent_waitq);
190 
191 	list_del_init(&t->tqent_list);
192 
193 	if (tq->tq_nalloc <= tq->tq_minalloc) {
194 		t->tqent_id = TASKQID_INVALID;
195 		t->tqent_func = NULL;
196 		t->tqent_arg = NULL;
197 		t->tqent_flags = 0;
198 
199 		list_add_tail(&t->tqent_list, &tq->tq_free_list);
200 	} else {
201 		task_free(tq, t);
202 	}
203 }
204 
205 /*
206  * When a delayed task timer expires remove it from the delay list and
207  * add it to the priority list in order for immediate processing.
208  */
209 static void
210 task_expire_impl(taskq_ent_t *t)
211 {
212 	taskq_ent_t *w;
213 	taskq_t *tq = t->tqent_taskq;
214 	struct list_head *l = NULL;
215 	unsigned long flags;
216 
217 	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
218 
219 	if (t->tqent_flags & TQENT_FLAG_CANCEL) {
220 		ASSERT(list_empty(&t->tqent_list));
221 		spin_unlock_irqrestore(&tq->tq_lock, flags);
222 		return;
223 	}
224 
225 	t->tqent_birth = jiffies;
226 	DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);
227 
228 	/*
229 	 * The priority list must be maintained in strict task id order
230 	 * from lowest to highest for lowest_id to be easily calculable.
231 	 */
232 	list_del(&t->tqent_list);
233 	list_for_each_prev(l, &tq->tq_prio_list) {
234 		w = list_entry(l, taskq_ent_t, tqent_list);
235 		if (w->tqent_id < t->tqent_id) {
236 			list_add(&t->tqent_list, l);
237 			break;
238 		}
239 	}
240 	if (l == &tq->tq_prio_list)
241 		list_add(&t->tqent_list, &tq->tq_prio_list);
242 
243 	spin_unlock_irqrestore(&tq->tq_lock, flags);
244 
245 	wake_up(&tq->tq_work_waitq);
246 }
247 
248 static void
249 task_expire(spl_timer_list_t tl)
250 {
251 	struct timer_list *tmr = (struct timer_list *)tl;
252 	taskq_ent_t *t = from_timer(t, tmr, tqent_timer);
253 	task_expire_impl(t);
254 }
255 
256 /*
257  * Returns the lowest incomplete taskqid_t.  The taskqid_t may
258  * be queued on the pending list, on the priority list, on the
259  * delay list, or on the work list currently being handled, but
260  * it is not 100% complete yet.
261  */
262 static taskqid_t
263 taskq_lowest_id(taskq_t *tq)
264 {
265 	taskqid_t lowest_id = tq->tq_next_id;
266 	taskq_ent_t *t;
267 	taskq_thread_t *tqt;
268 
269 	ASSERT(tq);
270 
271 	if (!list_empty(&tq->tq_pend_list)) {
272 		t = list_entry(tq->tq_pend_list.next, taskq_ent_t, tqent_list);
273 		lowest_id = MIN(lowest_id, t->tqent_id);
274 	}
275 
276 	if (!list_empty(&tq->tq_prio_list)) {
277 		t = list_entry(tq->tq_prio_list.next, taskq_ent_t, tqent_list);
278 		lowest_id = MIN(lowest_id, t->tqent_id);
279 	}
280 
281 	if (!list_empty(&tq->tq_delay_list)) {
282 		t = list_entry(tq->tq_delay_list.next, taskq_ent_t, tqent_list);
283 		lowest_id = MIN(lowest_id, t->tqent_id);
284 	}
285 
286 	if (!list_empty(&tq->tq_active_list)) {
287 		tqt = list_entry(tq->tq_active_list.next, taskq_thread_t,
288 		    tqt_active_list);
289 		ASSERT(tqt->tqt_id != TASKQID_INVALID);
290 		lowest_id = MIN(lowest_id, tqt->tqt_id);
291 	}
292 
293 	return (lowest_id);
294 }
295 
296 /*
297  * Insert a task into a list keeping the list sorted by increasing taskqid.
298  */
299 static void
300 taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt)
301 {
302 	taskq_thread_t *w;
303 	struct list_head *l = NULL;
304 
305 	ASSERT(tq);
306 	ASSERT(tqt);
307 
308 	list_for_each_prev(l, &tq->tq_active_list) {
309 		w = list_entry(l, taskq_thread_t, tqt_active_list);
310 		if (w->tqt_id < tqt->tqt_id) {
311 			list_add(&tqt->tqt_active_list, l);
312 			break;
313 		}
314 	}
315 	if (l == &tq->tq_active_list)
316 		list_add(&tqt->tqt_active_list, &tq->tq_active_list);
317 }
318 
319 /*
320  * Find and return a task from the given list if it exists.  The list
321  * must be in lowest to highest task id order.
322  */
323 static taskq_ent_t *
324 taskq_find_list(taskq_t *tq, struct list_head *lh, taskqid_t id)
325 {
326 	struct list_head *l = NULL;
327 	taskq_ent_t *t;
328 
329 	list_for_each(l, lh) {
330 		t = list_entry(l, taskq_ent_t, tqent_list);
331 
332 		if (t->tqent_id == id)
333 			return (t);
334 
335 		if (t->tqent_id > id)
336 			break;
337 	}
338 
339 	return (NULL);
340 }
341 
342 /*
343  * Find an already dispatched task given the task id regardless of what
344  * state it is in.  If a task is still pending it will be returned.
345  * If a task is executing, then -EBUSY will be returned instead.
346  * If the task has already been run then NULL is returned.
347  */
348 static taskq_ent_t *
349 taskq_find(taskq_t *tq, taskqid_t id)
350 {
351 	taskq_thread_t *tqt;
352 	struct list_head *l = NULL;
353 	taskq_ent_t *t;
354 
355 	t = taskq_find_list(tq, &tq->tq_delay_list, id);
356 	if (t)
357 		return (t);
358 
359 	t = taskq_find_list(tq, &tq->tq_prio_list, id);
360 	if (t)
361 		return (t);
362 
363 	t = taskq_find_list(tq, &tq->tq_pend_list, id);
364 	if (t)
365 		return (t);
366 
367 	list_for_each(l, &tq->tq_active_list) {
368 		tqt = list_entry(l, taskq_thread_t, tqt_active_list);
369 		if (tqt->tqt_id == id) {
370 			/*
371 			 * Instead of returning tqt_task, we just return a non
372 			 * NULL value to prevent misuse, since tqt_task only
373 			 * has two valid fields.
374 			 */
375 			return (ERR_PTR(-EBUSY));
376 		}
377 	}
378 
379 	return (NULL);
380 }
381 
382 /*
383  * Theory for the taskq_wait_id(), taskq_wait_outstanding(), and
384  * taskq_wait() functions below.
385  *
386  * Taskq waiting is accomplished by tracking the lowest outstanding task
387  * id and the next available task id.  As tasks are dispatched they are
388  * added to the tail of the pending, priority, or delay lists.  As worker
389  * threads become available the tasks are removed from the heads of these
390  * lists and linked to the worker threads.  This ensures the lists are
391  * kept sorted by lowest to highest task id.
392  *
393  * Therefore the lowest outstanding task id can be quickly determined by
394  * checking the head item from all of these lists.  This value is stored
395  * with the taskq as the lowest id.  It only needs to be recalculated when
396  * either the task with the current lowest id completes or is canceled.
397  *
398  * By blocking until the lowest task id exceeds the passed task id the
399  * taskq_wait_outstanding() function can be easily implemented.  Similarly,
400  * by blocking until the lowest task id matches the next task id taskq_wait()
401  * can be implemented.
402  *
 * Callers should be aware that when there are multiple worker threads it
404  * is possible for larger task ids to complete before smaller ones.  Also
405  * when the taskq contains delay tasks with small task ids callers may
406  * block for a considerable length of time waiting for them to expire and
407  * execute.
408  */
409 static int
410 taskq_wait_id_check(taskq_t *tq, taskqid_t id)
411 {
412 	int rc;
413 	unsigned long flags;
414 
415 	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
416 	rc = (taskq_find(tq, id) == NULL);
417 	spin_unlock_irqrestore(&tq->tq_lock, flags);
418 
419 	return (rc);
420 }
421 
/*
 * The taskq_wait_id() function blocks until the passed task id completes.
 * This does not guarantee that all lower task ids have completed.
 *
 * The caller sleeps on tq->tq_wait_waitq and re-evaluates
 * taskq_wait_id_check() each time that wait queue is woken.
 */
void
taskq_wait_id(taskq_t *tq, taskqid_t id)
{
	wait_event(tq->tq_wait_waitq, taskq_wait_id_check(tq, id));
}
EXPORT_SYMBOL(taskq_wait_id);
432 
433 static int
434 taskq_wait_outstanding_check(taskq_t *tq, taskqid_t id)
435 {
436 	int rc;
437 	unsigned long flags;
438 
439 	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
440 	rc = (id < tq->tq_lowest_id);
441 	spin_unlock_irqrestore(&tq->tq_lock, flags);
442 
443 	return (rc);
444 }
445 
446 /*
447  * The taskq_wait_outstanding() function will block until all tasks with a
448  * lower taskqid than the passed 'id' have been completed.  Note that all
449  * task id's are assigned monotonically at dispatch time.  Zero may be
450  * passed for the id to indicate all tasks dispatch up to this point,
451  * but not after, should be waited for.
452  */
453 void
454 taskq_wait_outstanding(taskq_t *tq, taskqid_t id)
455 {
456 	id = id ? id : tq->tq_next_id - 1;
457 	wait_event(tq->tq_wait_waitq, taskq_wait_outstanding_check(tq, id));
458 }
459 EXPORT_SYMBOL(taskq_wait_outstanding);
460 
461 static int
462 taskq_wait_check(taskq_t *tq)
463 {
464 	int rc;
465 	unsigned long flags;
466 
467 	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
468 	rc = (tq->tq_lowest_id == tq->tq_next_id);
469 	spin_unlock_irqrestore(&tq->tq_lock, flags);
470 
471 	return (rc);
472 }
473 
/*
 * The taskq_wait() function will block until the taskq is empty.
 * This means that if a taskq re-dispatches work to itself taskq_wait()
 * callers will block indefinitely.
 *
 * The caller sleeps on tq->tq_wait_waitq and re-evaluates
 * taskq_wait_check() each time that wait queue is woken.
 */
void
taskq_wait(taskq_t *tq)
{
	wait_event(tq->tq_wait_waitq, taskq_wait_check(tq));
}
EXPORT_SYMBOL(taskq_wait);
485 
486 int
487 taskq_member(taskq_t *tq, kthread_t *t)
488 {
489 	return (tq == (taskq_t *)tsd_get_by_thread(taskq_tsd, t));
490 }
491 EXPORT_SYMBOL(taskq_member);
492 
493 taskq_t *
494 taskq_of_curthread(void)
495 {
496 	return (tsd_get(taskq_tsd));
497 }
498 EXPORT_SYMBOL(taskq_of_curthread);
499 
/*
 * Cancel an already dispatched task given the task id.  Still pending tasks
 * will be immediately canceled, and if the task is active the function will
 * block until it completes.  Preallocated tasks which are canceled must be
 * freed by the caller.
 *
 * Returns 0 when a queued task was canceled, EBUSY when the task was
 * already executing (after waiting for it to finish), and ENOENT when no
 * task with the given id was found.
 */
int
taskq_cancel_id(taskq_t *tq, taskqid_t id)
{
	taskq_ent_t *t;
	int rc = ENOENT;
	unsigned long flags;

	ASSERT(tq);

	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
	t = taskq_find(tq, id);
	/* Only a real entry (still queued) can be canceled in place. */
	if (t && t != ERR_PTR(-EBUSY)) {
		list_del_init(&t->tqent_list);
		/* Tells task_expire_impl() to ignore a timer that still fires. */
		t->tqent_flags |= TQENT_FLAG_CANCEL;

		/*
		 * When canceling the lowest outstanding task id we
		 * must recalculate the new lowest outstanding id.
		 */
		if (tq->tq_lowest_id == t->tqent_id) {
			tq->tq_lowest_id = taskq_lowest_id(tq);
			ASSERT3S(tq->tq_lowest_id, >, t->tqent_id);
		}

		/*
		 * The task_expire() function takes the tq->tq_lock so
		 * drop the lock before synchronously cancelling the timer.
		 */
		if (timer_pending(&t->tqent_timer)) {
			spin_unlock_irqrestore(&tq->tq_lock, flags);
			del_timer_sync(&t->tqent_timer);
			spin_lock_irqsave_nested(&tq->tq_lock, flags,
			    tq->tq_lock_class);
		}

		/* Preallocated entries belong to the caller; leave them be. */
		if (!(t->tqent_flags & TQENT_FLAG_PREALLOC))
			task_done(tq, t);

		rc = 0;
	}
	spin_unlock_irqrestore(&tq->tq_lock, flags);

	/* The task is running right now: wait for it outside the lock. */
	if (t == ERR_PTR(-EBUSY)) {
		taskq_wait_id(tq, id);
		rc = EBUSY;
	}

	return (rc);
}
EXPORT_SYMBOL(taskq_cancel_id);
556 
/* Forward declaration; defined further down in this file. */
static int taskq_thread_spawn(taskq_t *tq);

/*
 * Queue func(arg) for execution by one of the taskq's worker threads.
 *
 * tq    - target taskq
 * func  - function to run
 * arg   - opaque argument passed to func
 * flags - TQ_* flags.  TQ_NOQUEUE only accepts the task if a thread is
 *         idle (or, for a dynamic taskq, one could be spawned) and places
 *         it at the head of the priority list; TQ_FRONT appends it to the
 *         priority list; otherwise it is appended to the pending list.
 *         The flags are also passed on to task_alloc().
 *
 * Returns the id assigned to the task, or TASKQID_INVALID if the taskq is
 * no longer active, the TQ_NOQUEUE condition could not be met, or no
 * taskq_ent_t could be obtained.
 */
taskqid_t
taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
{
	taskq_ent_t *t;
	taskqid_t rc = TASKQID_INVALID;
	unsigned long irqflags;

	ASSERT(tq);
	ASSERT(func);

	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);

	/* Taskq being destroyed and all tasks drained */
	if (!(tq->tq_flags & TASKQ_ACTIVE))
		goto out;

	/* Do not queue the task unless there is idle thread for it */
	ASSERT(tq->tq_nactive <= tq->tq_nthreads);
	if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
		/* Dynamic taskq may be able to spawn another thread */
		if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
		    taskq_thread_spawn(tq) == 0)
			goto out;
	}

	/* May drop and re-take tq->tq_lock; irqflags is updated if so. */
	if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
		goto out;

	spin_lock(&t->tqent_lock);

	/* Queue to the front of the list to enforce TQ_NOQUEUE semantics */
	if (flags & TQ_NOQUEUE)
		list_add(&t->tqent_list, &tq->tq_prio_list);
	/* Queue to the priority list instead of the pending list */
	else if (flags & TQ_FRONT)
		list_add_tail(&t->tqent_list, &tq->tq_prio_list);
	else
		list_add_tail(&t->tqent_list, &tq->tq_pend_list);

	/* Ids are handed out in increasing order under tq->tq_lock. */
	t->tqent_id = rc = tq->tq_next_id;
	tq->tq_next_id++;
	t->tqent_func = func;
	t->tqent_arg = arg;
	t->tqent_taskq = tq;
	/* Not a delayed task: no timer callback, no expiry. */
	t->tqent_timer.function = NULL;
	t->tqent_timer.expires = 0;

	t->tqent_birth = jiffies;
	DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);

	ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));

	spin_unlock(&t->tqent_lock);

	wake_up(&tq->tq_work_waitq);
out:
	/* Spawn additional taskq threads if required. */
	if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads)
		(void) taskq_thread_spawn(tq);

	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
	return (rc);
}
EXPORT_SYMBOL(taskq_dispatch);
623 
/*
 * Queue func(arg) for deferred execution.  The task is placed on the
 * taskq's delay list and a timer is armed; when the timer fires,
 * task_expire() moves the task to the priority list.
 *
 * tq          - target taskq
 * func        - function to run
 * arg         - opaque argument passed to func
 * flags       - TQ_* flags, passed on to task_alloc()
 * expire_time - stored unmodified in the timer's 'expires' field
 *               (NOTE(review): presumably an absolute jiffies value --
 *               confirm against callers)
 *
 * Returns the id assigned to the task, or TASKQID_INVALID if the taskq is
 * no longer active or no taskq_ent_t could be obtained.
 */
taskqid_t
taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
    uint_t flags, clock_t expire_time)
{
	taskqid_t rc = TASKQID_INVALID;
	taskq_ent_t *t;
	unsigned long irqflags;

	ASSERT(tq);
	ASSERT(func);

	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);

	/* Taskq being destroyed and all tasks drained */
	if (!(tq->tq_flags & TASKQ_ACTIVE))
		goto out;

	/* May drop and re-take tq->tq_lock; irqflags is updated if so. */
	if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
		goto out;

	spin_lock(&t->tqent_lock);

	/* Queue to the delay list for subsequent execution */
	list_add_tail(&t->tqent_list, &tq->tq_delay_list);

	t->tqent_id = rc = tq->tq_next_id;
	tq->tq_next_id++;
	t->tqent_func = func;
	t->tqent_arg = arg;
	t->tqent_taskq = tq;
	/* Arm the timer whose callback promotes the task when it expires. */
	t->tqent_timer.function = task_expire;
	t->tqent_timer.expires = (unsigned long)expire_time;
	add_timer(&t->tqent_timer);

	ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));

	spin_unlock(&t->tqent_lock);
out:
	/* Spawn additional taskq threads if required. */
	if (tq->tq_nactive == tq->tq_nthreads)
		(void) taskq_thread_spawn(tq);
	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
	return (rc);
}
EXPORT_SYMBOL(taskq_dispatch_delay);
669 
/*
 * Queue func(arg) using a caller-supplied (preallocated) taskq_ent_t, so
 * no allocation is performed here.
 *
 * tq    - target taskq
 * func  - function to run
 * arg   - opaque argument passed to func
 * flags - TQ_* flags.  TQ_FRONT queues to the priority list rather than
 *         the pending list.  TQ_NOQUEUE only accepts the task if a thread
 *         is idle or, for a dynamic taskq, one could be spawned; in the
 *         latter case the task is queued as if TQ_FRONT were set.
 * t     - entry to queue; must not currently be on any list
 *
 * There is no return value.  If the taskq is no longer active,
 * t->tqent_id is set to TASKQID_INVALID.  If the TQ_NOQUEUE condition
 * cannot be met the function returns without touching 't'.
 */
void
taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
    taskq_ent_t *t)
{
	unsigned long irqflags;
	ASSERT(tq);
	ASSERT(func);

	spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
	    tq->tq_lock_class);

	/* Taskq being destroyed and all tasks drained */
	if (!(tq->tq_flags & TASKQ_ACTIVE)) {
		t->tqent_id = TASKQID_INVALID;
		goto out;
	}

	if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
		/* Dynamic taskq may be able to spawn another thread */
		if (!(tq->tq_flags & TASKQ_DYNAMIC) ||
		    taskq_thread_spawn(tq) == 0)
			goto out2;
		/* A thread is on its way; queue to the priority list. */
		flags |= TQ_FRONT;
	}

	spin_lock(&t->tqent_lock);

	/*
	 * Make sure the entry is not on some other taskq; it is important to
	 * ASSERT() under lock
	 */
	ASSERT(taskq_empty_ent(t));

	/*
	 * Mark it as a prealloc'd task.  This is important
	 * to ensure that we don't free it later.
	 */
	t->tqent_flags |= TQENT_FLAG_PREALLOC;

	/* Queue to the priority list instead of the pending list */
	if (flags & TQ_FRONT)
		list_add_tail(&t->tqent_list, &tq->tq_prio_list);
	else
		list_add_tail(&t->tqent_list, &tq->tq_pend_list);

	t->tqent_id = tq->tq_next_id;
	tq->tq_next_id++;
	t->tqent_func = func;
	t->tqent_arg = arg;
	t->tqent_taskq = tq;

	t->tqent_birth = jiffies;
	DTRACE_PROBE1(taskq_ent__birth, taskq_ent_t *, t);

	spin_unlock(&t->tqent_lock);

	wake_up(&tq->tq_work_waitq);
out:
	/* Spawn additional taskq threads if required. */
	if (tq->tq_nactive == tq->tq_nthreads)
		(void) taskq_thread_spawn(tq);
out2:
	/* Reached directly when a TQ_NOQUEUE request is refused. */
	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
}
EXPORT_SYMBOL(taskq_dispatch_ent);
735 
736 int
737 taskq_empty_ent(taskq_ent_t *t)
738 {
739 	return (list_empty(&t->tqent_list));
740 }
741 EXPORT_SYMBOL(taskq_empty_ent);
742 
743 void
744 taskq_init_ent(taskq_ent_t *t)
745 {
746 	spin_lock_init(&t->tqent_lock);
747 	init_waitqueue_head(&t->tqent_waitq);
748 	timer_setup(&t->tqent_timer, NULL, 0);
749 	INIT_LIST_HEAD(&t->tqent_list);
750 	t->tqent_id = 0;
751 	t->tqent_func = NULL;
752 	t->tqent_arg = NULL;
753 	t->tqent_flags = 0;
754 	t->tqent_taskq = NULL;
755 }
756 EXPORT_SYMBOL(taskq_init_ent);
757 
758 /*
759  * Return the next pending task, preference is given to tasks on the
760  * priority list which were dispatched with TQ_FRONT.
761  */
762 static taskq_ent_t *
763 taskq_next_ent(taskq_t *tq)
764 {
765 	struct list_head *list;
766 
767 	if (!list_empty(&tq->tq_prio_list))
768 		list = &tq->tq_prio_list;
769 	else if (!list_empty(&tq->tq_pend_list))
770 		list = &tq->tq_pend_list;
771 	else
772 		return (NULL);
773 
774 	return (list_entry(list->next, taskq_ent_t, tqent_list));
775 }
776 
777 /*
778  * Spawns a new thread for the specified taskq.
779  */
780 static void
781 taskq_thread_spawn_task(void *arg)
782 {
783 	taskq_t *tq = (taskq_t *)arg;
784 	unsigned long flags;
785 
786 	if (taskq_thread_create(tq) == NULL) {
787 		/* restore spawning count if failed */
788 		spin_lock_irqsave_nested(&tq->tq_lock, flags,
789 		    tq->tq_lock_class);
790 		tq->tq_nspawn--;
791 		spin_unlock_irqrestore(&tq->tq_lock, flags);
792 	}
793 }
794 
795 /*
796  * Spawn addition threads for dynamic taskqs (TASKQ_DYNAMIC) the current
797  * number of threads is insufficient to handle the pending tasks.  These
798  * new threads must be created by the dedicated dynamic_taskq to avoid
799  * deadlocks between thread creation and memory reclaim.  The system_taskq
800  * which is also a dynamic taskq cannot be safely used for this.
801  */
802 static int
803 taskq_thread_spawn(taskq_t *tq)
804 {
805 	int spawning = 0;
806 
807 	if (!(tq->tq_flags & TASKQ_DYNAMIC))
808 		return (0);
809 
810 	if ((tq->tq_nthreads + tq->tq_nspawn < tq->tq_maxthreads) &&
811 	    (tq->tq_flags & TASKQ_ACTIVE)) {
812 		spawning = (++tq->tq_nspawn);
813 		taskq_dispatch(dynamic_taskq, taskq_thread_spawn_task,
814 		    tq, TQ_NOSLEEP);
815 	}
816 
817 	return (spawning);
818 }
819 
820 /*
821  * Threads in a dynamic taskq should only exit once it has been completely
822  * drained and no other threads are actively servicing tasks.  This prevents
823  * threads from being created and destroyed more than is required.
824  *
825  * The first thread is the thread list is treated as the primary thread.
826  * There is nothing special about the primary thread but in order to avoid
827  * all the taskq pids from changing we opt to make it long running.
828  */
829 static int
830 taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt)
831 {
832 	if (!(tq->tq_flags & TASKQ_DYNAMIC))
833 		return (0);
834 
835 	if (list_first_entry(&(tq->tq_thread_list), taskq_thread_t,
836 	    tqt_thread_list) == tqt)
837 		return (0);
838 
839 	return
840 	    ((tq->tq_nspawn == 0) &&	/* No threads are being spawned */
841 	    (tq->tq_nactive == 0) &&	/* No threads are handling tasks */
842 	    (tq->tq_nthreads > 1) &&	/* More than 1 thread is running */
843 	    (!taskq_next_ent(tq)) &&	/* There are no pending tasks */
844 	    (spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */
845 }
846 
/*
 * Main function of every taskq worker thread.
 *
 * 'args' is the taskq_thread_t set up by taskq_thread_create().  The
 * thread registers itself with its taskq, then loops: sleep on
 * tq_work_waitq while the priority and pending lists are both empty,
 * otherwise take the next task, run it with tq->tq_lock dropped, and
 * retire it.  The loop ends when kthread_should_stop() becomes true or
 * when taskq_thread_should_stop() says this dynamic thread is surplus.
 * The taskq_thread_t is freed by the thread itself before it returns.
 *
 * Always returns 0.
 */
static int
taskq_thread(void *args)
{
	DECLARE_WAITQUEUE(wait, current);
	sigset_t blocked;
	taskq_thread_t *tqt = args;
	taskq_t *tq;
	taskq_ent_t *t;
	int seq_tasks = 0;	/* tasks run back to back without sleeping */
	unsigned long flags;
	taskq_ent_t dup_task = {};	/* stack copy used for prealloc'd tasks */

	ASSERT(tqt);
	ASSERT(tqt->tqt_tq);
	tq = tqt->tqt_tq;
	current->flags |= PF_NOFREEZE;

	(void) spl_fstrans_mark();

	/* Block every signal and discard any that are already pending. */
	sigfillset(&blocked);
	sigprocmask(SIG_BLOCK, &blocked, NULL);
	flush_signals(current);

	/* Record the owning taskq; read back by taskq_member() et al. */
	tsd_set(taskq_tsd, tq);
	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
	/*
	 * If we are dynamically spawned, decrease spawning count. Note that
	 * we could be created during taskq_create, in which case we shouldn't
	 * do the decrement. But it's fine because taskq_create will reset
	 * tq_nspawn later.
	 */
	if (tq->tq_flags & TASKQ_DYNAMIC)
		tq->tq_nspawn--;

	/* Immediately exit if more threads than allowed were created. */
	if (tq->tq_nthreads >= tq->tq_maxthreads)
		goto error;

	tq->tq_nthreads++;
	list_add_tail(&tqt->tqt_thread_list, &tq->tq_thread_list);
	/* taskq_create() waits on this queue for tq_nthreads to reach count. */
	wake_up(&tq->tq_wait_waitq);
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {

		if (list_empty(&tq->tq_pend_list) &&
		    list_empty(&tq->tq_prio_list)) {

			if (taskq_thread_should_stop(tq, tqt)) {
				wake_up_all(&tq->tq_wait_waitq);
				break;
			}

			/* Nothing to do: sleep until work is dispatched. */
			add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
			spin_unlock_irqrestore(&tq->tq_lock, flags);

			schedule();
			/* Having slept, the run of sequential tasks is over. */
			seq_tasks = 0;

			spin_lock_irqsave_nested(&tq->tq_lock, flags,
			    tq->tq_lock_class);
			remove_wait_queue(&tq->tq_work_waitq, &wait);
		} else {
			__set_current_state(TASK_RUNNING);
		}

		if ((t = taskq_next_ent(tq)) != NULL) {
			list_del_init(&t->tqent_list);

			/*
			 * A TQENT_FLAG_PREALLOC task may be reused or freed
			 * during the task function call. Store tqent_id and
			 * tqent_flags here.
			 *
			 * Also use an on stack taskq_ent_t for tqt_task
			 * assignment in this case; we want to make sure
			 * to duplicate all fields, so the values are
			 * correct when it's accessed via DTRACE_PROBE*.
			 */
			tqt->tqt_id = t->tqent_id;
			tqt->tqt_flags = t->tqent_flags;

			if (t->tqent_flags & TQENT_FLAG_PREALLOC) {
				dup_task = *t;
				t = &dup_task;
			}
			tqt->tqt_task = t;

			/* Mark this thread active, ordered by the task's id. */
			taskq_insert_in_order(tq, tqt);
			tq->tq_nactive++;
			spin_unlock_irqrestore(&tq->tq_lock, flags);

			DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t);

			/* Perform the requested task */
			t->tqent_func(t->tqent_arg);

			DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t);

			spin_lock_irqsave_nested(&tq->tq_lock, flags,
			    tq->tq_lock_class);
			tq->tq_nactive--;
			list_del_init(&tqt->tqt_active_list);
			tqt->tqt_task = NULL;

			/* For prealloc'd tasks, we don't free anything. */
			if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
				task_done(tq, t);

			/*
			 * When the current lowest outstanding taskqid is
			 * done calculate the new lowest outstanding id
			 */
			if (tq->tq_lowest_id == tqt->tqt_id) {
				tq->tq_lowest_id = taskq_lowest_id(tq);
				ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id);
			}

			/* Spawn additional taskq threads if required. */
			if ((++seq_tasks) > spl_taskq_thread_sequential &&
			    taskq_thread_spawn(tq))
				seq_tasks = 0;

			/* Back to the idle state; wake the taskq_wait*() callers. */
			tqt->tqt_id = TASKQID_INVALID;
			tqt->tqt_flags = 0;
			wake_up_all(&tq->tq_wait_waitq);
		} else {
			if (taskq_thread_should_stop(tq, tqt))
				break;
		}

		set_current_state(TASK_INTERRUPTIBLE);

	}

	__set_current_state(TASK_RUNNING);
	tq->tq_nthreads--;
	list_del_init(&tqt->tqt_thread_list);
error:
	/* Also reached directly when the thread was never registered. */
	kmem_free(tqt, sizeof (taskq_thread_t));
	spin_unlock_irqrestore(&tq->tq_lock, flags);

	tsd_set(taskq_tsd, NULL);

	return (0);
}
993 
994 static taskq_thread_t *
995 taskq_thread_create(taskq_t *tq)
996 {
997 	static int last_used_cpu = 0;
998 	taskq_thread_t *tqt;
999 
1000 	tqt = kmem_alloc(sizeof (*tqt), KM_PUSHPAGE);
1001 	INIT_LIST_HEAD(&tqt->tqt_thread_list);
1002 	INIT_LIST_HEAD(&tqt->tqt_active_list);
1003 	tqt->tqt_tq = tq;
1004 	tqt->tqt_id = TASKQID_INVALID;
1005 
1006 	tqt->tqt_thread = spl_kthread_create(taskq_thread, tqt,
1007 	    "%s", tq->tq_name);
1008 	if (tqt->tqt_thread == NULL) {
1009 		kmem_free(tqt, sizeof (taskq_thread_t));
1010 		return (NULL);
1011 	}
1012 
1013 	if (spl_taskq_thread_bind) {
1014 		last_used_cpu = (last_used_cpu + 1) % num_online_cpus();
1015 		kthread_bind(tqt->tqt_thread, last_used_cpu);
1016 	}
1017 
1018 	if (spl_taskq_thread_priority)
1019 		set_user_nice(tqt->tqt_thread, PRIO_TO_NICE(tq->tq_pri));
1020 
1021 	wake_up_process(tqt->tqt_thread);
1022 
1023 	return (tqt);
1024 }
1025 
/*
 * Create a taskq and start its worker threads.
 *
 * name     - taskq name; duplicated into tq_name and used as thread name
 * nthreads - number of threads, or with TASKQ_THREADS_CPU_PCT a
 *            percentage (0-100) of the online CPUs, with a minimum of
 *            one thread.  The result also becomes tq_maxthreads.
 * pri      - priority stored in tq_pri
 * minalloc - number of taskq_ent_t's kept on the free list; with
 *            TASKQ_PREPOPULATE that many are allocated up front
 * maxalloc - soft cap on allocated taskq_ent_t's (see task_alloc())
 * flags    - TASKQ_* flags; TASKQ_CPR_SAFE is unsupported
 *
 * Returns the new taskq, or NULL if the taskq structure could not be
 * allocated or any worker thread failed to start.
 */
taskq_t *
taskq_create(const char *name, int nthreads, pri_t pri,
    int minalloc, int maxalloc, uint_t flags)
{
	taskq_t *tq;
	taskq_thread_t *tqt;
	int count = 0, rc = 0, i;
	unsigned long irqflags;

	ASSERT(name != NULL);
	ASSERT(minalloc >= 0);
	ASSERT(maxalloc <= INT_MAX);
	ASSERT(!(flags & (TASKQ_CPR_SAFE))); /* Unsupported */

	/* Scale the number of threads using nthreads as a percentage */
	if (flags & TASKQ_THREADS_CPU_PCT) {
		ASSERT(nthreads <= 100);
		ASSERT(nthreads >= 0);
		/* Clamp as well, for builds where ASSERT compiles away. */
		nthreads = MIN(nthreads, 100);
		nthreads = MAX(nthreads, 0);
		nthreads = MAX((num_online_cpus() * nthreads) / 100, 1);
	}

	tq = kmem_alloc(sizeof (*tq), KM_PUSHPAGE);
	if (tq == NULL)
		return (NULL);

	/* Everything must be initialized before any thread is started. */
	spin_lock_init(&tq->tq_lock);
	INIT_LIST_HEAD(&tq->tq_thread_list);
	INIT_LIST_HEAD(&tq->tq_active_list);
	tq->tq_name = kmem_strdup(name);
	tq->tq_nactive = 0;
	tq->tq_nthreads = 0;
	tq->tq_nspawn = 0;
	tq->tq_maxthreads = nthreads;
	tq->tq_pri = pri;
	tq->tq_minalloc = minalloc;
	tq->tq_maxalloc = maxalloc;
	tq->tq_nalloc = 0;
	tq->tq_flags = (flags | TASKQ_ACTIVE);
	tq->tq_next_id = TASKQID_INITIAL;
	tq->tq_lowest_id = TASKQID_INITIAL;
	INIT_LIST_HEAD(&tq->tq_free_list);
	INIT_LIST_HEAD(&tq->tq_pend_list);
	INIT_LIST_HEAD(&tq->tq_prio_list);
	INIT_LIST_HEAD(&tq->tq_delay_list);
	init_waitqueue_head(&tq->tq_work_waitq);
	init_waitqueue_head(&tq->tq_wait_waitq);
	tq->tq_lock_class = TQ_LOCK_GENERAL;
	INIT_LIST_HEAD(&tq->tq_taskqs);

	if (flags & TASKQ_PREPOPULATE) {
		spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
		    tq->tq_lock_class);

		/*
		 * Allocate minalloc fresh entries (TQ_NEW bypasses the free
		 * list) and retire each one straight away, which parks it on
		 * the free list.
		 */
		for (i = 0; i < minalloc; i++)
			task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW,
			    &irqflags));

		spin_unlock_irqrestore(&tq->tq_lock, irqflags);
	}

	/* Dynamic taskqs start with one thread; more are spawned on demand. */
	if ((flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic)
		nthreads = 1;

	for (i = 0; i < nthreads; i++) {
		tqt = taskq_thread_create(tq);
		if (tqt == NULL)
			rc = 1;
		else
			count++;
	}

	/* Wait for all threads to be started before potential destroy */
	wait_event(tq->tq_wait_waitq, tq->tq_nthreads == count);
	/*
	 * taskq_thread might have touched nspawn, but we don't want them to
	 * because they're not dynamically spawned. So we reset it to 0
	 */
	tq->tq_nspawn = 0;

	if (rc) {
		/* At least one thread failed to start: tear it all down. */
		taskq_destroy(tq);
		tq = NULL;
	} else {
		/* Publish the taskq, numbering it after same-named ones. */
		down_write(&tq_list_sem);
		tq->tq_instance = taskq_find_by_name(name) + 1;
		list_add_tail(&tq->tq_taskqs, &tq_list);
		up_write(&tq_list_sem);
	}

	return (tq);
}
EXPORT_SYMBOL(taskq_create);
1120 
1121 void
1122 taskq_destroy(taskq_t *tq)
1123 {
1124 	struct task_struct *thread;
1125 	taskq_thread_t *tqt;
1126 	taskq_ent_t *t;
1127 	unsigned long flags;
1128 
1129 	ASSERT(tq);
1130 	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
1131 	tq->tq_flags &= ~TASKQ_ACTIVE;
1132 	spin_unlock_irqrestore(&tq->tq_lock, flags);
1133 
1134 	/*
1135 	 * When TASKQ_ACTIVE is clear new tasks may not be added nor may
1136 	 * new worker threads be spawned for dynamic taskq.
1137 	 */
1138 	if (dynamic_taskq != NULL)
1139 		taskq_wait_outstanding(dynamic_taskq, 0);
1140 
1141 	taskq_wait(tq);
1142 
1143 	/* remove taskq from global list used by the kstats */
1144 	down_write(&tq_list_sem);
1145 	list_del(&tq->tq_taskqs);
1146 	up_write(&tq_list_sem);
1147 
1148 	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
1149 	/* wait for spawning threads to insert themselves to the list */
1150 	while (tq->tq_nspawn) {
1151 		spin_unlock_irqrestore(&tq->tq_lock, flags);
1152 		schedule_timeout_interruptible(1);
1153 		spin_lock_irqsave_nested(&tq->tq_lock, flags,
1154 		    tq->tq_lock_class);
1155 	}
1156 
1157 	/*
1158 	 * Signal each thread to exit and block until it does.  Each thread
1159 	 * is responsible for removing itself from the list and freeing its
1160 	 * taskq_thread_t.  This allows for idle threads to opt to remove
1161 	 * themselves from the taskq.  They can be recreated as needed.
1162 	 */
1163 	while (!list_empty(&tq->tq_thread_list)) {
1164 		tqt = list_entry(tq->tq_thread_list.next,
1165 		    taskq_thread_t, tqt_thread_list);
1166 		thread = tqt->tqt_thread;
1167 		spin_unlock_irqrestore(&tq->tq_lock, flags);
1168 
1169 		kthread_stop(thread);
1170 
1171 		spin_lock_irqsave_nested(&tq->tq_lock, flags,
1172 		    tq->tq_lock_class);
1173 	}
1174 
1175 	while (!list_empty(&tq->tq_free_list)) {
1176 		t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
1177 
1178 		ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
1179 
1180 		list_del_init(&t->tqent_list);
1181 		task_free(tq, t);
1182 	}
1183 
1184 	ASSERT0(tq->tq_nthreads);
1185 	ASSERT0(tq->tq_nalloc);
1186 	ASSERT0(tq->tq_nspawn);
1187 	ASSERT(list_empty(&tq->tq_thread_list));
1188 	ASSERT(list_empty(&tq->tq_active_list));
1189 	ASSERT(list_empty(&tq->tq_free_list));
1190 	ASSERT(list_empty(&tq->tq_pend_list));
1191 	ASSERT(list_empty(&tq->tq_prio_list));
1192 	ASSERT(list_empty(&tq->tq_delay_list));
1193 
1194 	spin_unlock_irqrestore(&tq->tq_lock, flags);
1195 
1196 	kmem_strfree(tq->tq_name);
1197 	kmem_free(tq, sizeof (taskq_t));
1198 }
1199 EXPORT_SYMBOL(taskq_destroy);
1200 
1201 
1202 static unsigned int spl_taskq_kick = 0;
1203 
1204 /*
1205  * 2.6.36 API Change
1206  * module_param_cb is introduced to take kernel_param_ops and
1207  * module_param_call is marked as obsolete. Also set and get operations
1208  * were changed to take a 'const struct kernel_param *'.
1209  */
1210 static int
1211 #ifdef module_param_cb
1212 param_set_taskq_kick(const char *val, const struct kernel_param *kp)
1213 #else
1214 param_set_taskq_kick(const char *val, struct kernel_param *kp)
1215 #endif
1216 {
1217 	int ret;
1218 	taskq_t *tq = NULL;
1219 	taskq_ent_t *t;
1220 	unsigned long flags;
1221 
1222 	ret = param_set_uint(val, kp);
1223 	if (ret < 0 || !spl_taskq_kick)
1224 		return (ret);
1225 	/* reset value */
1226 	spl_taskq_kick = 0;
1227 
1228 	down_read(&tq_list_sem);
1229 	list_for_each_entry(tq, &tq_list, tq_taskqs) {
1230 		spin_lock_irqsave_nested(&tq->tq_lock, flags,
1231 		    tq->tq_lock_class);
1232 		/* Check if the first pending is older than 5 seconds */
1233 		t = taskq_next_ent(tq);
1234 		if (t && time_after(jiffies, t->tqent_birth + 5*HZ)) {
1235 			(void) taskq_thread_spawn(tq);
1236 			printk(KERN_INFO "spl: Kicked taskq %s/%d\n",
1237 			    tq->tq_name, tq->tq_instance);
1238 		}
1239 		spin_unlock_irqrestore(&tq->tq_lock, flags);
1240 	}
1241 	up_read(&tq_list_sem);
1242 	return (ret);
1243 }
1244 
1245 #ifdef module_param_cb
1246 static const struct kernel_param_ops param_ops_taskq_kick = {
1247 	.set = param_set_taskq_kick,
1248 	.get = param_get_uint,
1249 };
1250 module_param_cb(spl_taskq_kick, &param_ops_taskq_kick, &spl_taskq_kick, 0644);
1251 #else
1252 module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint,
1253 	&spl_taskq_kick, 0644);
1254 #endif
1255 MODULE_PARM_DESC(spl_taskq_kick,
1256 	"Write nonzero to kick stuck taskqs to spawn more threads");
1257 
1258 int
1259 spl_taskq_init(void)
1260 {
1261 	init_rwsem(&tq_list_sem);
1262 	tsd_create(&taskq_tsd, NULL);
1263 
1264 	system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
1265 	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
1266 	if (system_taskq == NULL)
1267 		return (1);
1268 
1269 	system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4),
1270 	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
1271 	if (system_delay_taskq == NULL) {
1272 		taskq_destroy(system_taskq);
1273 		return (1);
1274 	}
1275 
1276 	dynamic_taskq = taskq_create("spl_dynamic_taskq", 1,
1277 	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE);
1278 	if (dynamic_taskq == NULL) {
1279 		taskq_destroy(system_taskq);
1280 		taskq_destroy(system_delay_taskq);
1281 		return (1);
1282 	}
1283 
1284 	/*
1285 	 * This is used to annotate tq_lock, so
1286 	 *   taskq_dispatch -> taskq_thread_spawn -> taskq_dispatch
1287 	 * does not trigger a lockdep warning re: possible recursive locking
1288 	 */
1289 	dynamic_taskq->tq_lock_class = TQ_LOCK_DYNAMIC;
1290 
1291 	return (0);
1292 }
1293 
1294 void
1295 spl_taskq_fini(void)
1296 {
1297 	taskq_destroy(dynamic_taskq);
1298 	dynamic_taskq = NULL;
1299 
1300 	taskq_destroy(system_delay_taskq);
1301 	system_delay_taskq = NULL;
1302 
1303 	taskq_destroy(system_taskq);
1304 	system_taskq = NULL;
1305 
1306 	tsd_destroy(&taskq_tsd);
1307 }
1308