xref: /linux/kernel/kthread.c (revision c5ab54e9945b5f3dc8e9c31b93bb334fcea126f4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Kernel thread helper functions.
3  *   Copyright (C) 2004 IBM Corporation, Rusty Russell.
4  *
5  * Creation is done via kthreadd, so that we get a clean environment
6  * even if we're invoked from userspace (think modprobe, hotplug cpu,
7  * etc.).
8  */
9 #include <uapi/linux/sched/types.h>
10 #include <linux/sched.h>
11 #include <linux/sched/task.h>
12 #include <linux/kthread.h>
13 #include <linux/completion.h>
14 #include <linux/err.h>
15 #include <linux/cgroup.h>
16 #include <linux/cpuset.h>
17 #include <linux/unistd.h>
18 #include <linux/file.h>
19 #include <linux/export.h>
20 #include <linux/mutex.h>
21 #include <linux/slab.h>
22 #include <linux/freezer.h>
23 #include <linux/ptrace.h>
24 #include <linux/uaccess.h>
25 #include <linux/numa.h>
26 #include <trace/events/sched.h>
27 
/*
 * Task woken by __kthread_create_on_node() whenever a creation request has
 * been queued.  It is not assigned anywhere in this part of the file.
 */
struct task_struct *kthreadd_task;

/* Queue of pending kthread_create_info requests, drained by kthreadd(). */
static LIST_HEAD(kthread_create_list);

/* Serialises every access to kthread_create_list. */
static DEFINE_SPINLOCK(kthread_create_lock);
31 
32 struct kthread_create_info
33 {
34 	/* Information passed to kthread() from kthreadd. */
35 	int (*threadfn)(void *data);
36 	void *data;
37 	int node;
38 
39 	/* Result passed back to kthread_create() from kthreadd. */
40 	struct task_struct *result;
41 	struct completion *done;
42 
43 	struct list_head list;
44 };
45 
46 struct kthread {
47 	unsigned long flags;
48 	unsigned int cpu;
49 	void *data;
50 	struct completion parked;
51 	struct completion exited;
52 #ifdef CONFIG_BLK_CGROUP
53 	struct cgroup_subsys_state *blkcg_css;
54 #endif
55 };
56 
/* Bit numbers inside struct kthread::flags (used with set_bit()/test_bit()). */
enum KTHREAD_BITS {
	KTHREAD_IS_PER_CPU	= 0,	/* rebind to kthread::cpu on unpark */
	KTHREAD_SHOULD_STOP	= 1,	/* set by kthread_stop() */
	KTHREAD_SHOULD_PARK	= 2,	/* set by kthread_park(), cleared by kthread_unpark() */
};
62 
63 static inline void set_kthread_struct(void *kthread)
64 {
65 	/*
66 	 * We abuse ->set_child_tid to avoid the new member and because it
67 	 * can't be wrongly copied by copy_process(). We also rely on fact
68 	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
69 	 */
70 	current->set_child_tid = (__force void __user *)kthread;
71 }
72 
73 static inline struct kthread *to_kthread(struct task_struct *k)
74 {
75 	WARN_ON(!(k->flags & PF_KTHREAD));
76 	return (__force void *)k->set_child_tid;
77 }
78 
/*
 * Release the struct kthread attached to task @k.
 *
 * The pointer is NULL when the task was started through kernel_thread()
 * directly, or when the allocation in kthread() failed.
 */
void free_kthread_struct(struct task_struct *k)
{
	struct kthread *kthread = to_kthread(k);

#ifdef CONFIG_BLK_CGROUP
	/* A css still attached at this point is reported once. */
	WARN_ON_ONCE(kthread && kthread->blkcg_css);
#endif
	kfree(kthread);
}
93 
94 /**
95  * kthread_should_stop - should this kthread return now?
96  *
97  * When someone calls kthread_stop() on your kthread, it will be woken
98  * and this will return true.  You should then return, and your return
99  * value will be passed through to kthread_stop().
100  */
101 bool kthread_should_stop(void)
102 {
103 	return test_bit(KTHREAD_SHOULD_STOP, &to_kthread(current)->flags);
104 }
105 EXPORT_SYMBOL(kthread_should_stop);
106 
107 bool __kthread_should_park(struct task_struct *k)
108 {
109 	return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(k)->flags);
110 }
111 EXPORT_SYMBOL_GPL(__kthread_should_park);
112 
113 /**
114  * kthread_should_park - should this kthread park now?
115  *
116  * When someone calls kthread_park() on your kthread, it will be woken
117  * and this will return true.  You should then do the necessary
118  * cleanup and call kthread_parkme()
119  *
120  * Similar to kthread_should_stop(), but this keeps the thread alive
121  * and in a park position. kthread_unpark() "restarts" the thread and
122  * calls the thread function again.
123  */
124 bool kthread_should_park(void)
125 {
126 	return __kthread_should_park(current);
127 }
128 EXPORT_SYMBOL_GPL(kthread_should_park);
129 
130 /**
131  * kthread_freezable_should_stop - should this freezable kthread return now?
132  * @was_frozen: optional out parameter, indicates whether %current was frozen
133  *
134  * kthread_should_stop() for freezable kthreads, which will enter
135  * refrigerator if necessary.  This function is safe from kthread_stop() /
136  * freezer deadlock and freezable kthreads should use this function instead
137  * of calling try_to_freeze() directly.
138  */
139 bool kthread_freezable_should_stop(bool *was_frozen)
140 {
141 	bool frozen = false;
142 
143 	might_sleep();
144 
145 	if (unlikely(freezing(current)))
146 		frozen = __refrigerator(true);
147 
148 	if (was_frozen)
149 		*was_frozen = frozen;
150 
151 	return kthread_should_stop();
152 }
153 EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
154 
155 /**
156  * kthread_data - return data value specified on kthread creation
157  * @task: kthread task in question
158  *
159  * Return the data value specified when kthread @task was created.
160  * The caller is responsible for ensuring the validity of @task when
161  * calling this function.
162  */
163 void *kthread_data(struct task_struct *task)
164 {
165 	return to_kthread(task)->data;
166 }
167 
168 /**
169  * kthread_probe_data - speculative version of kthread_data()
170  * @task: possible kthread task in question
171  *
172  * @task could be a kthread task.  Return the data value specified when it
173  * was created if accessible.  If @task isn't a kthread task or its data is
174  * inaccessible for any reason, %NULL is returned.  This function requires
175  * that @task itself is safe to dereference.
176  */
177 void *kthread_probe_data(struct task_struct *task)
178 {
179 	struct kthread *kthread = to_kthread(task);
180 	void *data = NULL;
181 
182 	probe_kernel_read(&data, &kthread->data, sizeof(data));
183 	return data;
184 }
185 
186 static void __kthread_parkme(struct kthread *self)
187 {
188 	for (;;) {
189 		/*
190 		 * TASK_PARKED is a special state; we must serialize against
191 		 * possible pending wakeups to avoid store-store collisions on
192 		 * task->state.
193 		 *
194 		 * Such a collision might possibly result in the task state
195 		 * changin from TASK_PARKED and us failing the
196 		 * wait_task_inactive() in kthread_park().
197 		 */
198 		set_special_state(TASK_PARKED);
199 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
200 			break;
201 
202 		complete(&self->parked);
203 		schedule();
204 	}
205 	__set_current_state(TASK_RUNNING);
206 }
207 
208 void kthread_parkme(void)
209 {
210 	__kthread_parkme(to_kthread(current));
211 }
212 EXPORT_SYMBOL_GPL(kthread_parkme);
213 
214 static int kthread(void *_create)
215 {
216 	/* Copy data: it's on kthread's stack */
217 	struct kthread_create_info *create = _create;
218 	int (*threadfn)(void *data) = create->threadfn;
219 	void *data = create->data;
220 	struct completion *done;
221 	struct kthread *self;
222 	int ret;
223 
224 	self = kzalloc(sizeof(*self), GFP_KERNEL);
225 	set_kthread_struct(self);
226 
227 	/* If user was SIGKILLed, I release the structure. */
228 	done = xchg(&create->done, NULL);
229 	if (!done) {
230 		kfree(create);
231 		do_exit(-EINTR);
232 	}
233 
234 	if (!self) {
235 		create->result = ERR_PTR(-ENOMEM);
236 		complete(done);
237 		do_exit(-ENOMEM);
238 	}
239 
240 	self->data = data;
241 	init_completion(&self->exited);
242 	init_completion(&self->parked);
243 	current->vfork_done = &self->exited;
244 
245 	/* OK, tell user we're spawned, wait for stop or wakeup */
246 	__set_current_state(TASK_UNINTERRUPTIBLE);
247 	create->result = current;
248 	complete(done);
249 	schedule();
250 
251 	ret = -EINTR;
252 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
253 		cgroup_kthread_ready();
254 		__kthread_parkme(self);
255 		ret = threadfn(data);
256 	}
257 	do_exit(ret);
258 }
259 
260 /* called from do_fork() to get node information for about to be created task */
261 int tsk_fork_get_node(struct task_struct *tsk)
262 {
263 #ifdef CONFIG_NUMA
264 	if (tsk == kthreadd_task)
265 		return tsk->pref_node_fork;
266 #endif
267 	return NUMA_NO_NODE;
268 }
269 
270 static void create_kthread(struct kthread_create_info *create)
271 {
272 	int pid;
273 
274 #ifdef CONFIG_NUMA
275 	current->pref_node_fork = create->node;
276 #endif
277 	/* We want our own signal handler (we take no signals by default). */
278 	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
279 	if (pid < 0) {
280 		/* If user was SIGKILLed, I release the structure. */
281 		struct completion *done = xchg(&create->done, NULL);
282 
283 		if (!done) {
284 			kfree(create);
285 			return;
286 		}
287 		create->result = ERR_PTR(pid);
288 		complete(done);
289 	}
290 }
291 
292 static __printf(4, 0)
293 struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
294 						    void *data, int node,
295 						    const char namefmt[],
296 						    va_list args)
297 {
298 	DECLARE_COMPLETION_ONSTACK(done);
299 	struct task_struct *task;
300 	struct kthread_create_info *create = kmalloc(sizeof(*create),
301 						     GFP_KERNEL);
302 
303 	if (!create)
304 		return ERR_PTR(-ENOMEM);
305 	create->threadfn = threadfn;
306 	create->data = data;
307 	create->node = node;
308 	create->done = &done;
309 
310 	spin_lock(&kthread_create_lock);
311 	list_add_tail(&create->list, &kthread_create_list);
312 	spin_unlock(&kthread_create_lock);
313 
314 	wake_up_process(kthreadd_task);
315 	/*
316 	 * Wait for completion in killable state, for I might be chosen by
317 	 * the OOM killer while kthreadd is trying to allocate memory for
318 	 * new kernel thread.
319 	 */
320 	if (unlikely(wait_for_completion_killable(&done))) {
321 		/*
322 		 * If I was SIGKILLed before kthreadd (or new kernel thread)
323 		 * calls complete(), leave the cleanup of this structure to
324 		 * that thread.
325 		 */
326 		if (xchg(&create->done, NULL))
327 			return ERR_PTR(-EINTR);
328 		/*
329 		 * kthreadd (or new kernel thread) will call complete()
330 		 * shortly.
331 		 */
332 		wait_for_completion(&done);
333 	}
334 	task = create->result;
335 	if (!IS_ERR(task)) {
336 		static const struct sched_param param = { .sched_priority = 0 };
337 		char name[TASK_COMM_LEN];
338 
339 		/*
340 		 * task is already visible to other tasks, so updating
341 		 * COMM must be protected.
342 		 */
343 		vsnprintf(name, sizeof(name), namefmt, args);
344 		set_task_comm(task, name);
345 		/*
346 		 * root may have changed our (kthreadd's) priority or CPU mask.
347 		 * The kernel thread should not inherit these properties.
348 		 */
349 		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
350 		set_cpus_allowed_ptr(task, cpu_all_mask);
351 	}
352 	kfree(create);
353 	return task;
354 }
355 
356 /**
357  * kthread_create_on_node - create a kthread.
358  * @threadfn: the function to run until signal_pending(current).
359  * @data: data ptr for @threadfn.
360  * @node: task and thread structures for the thread are allocated on this node
361  * @namefmt: printf-style name for the thread.
362  *
363  * Description: This helper function creates and names a kernel
364  * thread.  The thread will be stopped: use wake_up_process() to start
365  * it.  See also kthread_run().  The new thread has SCHED_NORMAL policy and
366  * is affine to all CPUs.
367  *
368  * If thread is going to be bound on a particular cpu, give its node
369  * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE.
370  * When woken, the thread will run @threadfn() with @data as its
371  * argument. @threadfn() can either call do_exit() directly if it is a
372  * standalone thread for which no one will call kthread_stop(), or
373  * return when 'kthread_should_stop()' is true (which means
374  * kthread_stop() has been called).  The return value should be zero
375  * or a negative error number; it will be passed to kthread_stop().
376  *
377  * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR).
378  */
379 struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
380 					   void *data, int node,
381 					   const char namefmt[],
382 					   ...)
383 {
384 	struct task_struct *task;
385 	va_list args;
386 
387 	va_start(args, namefmt);
388 	task = __kthread_create_on_node(threadfn, data, node, namefmt, args);
389 	va_end(args);
390 
391 	return task;
392 }
393 EXPORT_SYMBOL(kthread_create_on_node);
394 
395 static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state)
396 {
397 	unsigned long flags;
398 
399 	if (!wait_task_inactive(p, state)) {
400 		WARN_ON(1);
401 		return;
402 	}
403 
404 	/* It's safe because the task is inactive. */
405 	raw_spin_lock_irqsave(&p->pi_lock, flags);
406 	do_set_cpus_allowed(p, mask);
407 	p->flags |= PF_NO_SETAFFINITY;
408 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
409 }
410 
/* Single-CPU convenience wrapper around __kthread_bind_mask(). */
static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
{
	const struct cpumask *only_cpu = cpumask_of(cpu);

	__kthread_bind_mask(p, only_cpu, state);
}
415 
416 void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
417 {
418 	__kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
419 }
420 
421 /**
422  * kthread_bind - bind a just-created kthread to a cpu.
423  * @p: thread created by kthread_create().
424  * @cpu: cpu (might not be online, must be possible) for @k to run on.
425  *
426  * Description: This function is equivalent to set_cpus_allowed(),
427  * except that @cpu doesn't need to be online, and the thread must be
428  * stopped (i.e., just returned from kthread_create()).
429  */
430 void kthread_bind(struct task_struct *p, unsigned int cpu)
431 {
432 	__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
433 }
434 EXPORT_SYMBOL(kthread_bind);
435 
436 /**
437  * kthread_create_on_cpu - Create a cpu bound kthread
438  * @threadfn: the function to run until signal_pending(current).
439  * @data: data ptr for @threadfn.
440  * @cpu: The cpu on which the thread should be bound,
441  * @namefmt: printf-style name for the thread. Format is restricted
442  *	     to "name.*%u". Code fills in cpu number.
443  *
444  * Description: This helper function creates and names a kernel thread
445  * The thread will be woken and put into park mode.
446  */
447 struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
448 					  void *data, unsigned int cpu,
449 					  const char *namefmt)
450 {
451 	struct task_struct *p;
452 
453 	p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt,
454 				   cpu);
455 	if (IS_ERR(p))
456 		return p;
457 	kthread_bind(p, cpu);
458 	/* CPU hotplug need to bind once again when unparking the thread. */
459 	set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags);
460 	to_kthread(p)->cpu = cpu;
461 	return p;
462 }
463 
464 /**
465  * kthread_unpark - unpark a thread created by kthread_create().
466  * @k:		thread created by kthread_create().
467  *
468  * Sets kthread_should_park() for @k to return false, wakes it, and
469  * waits for it to return. If the thread is marked percpu then its
470  * bound to the cpu again.
471  */
472 void kthread_unpark(struct task_struct *k)
473 {
474 	struct kthread *kthread = to_kthread(k);
475 
476 	/*
477 	 * Newly created kthread was parked when the CPU was offline.
478 	 * The binding was lost and we need to set it again.
479 	 */
480 	if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
481 		__kthread_bind(k, kthread->cpu, TASK_PARKED);
482 
483 	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
484 	/*
485 	 * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
486 	 */
487 	wake_up_state(k, TASK_PARKED);
488 }
489 EXPORT_SYMBOL_GPL(kthread_unpark);
490 
491 /**
492  * kthread_park - park a thread created by kthread_create().
493  * @k: thread created by kthread_create().
494  *
495  * Sets kthread_should_park() for @k to return true, wakes it, and
496  * waits for it to return. This can also be called after kthread_create()
497  * instead of calling wake_up_process(): the thread will park without
498  * calling threadfn().
499  *
500  * Returns 0 if the thread is parked, -ENOSYS if the thread exited.
501  * If called by the kthread itself just the park bit is set.
502  */
503 int kthread_park(struct task_struct *k)
504 {
505 	struct kthread *kthread = to_kthread(k);
506 
507 	if (WARN_ON(k->flags & PF_EXITING))
508 		return -ENOSYS;
509 
510 	if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
511 		return -EBUSY;
512 
513 	set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
514 	if (k != current) {
515 		wake_up_process(k);
516 		/*
517 		 * Wait for __kthread_parkme() to complete(), this means we
518 		 * _will_ have TASK_PARKED and are about to call schedule().
519 		 */
520 		wait_for_completion(&kthread->parked);
521 		/*
522 		 * Now wait for that schedule() to complete and the task to
523 		 * get scheduled out.
524 		 */
525 		WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED));
526 	}
527 
528 	return 0;
529 }
530 EXPORT_SYMBOL_GPL(kthread_park);
531 
532 /**
533  * kthread_stop - stop a thread created by kthread_create().
534  * @k: thread created by kthread_create().
535  *
536  * Sets kthread_should_stop() for @k to return true, wakes it, and
537  * waits for it to exit. This can also be called after kthread_create()
538  * instead of calling wake_up_process(): the thread will exit without
539  * calling threadfn().
540  *
541  * If threadfn() may call do_exit() itself, the caller must ensure
542  * task_struct can't go away.
543  *
544  * Returns the result of threadfn(), or %-EINTR if wake_up_process()
545  * was never called.
546  */
547 int kthread_stop(struct task_struct *k)
548 {
549 	struct kthread *kthread;
550 	int ret;
551 
552 	trace_sched_kthread_stop(k);
553 
554 	get_task_struct(k);
555 	kthread = to_kthread(k);
556 	set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
557 	kthread_unpark(k);
558 	wake_up_process(k);
559 	wait_for_completion(&kthread->exited);
560 	ret = k->exit_code;
561 	put_task_struct(k);
562 
563 	trace_sched_kthread_stop_ret(ret);
564 	return ret;
565 }
566 EXPORT_SYMBOL(kthread_stop);
567 
568 int kthreadd(void *unused)
569 {
570 	struct task_struct *tsk = current;
571 
572 	/* Setup a clean context for our children to inherit. */
573 	set_task_comm(tsk, "kthreadd");
574 	ignore_signals(tsk);
575 	set_cpus_allowed_ptr(tsk, cpu_all_mask);
576 	set_mems_allowed(node_states[N_MEMORY]);
577 
578 	current->flags |= PF_NOFREEZE;
579 	cgroup_init_kthreadd();
580 
581 	for (;;) {
582 		set_current_state(TASK_INTERRUPTIBLE);
583 		if (list_empty(&kthread_create_list))
584 			schedule();
585 		__set_current_state(TASK_RUNNING);
586 
587 		spin_lock(&kthread_create_lock);
588 		while (!list_empty(&kthread_create_list)) {
589 			struct kthread_create_info *create;
590 
591 			create = list_entry(kthread_create_list.next,
592 					    struct kthread_create_info, list);
593 			list_del_init(&create->list);
594 			spin_unlock(&kthread_create_lock);
595 
596 			create_kthread(create);
597 
598 			spin_lock(&kthread_create_lock);
599 		}
600 		spin_unlock(&kthread_create_lock);
601 	}
602 
603 	return 0;
604 }
605 
606 void __kthread_init_worker(struct kthread_worker *worker,
607 				const char *name,
608 				struct lock_class_key *key)
609 {
610 	memset(worker, 0, sizeof(struct kthread_worker));
611 	raw_spin_lock_init(&worker->lock);
612 	lockdep_set_class_and_name(&worker->lock, key, name);
613 	INIT_LIST_HEAD(&worker->work_list);
614 	INIT_LIST_HEAD(&worker->delayed_work_list);
615 }
616 EXPORT_SYMBOL_GPL(__kthread_init_worker);
617 
618 /**
619  * kthread_worker_fn - kthread function to process kthread_worker
620  * @worker_ptr: pointer to initialized kthread_worker
621  *
622  * This function implements the main cycle of kthread worker. It processes
623  * work_list until it is stopped with kthread_stop(). It sleeps when the queue
624  * is empty.
625  *
626  * The works are not allowed to keep any locks, disable preemption or interrupts
627  * when they finish. There is defined a safe point for freezing when one work
628  * finishes and before a new one is started.
629  *
630  * Also the works must not be handled by more than one worker at the same time,
631  * see also kthread_queue_work().
632  */
633 int kthread_worker_fn(void *worker_ptr)
634 {
635 	struct kthread_worker *worker = worker_ptr;
636 	struct kthread_work *work;
637 
638 	/*
639 	 * FIXME: Update the check and remove the assignment when all kthread
640 	 * worker users are created using kthread_create_worker*() functions.
641 	 */
642 	WARN_ON(worker->task && worker->task != current);
643 	worker->task = current;
644 
645 	if (worker->flags & KTW_FREEZABLE)
646 		set_freezable();
647 
648 repeat:
649 	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
650 
651 	if (kthread_should_stop()) {
652 		__set_current_state(TASK_RUNNING);
653 		raw_spin_lock_irq(&worker->lock);
654 		worker->task = NULL;
655 		raw_spin_unlock_irq(&worker->lock);
656 		return 0;
657 	}
658 
659 	work = NULL;
660 	raw_spin_lock_irq(&worker->lock);
661 	if (!list_empty(&worker->work_list)) {
662 		work = list_first_entry(&worker->work_list,
663 					struct kthread_work, node);
664 		list_del_init(&work->node);
665 	}
666 	worker->current_work = work;
667 	raw_spin_unlock_irq(&worker->lock);
668 
669 	if (work) {
670 		__set_current_state(TASK_RUNNING);
671 		work->func(work);
672 	} else if (!freezing(current))
673 		schedule();
674 
675 	try_to_freeze();
676 	cond_resched();
677 	goto repeat;
678 }
679 EXPORT_SYMBOL_GPL(kthread_worker_fn);
680 
681 static __printf(3, 0) struct kthread_worker *
682 __kthread_create_worker(int cpu, unsigned int flags,
683 			const char namefmt[], va_list args)
684 {
685 	struct kthread_worker *worker;
686 	struct task_struct *task;
687 	int node = NUMA_NO_NODE;
688 
689 	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
690 	if (!worker)
691 		return ERR_PTR(-ENOMEM);
692 
693 	kthread_init_worker(worker);
694 
695 	if (cpu >= 0)
696 		node = cpu_to_node(cpu);
697 
698 	task = __kthread_create_on_node(kthread_worker_fn, worker,
699 						node, namefmt, args);
700 	if (IS_ERR(task))
701 		goto fail_task;
702 
703 	if (cpu >= 0)
704 		kthread_bind(task, cpu);
705 
706 	worker->flags = flags;
707 	worker->task = task;
708 	wake_up_process(task);
709 	return worker;
710 
711 fail_task:
712 	kfree(worker);
713 	return ERR_CAST(task);
714 }
715 
716 /**
717  * kthread_create_worker - create a kthread worker
718  * @flags: flags modifying the default behavior of the worker
719  * @namefmt: printf-style name for the kthread worker (task).
720  *
721  * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
722  * when the needed structures could not get allocated, and ERR_PTR(-EINTR)
723  * when the worker was SIGKILLed.
724  */
725 struct kthread_worker *
726 kthread_create_worker(unsigned int flags, const char namefmt[], ...)
727 {
728 	struct kthread_worker *worker;
729 	va_list args;
730 
731 	va_start(args, namefmt);
732 	worker = __kthread_create_worker(-1, flags, namefmt, args);
733 	va_end(args);
734 
735 	return worker;
736 }
737 EXPORT_SYMBOL(kthread_create_worker);
738 
739 /**
740  * kthread_create_worker_on_cpu - create a kthread worker and bind it
741  *	it to a given CPU and the associated NUMA node.
742  * @cpu: CPU number
743  * @flags: flags modifying the default behavior of the worker
744  * @namefmt: printf-style name for the kthread worker (task).
745  *
746  * Use a valid CPU number if you want to bind the kthread worker
747  * to the given CPU and the associated NUMA node.
748  *
749  * A good practice is to add the cpu number also into the worker name.
750  * For example, use kthread_create_worker_on_cpu(cpu, "helper/%d", cpu).
751  *
752  * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
753  * when the needed structures could not get allocated, and ERR_PTR(-EINTR)
754  * when the worker was SIGKILLed.
755  */
756 struct kthread_worker *
757 kthread_create_worker_on_cpu(int cpu, unsigned int flags,
758 			     const char namefmt[], ...)
759 {
760 	struct kthread_worker *worker;
761 	va_list args;
762 
763 	va_start(args, namefmt);
764 	worker = __kthread_create_worker(cpu, flags, namefmt, args);
765 	va_end(args);
766 
767 	return worker;
768 }
769 EXPORT_SYMBOL(kthread_create_worker_on_cpu);
770 
771 /*
772  * Returns true when the work could not be queued at the moment.
773  * It happens when it is already pending in a worker list
774  * or when it is being cancelled.
775  */
776 static inline bool queuing_blocked(struct kthread_worker *worker,
777 				   struct kthread_work *work)
778 {
779 	lockdep_assert_held(&worker->lock);
780 
781 	return !list_empty(&work->node) || work->canceling;
782 }
783 
784 static void kthread_insert_work_sanity_check(struct kthread_worker *worker,
785 					     struct kthread_work *work)
786 {
787 	lockdep_assert_held(&worker->lock);
788 	WARN_ON_ONCE(!list_empty(&work->node));
789 	/* Do not use a work with >1 worker, see kthread_queue_work() */
790 	WARN_ON_ONCE(work->worker && work->worker != worker);
791 }
792 
793 /* insert @work before @pos in @worker */
794 static void kthread_insert_work(struct kthread_worker *worker,
795 				struct kthread_work *work,
796 				struct list_head *pos)
797 {
798 	kthread_insert_work_sanity_check(worker, work);
799 
800 	list_add_tail(&work->node, pos);
801 	work->worker = worker;
802 	if (!worker->current_work && likely(worker->task))
803 		wake_up_process(worker->task);
804 }
805 
806 /**
807  * kthread_queue_work - queue a kthread_work
808  * @worker: target kthread_worker
809  * @work: kthread_work to queue
810  *
811  * Queue @work to work processor @task for async execution.  @task
812  * must have been created with kthread_worker_create().  Returns %true
813  * if @work was successfully queued, %false if it was already pending.
814  *
815  * Reinitialize the work if it needs to be used by another worker.
816  * For example, when the worker was stopped and started again.
817  */
818 bool kthread_queue_work(struct kthread_worker *worker,
819 			struct kthread_work *work)
820 {
821 	bool ret = false;
822 	unsigned long flags;
823 
824 	raw_spin_lock_irqsave(&worker->lock, flags);
825 	if (!queuing_blocked(worker, work)) {
826 		kthread_insert_work(worker, work, &worker->work_list);
827 		ret = true;
828 	}
829 	raw_spin_unlock_irqrestore(&worker->lock, flags);
830 	return ret;
831 }
832 EXPORT_SYMBOL_GPL(kthread_queue_work);
833 
834 /**
835  * kthread_delayed_work_timer_fn - callback that queues the associated kthread
836  *	delayed work when the timer expires.
837  * @t: pointer to the expired timer
838  *
839  * The format of the function is defined by struct timer_list.
840  * It should have been called from irqsafe timer with irq already off.
841  */
842 void kthread_delayed_work_timer_fn(struct timer_list *t)
843 {
844 	struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
845 	struct kthread_work *work = &dwork->work;
846 	struct kthread_worker *worker = work->worker;
847 	unsigned long flags;
848 
849 	/*
850 	 * This might happen when a pending work is reinitialized.
851 	 * It means that it is used a wrong way.
852 	 */
853 	if (WARN_ON_ONCE(!worker))
854 		return;
855 
856 	raw_spin_lock_irqsave(&worker->lock, flags);
857 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
858 	WARN_ON_ONCE(work->worker != worker);
859 
860 	/* Move the work from worker->delayed_work_list. */
861 	WARN_ON_ONCE(list_empty(&work->node));
862 	list_del_init(&work->node);
863 	kthread_insert_work(worker, work, &worker->work_list);
864 
865 	raw_spin_unlock_irqrestore(&worker->lock, flags);
866 }
867 EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
868 
869 static void __kthread_queue_delayed_work(struct kthread_worker *worker,
870 					 struct kthread_delayed_work *dwork,
871 					 unsigned long delay)
872 {
873 	struct timer_list *timer = &dwork->timer;
874 	struct kthread_work *work = &dwork->work;
875 
876 	WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn);
877 
878 	/*
879 	 * If @delay is 0, queue @dwork->work immediately.  This is for
880 	 * both optimization and correctness.  The earliest @timer can
881 	 * expire is on the closest next tick and delayed_work users depend
882 	 * on that there's no such delay when @delay is 0.
883 	 */
884 	if (!delay) {
885 		kthread_insert_work(worker, work, &worker->work_list);
886 		return;
887 	}
888 
889 	/* Be paranoid and try to detect possible races already now. */
890 	kthread_insert_work_sanity_check(worker, work);
891 
892 	list_add(&work->node, &worker->delayed_work_list);
893 	work->worker = worker;
894 	timer->expires = jiffies + delay;
895 	add_timer(timer);
896 }
897 
898 /**
899  * kthread_queue_delayed_work - queue the associated kthread work
900  *	after a delay.
901  * @worker: target kthread_worker
902  * @dwork: kthread_delayed_work to queue
903  * @delay: number of jiffies to wait before queuing
904  *
905  * If the work has not been pending it starts a timer that will queue
906  * the work after the given @delay. If @delay is zero, it queues the
907  * work immediately.
908  *
909  * Return: %false if the @work has already been pending. It means that
910  * either the timer was running or the work was queued. It returns %true
911  * otherwise.
912  */
913 bool kthread_queue_delayed_work(struct kthread_worker *worker,
914 				struct kthread_delayed_work *dwork,
915 				unsigned long delay)
916 {
917 	struct kthread_work *work = &dwork->work;
918 	unsigned long flags;
919 	bool ret = false;
920 
921 	raw_spin_lock_irqsave(&worker->lock, flags);
922 
923 	if (!queuing_blocked(worker, work)) {
924 		__kthread_queue_delayed_work(worker, dwork, delay);
925 		ret = true;
926 	}
927 
928 	raw_spin_unlock_irqrestore(&worker->lock, flags);
929 	return ret;
930 }
931 EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
932 
933 struct kthread_flush_work {
934 	struct kthread_work	work;
935 	struct completion	done;
936 };
937 
938 static void kthread_flush_work_fn(struct kthread_work *work)
939 {
940 	struct kthread_flush_work *fwork =
941 		container_of(work, struct kthread_flush_work, work);
942 	complete(&fwork->done);
943 }
944 
945 /**
946  * kthread_flush_work - flush a kthread_work
947  * @work: work to flush
948  *
949  * If @work is queued or executing, wait for it to finish execution.
950  */
951 void kthread_flush_work(struct kthread_work *work)
952 {
953 	struct kthread_flush_work fwork = {
954 		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
955 		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
956 	};
957 	struct kthread_worker *worker;
958 	bool noop = false;
959 
960 	worker = work->worker;
961 	if (!worker)
962 		return;
963 
964 	raw_spin_lock_irq(&worker->lock);
965 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
966 	WARN_ON_ONCE(work->worker != worker);
967 
968 	if (!list_empty(&work->node))
969 		kthread_insert_work(worker, &fwork.work, work->node.next);
970 	else if (worker->current_work == work)
971 		kthread_insert_work(worker, &fwork.work,
972 				    worker->work_list.next);
973 	else
974 		noop = true;
975 
976 	raw_spin_unlock_irq(&worker->lock);
977 
978 	if (!noop)
979 		wait_for_completion(&fwork.done);
980 }
981 EXPORT_SYMBOL_GPL(kthread_flush_work);
982 
983 /*
984  * This function removes the work from the worker queue. Also it makes sure
985  * that it won't get queued later via the delayed work's timer.
986  *
987  * The work might still be in use when this function finishes. See the
988  * current_work proceed by the worker.
989  *
990  * Return: %true if @work was pending and successfully canceled,
991  *	%false if @work was not pending
992  */
993 static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
994 				  unsigned long *flags)
995 {
996 	/* Try to cancel the timer if exists. */
997 	if (is_dwork) {
998 		struct kthread_delayed_work *dwork =
999 			container_of(work, struct kthread_delayed_work, work);
1000 		struct kthread_worker *worker = work->worker;
1001 
1002 		/*
1003 		 * del_timer_sync() must be called to make sure that the timer
1004 		 * callback is not running. The lock must be temporary released
1005 		 * to avoid a deadlock with the callback. In the meantime,
1006 		 * any queuing is blocked by setting the canceling counter.
1007 		 */
1008 		work->canceling++;
1009 		raw_spin_unlock_irqrestore(&worker->lock, *flags);
1010 		del_timer_sync(&dwork->timer);
1011 		raw_spin_lock_irqsave(&worker->lock, *flags);
1012 		work->canceling--;
1013 	}
1014 
1015 	/*
1016 	 * Try to remove the work from a worker list. It might either
1017 	 * be from worker->work_list or from worker->delayed_work_list.
1018 	 */
1019 	if (!list_empty(&work->node)) {
1020 		list_del_init(&work->node);
1021 		return true;
1022 	}
1023 
1024 	return false;
1025 }
1026 
1027 /**
1028  * kthread_mod_delayed_work - modify delay of or queue a kthread delayed work
1029  * @worker: kthread worker to use
1030  * @dwork: kthread delayed work to queue
1031  * @delay: number of jiffies to wait before queuing
1032  *
1033  * If @dwork is idle, equivalent to kthread_queue_delayed_work(). Otherwise,
1034  * modify @dwork's timer so that it expires after @delay. If @delay is zero,
1035  * @work is guaranteed to be queued immediately.
1036  *
1037  * Return: %true if @dwork was pending and its timer was modified,
1038  * %false otherwise.
1039  *
1040  * A special case is when the work is being canceled in parallel.
1041  * It might be caused either by the real kthread_cancel_delayed_work_sync()
1042  * or yet another kthread_mod_delayed_work() call. We let the other command
1043  * win and return %false here. The caller is supposed to synchronize these
1044  * operations a reasonable way.
1045  *
1046  * This function is safe to call from any context including IRQ handler.
1047  * See __kthread_cancel_work() and kthread_delayed_work_timer_fn()
1048  * for details.
1049  */
1050 bool kthread_mod_delayed_work(struct kthread_worker *worker,
1051 			      struct kthread_delayed_work *dwork,
1052 			      unsigned long delay)
1053 {
1054 	struct kthread_work *work = &dwork->work;
1055 	unsigned long flags;
1056 	int ret = false;
1057 
1058 	raw_spin_lock_irqsave(&worker->lock, flags);
1059 
1060 	/* Do not bother with canceling when never queued. */
1061 	if (!work->worker)
1062 		goto fast_queue;
1063 
1064 	/* Work must not be used with >1 worker, see kthread_queue_work() */
1065 	WARN_ON_ONCE(work->worker != worker);
1066 
1067 	/* Do not fight with another command that is canceling this work. */
1068 	if (work->canceling)
1069 		goto out;
1070 
1071 	ret = __kthread_cancel_work(work, true, &flags);
1072 fast_queue:
1073 	__kthread_queue_delayed_work(worker, dwork, delay);
1074 out:
1075 	raw_spin_unlock_irqrestore(&worker->lock, flags);
1076 	return ret;
1077 }
1078 EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
1079 
1080 static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
1081 {
1082 	struct kthread_worker *worker = work->worker;
1083 	unsigned long flags;
1084 	int ret = false;
1085 
1086 	if (!worker)
1087 		goto out;
1088 
1089 	raw_spin_lock_irqsave(&worker->lock, flags);
1090 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
1091 	WARN_ON_ONCE(work->worker != worker);
1092 
1093 	ret = __kthread_cancel_work(work, is_dwork, &flags);
1094 
1095 	if (worker->current_work != work)
1096 		goto out_fast;
1097 
1098 	/*
1099 	 * The work is in progress and we need to wait with the lock released.
1100 	 * In the meantime, block any queuing by setting the canceling counter.
1101 	 */
1102 	work->canceling++;
1103 	raw_spin_unlock_irqrestore(&worker->lock, flags);
1104 	kthread_flush_work(work);
1105 	raw_spin_lock_irqsave(&worker->lock, flags);
1106 	work->canceling--;
1107 
1108 out_fast:
1109 	raw_spin_unlock_irqrestore(&worker->lock, flags);
1110 out:
1111 	return ret;
1112 }
1113 
1114 /**
1115  * kthread_cancel_work_sync - cancel a kthread work and wait for it to finish
1116  * @work: the kthread work to cancel
1117  *
1118  * Cancel @work and wait for its execution to finish.  This function
1119  * can be used even if the work re-queues itself. On return from this
1120  * function, @work is guaranteed to be not pending or executing on any CPU.
1121  *
1122  * kthread_cancel_work_sync(&delayed_work->work) must not be used for
1123  * delayed_work's. Use kthread_cancel_delayed_work_sync() instead.
1124  *
1125  * The caller must ensure that the worker on which @work was last
1126  * queued can't be destroyed before this function returns.
1127  *
1128  * Return: %true if @work was pending, %false otherwise.
1129  */
1130 bool kthread_cancel_work_sync(struct kthread_work *work)
1131 {
1132 	return __kthread_cancel_work_sync(work, false);
1133 }
1134 EXPORT_SYMBOL_GPL(kthread_cancel_work_sync);
1135 
1136 /**
1137  * kthread_cancel_delayed_work_sync - cancel a kthread delayed work and
1138  *	wait for it to finish.
1139  * @dwork: the kthread delayed work to cancel
1140  *
1141  * This is kthread_cancel_work_sync() for delayed works.
1142  *
1143  * Return: %true if @dwork was pending, %false otherwise.
1144  */
1145 bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *dwork)
1146 {
1147 	return __kthread_cancel_work_sync(&dwork->work, true);
1148 }
1149 EXPORT_SYMBOL_GPL(kthread_cancel_delayed_work_sync);
1150 
1151 /**
1152  * kthread_flush_worker - flush all current works on a kthread_worker
1153  * @worker: worker to flush
1154  *
1155  * Wait until all currently executing or pending works on @worker are
1156  * finished.
1157  */
1158 void kthread_flush_worker(struct kthread_worker *worker)
1159 {
1160 	struct kthread_flush_work fwork = {
1161 		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
1162 		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
1163 	};
1164 
1165 	kthread_queue_work(worker, &fwork.work);
1166 	wait_for_completion(&fwork.done);
1167 }
1168 EXPORT_SYMBOL_GPL(kthread_flush_worker);
1169 
1170 /**
1171  * kthread_destroy_worker - destroy a kthread worker
1172  * @worker: worker to be destroyed
1173  *
1174  * Flush and destroy @worker.  The simple flush is enough because the kthread
1175  * worker API is used only in trivial scenarios.  There are no multi-step state
1176  * machines needed.
1177  */
1178 void kthread_destroy_worker(struct kthread_worker *worker)
1179 {
1180 	struct task_struct *task;
1181 
1182 	task = worker->task;
1183 	if (WARN_ON(!task))
1184 		return;
1185 
1186 	kthread_flush_worker(worker);
1187 	kthread_stop(task);
1188 	WARN_ON(!list_empty(&worker->work_list));
1189 	kfree(worker);
1190 }
1191 EXPORT_SYMBOL(kthread_destroy_worker);
1192 
1193 #ifdef CONFIG_BLK_CGROUP
1194 /**
1195  * kthread_associate_blkcg - associate blkcg to current kthread
1196  * @css: the cgroup info
1197  *
1198  * Current thread must be a kthread. The thread is running jobs on behalf of
1199  * other threads. In some cases, we expect the jobs attach cgroup info of
1200  * original threads instead of that of current thread. This function stores
1201  * original thread's cgroup info in current kthread context for later
1202  * retrieval.
1203  */
1204 void kthread_associate_blkcg(struct cgroup_subsys_state *css)
1205 {
1206 	struct kthread *kthread;
1207 
1208 	if (!(current->flags & PF_KTHREAD))
1209 		return;
1210 	kthread = to_kthread(current);
1211 	if (!kthread)
1212 		return;
1213 
1214 	if (kthread->blkcg_css) {
1215 		css_put(kthread->blkcg_css);
1216 		kthread->blkcg_css = NULL;
1217 	}
1218 	if (css) {
1219 		css_get(css);
1220 		kthread->blkcg_css = css;
1221 	}
1222 }
1223 EXPORT_SYMBOL(kthread_associate_blkcg);
1224 
1225 /**
1226  * kthread_blkcg - get associated blkcg css of current kthread
1227  *
1228  * Current thread must be a kthread.
1229  */
1230 struct cgroup_subsys_state *kthread_blkcg(void)
1231 {
1232 	struct kthread *kthread;
1233 
1234 	if (current->flags & PF_KTHREAD) {
1235 		kthread = to_kthread(current);
1236 		if (kthread)
1237 			return kthread->blkcg_css;
1238 	}
1239 	return NULL;
1240 }
1241 EXPORT_SYMBOL(kthread_blkcg);
1242 #endif
1243