1 // SPDX-License-Identifier: GPL-2.0-only
3 * kernel/workqueue.c - generic async execution with shared worker pool
10 * Kai Petzke <wpp@marie.physik.tu-berlin.de>
22 * pools for workqueues which are not bound to any specific CPU - the
25 * Please read Documentation/core-api/workqueue.rst for details.
79 * BH pool is per-CPU and always DISASSOCIATED.
108 UNBOUND_POOL_HASH_ORDER = 6, /* hashed by pool->attrs */
128 WORKER_ID_LEN = 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */
142 * I: Modifiable by initialization/destruction paths and read-only for
148 * L: pool->lock protected. Access with pool->lock held.
150 * LN: pool->lock and wq_node_nr_active->lock protected for writes. Either for
153 * K: Only modified by worker while holding pool->lock. Can be safely read by
154 * self, while holding pool->lock or from IRQ context if %current is the
165 * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
167 * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
170 * WQ: wq->mutex protected.
172 * WR: wq->mutex protected for writes. RCU protected for reads.
174 * WO: wq->mutex protected for writes. Updated with WRITE_ONCE() and can be read
197 * but w/ pool->lock held. The readers grab pool->lock and are
208 struct timer_list idle_timer; /* L: worker idle timeout */
236 * Per-pool_workqueue statistics. These can be monitored using
244 PWQ_STAT_CM_WAKEUP, /* concurrency-management worker wakeups */
253 * The per-pool workqueue. While queued, bits below WORK_PWQ_SHIFT
254 * of work_struct->data are used for flags and the remaining high bits
271 * When pwq->nr_active >= max_active, new work item is queued to
272 * pwq->inactive_works instead of pool->worklist and marked with
276 * nr_active and all work items in pwq->inactive_works are marked with
278 * in pwq->inactive_works. Some of them are ready to run in
279 * pool->worklist or worker->scheduled. Those work items are only struct
281 * in nr_active. For a non-barrier work item, it is marked with
282 * WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
286 struct list_head pending_node; /* LN: node on wq_node_nr_active->pending_pwqs */
287 struct list_head pwqs_node; /* WR: node on wq->pwqs */
288 struct list_head mayday_node; /* MD: node on wq->maydays */
296 * grabbing wq->mutex.
314 * Unlike in a per-cpu workqueue where max_active limits its concurrency level
319 * The following struct is used to enforce per-node max_active. When a pwq wants
320 * to start executing a work item, it should increment ->nr using
321 * tryinc_node_nr_active(). If acquisition fails due to ->nr already being over
322 * ->max, the pwq is queued on ->pending_pwqs. As in-flight work items finish
323 * and decrement ->nr, node_activate_pending_pwq() activates the pending pwqs in
324 * round-robin order.
327 int max; /* per-node max_active */
328 atomic_t nr; /* per-node nr_active */
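/*
 * Illustrative sketch of the user-visible knob behind the nr_active and
 * per-node max_active machinery described above: a workqueue created with
 * max_active = 4 lets roughly four work items execute concurrently; further
 * items queued on it sit on pwq->inactive_works until an active slot is
 * retired. "ex_wq", "ex_work" and "ex_fn" are made-up names for the example.
 */
#include <linux/workqueue.h>
#include <linux/errno.h>

static void ex_fn(struct work_struct *work)
{
        /* runs with at most ~4 concurrent peers on ex_wq */
}

static DECLARE_WORK(ex_work, ex_fn);
static struct workqueue_struct *ex_wq;

static int ex_setup(void)
{
        ex_wq = alloc_workqueue("ex_wq", WQ_UNBOUND, 4);        /* max_active = 4 */
        if (!ex_wq)
                return -ENOMEM;
        queue_work(ex_wq, &ex_work);
        return 0;
}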
383 struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */
384 struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
389 * See the comment above workqueue_attrs->affn_scope.
393 cpumask_var_t *pod_cpus; /* pod -> cpus */
394 int *pod_node; /* pod -> node */
395 int *cpu_pod; /* cpu -> pod */
414 * Per-cpu work items which run for longer than the following threshold are
416 * management to prevent them from noticeably delaying other per-cpu work items.
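/*
 * Sketch of opting out of concurrency management up front instead of relying
 * on the auto-detection threshold above: a workqueue created with
 * WQ_CPU_INTENSIVE never counts its runners toward the pool's concurrency
 * level. "crunch_wq" is a hypothetical name.
 */
#include <linux/workqueue.h>
#include <linux/errno.h>

static struct workqueue_struct *crunch_wq;

static int crunch_init(void)
{
        crunch_wq = alloc_workqueue("crunch_wq", WQ_CPU_INTENSIVE, 0);
        return crunch_wq ? 0 : -ENOMEM;
}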
444 static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
471 * following always forces round-robin CPU selection on unbound work items
487 /* the per-cpu worker pools */
492 /* PL: hash of all unbound pools keyed by pool->attrs */
504 * worker to avoid A-A deadlocks.
546 !lockdep_is_held(&wq->mutex) && \
548 "RCU, wq->mutex or wq_pool_mutex should be held")
561 * for_each_pool - iterate through all worker_pools in the system
578 * for_each_pool_worker - iterate through all workers of a worker_pool
588 list_for_each_entry((worker), &(pool)->workers, node) \
593 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
597 * This must be called either with wq->mutex held or RCU read locked.
605 list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
606 lockdep_is_held(&(wq->mutex)))
614 return ((struct work_struct *) addr)->func; in work_debug_hint()
626 * - an active object is initialized
644 * - an active object is freed
695 timer_destroy_on_stack(&work->timer); in destroy_delayed_work_on_stack()
696 debug_object_free(&work->work, &work_debug_descr); in destroy_delayed_work_on_stack()
706 * worker_pool_assign_id - allocate ID and assign it to @pool
710 * successfully, -errno on failure.
721 pool->id = ret; in worker_pool_assign_id()
731 return per_cpu_ptr(wq->cpu_pwq, cpu); in unbound_pwq_slot()
733 return &wq->dfl_pwq; in unbound_pwq_slot()
741 lockdep_is_held(&wq->mutex)); in unbound_pwq()
745 * unbound_effective_cpumask - effective cpumask of an unbound workqueue
748 * @wq->unbound_attrs->cpumask contains the cpumask requested by the user which
754 return unbound_pwq(wq, -1)->pool->attrs->__pod_cpumask; in unbound_effective_cpumask()
765 ((1 << WORK_STRUCT_COLOR_BITS) - 1); in get_work_color()
775 return (pool->flags & POOL_BH) ? WORK_OFFQ_BH : 0; in pool_offq_flags()
784 * can be used to set the pwq, pool or clear work->data. These functions should
785 * only be called while the work is owned - ie. while the PENDING bit is set.
795 atomic_long_set(&work->data, data | work_static(work)); in set_work_data()
827 * work->current_func, which is executed afterwards. This possible in set_work_pool_and_clear_pending()
832 * ---------------------------- -------------------------------- in set_work_pool_and_clear_pending()
840 * 7 work->current_func() { in set_work_pool_and_clear_pending()
862 unsigned long data = atomic_long_read(&work->data); in get_work_pwq()
871 * get_work_pool - return the worker_pool a given work was associated with
876 * called under wq_pool_mutex or inside of a rcu_read_lock() region.
887 unsigned long data = atomic_long_read(&work->data); in get_work_pool()
893 return work_struct_pwq(data)->pool; in get_work_pool()
904 return (v >> shift) & ((1U << bits) - 1); in shift_and_mask()
911 offqd->pool_id = shift_and_mask(data, WORK_OFFQ_POOL_SHIFT, in work_offqd_unpack()
913 offqd->disable = shift_and_mask(data, WORK_OFFQ_DISABLE_SHIFT, in work_offqd_unpack()
915 offqd->flags = data & WORK_OFFQ_FLAG_MASK; in work_offqd_unpack()
920 return ((unsigned long)offqd->disable << WORK_OFFQ_DISABLE_SHIFT) | in work_offqd_pack_flags()
921 ((unsigned long)offqd->flags); in work_offqd_pack_flags()
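/*
 * Standalone sketch of the shift-and-mask packing used by the off-queue
 * work->data encoding unpacked above. The field widths and positions here
 * are illustrative only; the real layout is defined by the WORK_OFFQ_*
 * constants.
 */
#define EX_FLAG_BITS            4
#define EX_DISABLE_SHIFT        EX_FLAG_BITS
#define EX_DISABLE_BITS         16
#define EX_POOL_SHIFT           (EX_DISABLE_SHIFT + EX_DISABLE_BITS)
#define EX_POOL_BITS            12

static unsigned long ex_field(unsigned long data, unsigned int shift,
                              unsigned int bits)
{
        return (data >> shift) & ((1UL << bits) - 1);   /* cf. shift_and_mask() */
}

static unsigned long ex_pack(unsigned long pool_id, unsigned long disable,
                             unsigned long flags)
{
        return ((pool_id & ((1UL << EX_POOL_BITS) - 1)) << EX_POOL_SHIFT) |
               ((disable & ((1UL << EX_DISABLE_BITS) - 1)) << EX_DISABLE_SHIFT) |
               (flags & ((1UL << EX_FLAG_BITS) - 1));
}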
927 * they're being called with pool->lock held.
940 return !list_empty(&pool->worklist) && !pool->nr_running; in need_more_worker()
946 return pool->nr_idle; in may_start_working()
952 return !list_empty(&pool->worklist) && (pool->nr_running <= 1); in keep_working()
964 bool managing = pool->flags & POOL_MANAGER_ACTIVE; in too_many_workers()
965 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ in too_many_workers()
966 int nr_busy = pool->nr_workers - nr_idle; in too_many_workers()
968 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; in too_many_workers()
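/*
 * Worked example of the check above, assuming MAX_IDLE_WORKERS_RATIO is 4
 * (up to 1/4 of busy workers may sit idle): with nr_busy = 10, three or four
 * idle workers are tolerated because (3 - 2) * 4 = 4 and (4 - 2) * 4 = 8 are
 * both below 10, while a fifth idle worker trips the condition since
 * (5 - 2) * 4 = 12 >= 10, letting the idle timer start reaping.
 */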
972 * worker_set_flags - set worker flags and adjust nr_running accordingly
976 * Set @flags in @worker->flags and adjust nr_running accordingly.
980 struct worker_pool *pool = worker->pool; in worker_set_flags()
982 lockdep_assert_held(&pool->lock); in worker_set_flags()
986 !(worker->flags & WORKER_NOT_RUNNING)) { in worker_set_flags()
987 pool->nr_running--; in worker_set_flags()
990 worker->flags |= flags; in worker_set_flags()
994 * worker_clr_flags - clear worker flags and adjust nr_running accordingly
998 * Clear @flags in @worker->flags and adjust nr_running accordingly.
1002 struct worker_pool *pool = worker->pool; in worker_clr_flags()
1003 unsigned int oflags = worker->flags; in worker_clr_flags()
1005 lockdep_assert_held(&pool->lock); in worker_clr_flags()
1007 worker->flags &= ~flags; in worker_clr_flags()
1015 if (!(worker->flags & WORKER_NOT_RUNNING)) in worker_clr_flags()
1016 pool->nr_running++; in worker_clr_flags()
1019 /* Return the first idle worker. Called with pool->lock held. */
1022 if (unlikely(list_empty(&pool->idle_list))) in first_idle_worker()
1025 return list_first_entry(&pool->idle_list, struct worker, entry); in first_idle_worker()
1029 * worker_enter_idle - enter idle state
1036 * raw_spin_lock_irq(pool->lock).
1040 struct worker_pool *pool = worker->pool; in worker_enter_idle()
1042 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) || in worker_enter_idle()
1043 WARN_ON_ONCE(!list_empty(&worker->entry) && in worker_enter_idle()
1044 (worker->hentry.next || worker->hentry.pprev))) in worker_enter_idle()
1048 worker->flags |= WORKER_IDLE; in worker_enter_idle()
1049 pool->nr_idle++; in worker_enter_idle()
1050 worker->last_active = jiffies; in worker_enter_idle()
1053 list_add(&worker->entry, &pool->idle_list); in worker_enter_idle()
1055 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) in worker_enter_idle()
1056 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); in worker_enter_idle()
1059 WARN_ON_ONCE(pool->nr_workers == pool->nr_idle && pool->nr_running); in worker_enter_idle()
1063 * worker_leave_idle - leave idle state
1069 * raw_spin_lock_irq(pool->lock).
1073 struct worker_pool *pool = worker->pool; in worker_leave_idle()
1075 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE))) in worker_leave_idle()
1078 pool->nr_idle--; in worker_leave_idle()
1079 list_del_init(&worker->entry); in worker_leave_idle()
1083 * find_worker_executing_work - find worker which is executing a work
1088 * @pool->busy_hash which is keyed by the address of @work. For a worker
1109 * raw_spin_lock_irq(pool->lock).
1120 hash_for_each_possible(pool->busy_hash, worker, hentry, in find_worker_executing_work()
1122 if (worker->current_work == work && in find_worker_executing_work()
1123 worker->current_func == work->func) in find_worker_executing_work()
1130 * move_linked_works - move linked works to a list
1141 * raw_spin_lock_irq(pool->lock).
1153 list_move_tail(&work->entry, head); in move_linked_works()
1168 * assign_work - assign a work item and its linked work items to a worker
1186 struct worker_pool *pool = worker->pool; in assign_work()
1189 lockdep_assert_held(&pool->lock); in assign_work()
1201 move_linked_works(work, &collision->scheduled, nextp); in assign_work()
1205 move_linked_works(work, &worker->scheduled, nextp); in assign_work()
1211 int high = pool->attrs->nice == HIGHPRI_NICE_LEVEL ? 1 : 0; in bh_pool_irq_work()
1213 return &per_cpu(bh_pool_irq_works, pool->cpu)[high]; in bh_pool_irq_work()
1220 if (unlikely(pool->cpu != smp_processor_id() && in kick_bh_pool()
1221 !(pool->flags & POOL_BH_DRAINING))) { in kick_bh_pool()
1222 irq_work_queue_on(bh_pool_irq_work(pool), pool->cpu); in kick_bh_pool()
1226 if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) in kick_bh_pool()
1233 * kick_pool - wake up an idle worker if necessary
1244 lockdep_assert_held(&pool->lock); in kick_pool()
1249 if (pool->flags & POOL_BH) { in kick_pool()
1254 p = worker->task; in kick_pool()
1265 * so, setting the wake_cpu won't do anything. As this is a best-effort in kick_pool()
1266 * optimization and the race window is narrow, let's leave as-is for in kick_pool()
1270 * If @pool has non-strict affinity, @worker might have ended up outside in kick_pool()
1273 if (!pool->attrs->affn_strict && in kick_pool()
1274 !cpumask_test_cpu(p->wake_cpu, pool->attrs->__pod_cpumask)) { in kick_pool()
1275 struct work_struct *work = list_first_entry(&pool->worklist, in kick_pool()
1277 int wake_cpu = cpumask_any_and_distribute(pool->attrs->__pod_cpumask, in kick_pool()
1280 p->wake_cpu = wake_cpu; in kick_pool()
1281 get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++; in kick_pool()
1292 * Concurrency-managed per-cpu work items that hog CPU for longer than
1294 * which prevents them from stalling other concurrency-managed work items. If a
1322 if (ent->func == func) in wci_find_ent()
1341 cnt = atomic64_inc_return_relaxed(&ent->cnt); in wq_cpu_intensive_report()
1344 is_power_of_2(cnt + 1 - wq_cpu_intensive_warning_thresh)) in wq_cpu_intensive_report()
1346 ent->func, wq_cpu_intensive_thresh_us, in wq_cpu_intensive_report()
1347 atomic64_read(&ent->cnt)); in wq_cpu_intensive_report()
1372 ent->func = func; in wq_cpu_intensive_report()
1373 atomic64_set(&ent->cnt, 0); in wq_cpu_intensive_report()
1374 hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func); in wq_cpu_intensive_report()
1386 * wq_worker_running - a worker is running again
1395 if (!READ_ONCE(worker->sleeping)) in wq_worker_running()
1401 * and leave with an unexpected pool->nr_running == 1 on the newly unbound in wq_worker_running()
1405 if (!(worker->flags & WORKER_NOT_RUNNING)) in wq_worker_running()
1406 worker->pool->nr_running++; in wq_worker_running()
1410 * CPU intensive auto-detection cares about how long a work item hogged in wq_worker_running()
1413 worker->current_at = worker->task->se.sum_exec_runtime; in wq_worker_running()
1415 WRITE_ONCE(worker->sleeping, 0); in wq_worker_running()
1419 * wq_worker_sleeping - a worker is going to sleep
1435 if (worker->flags & WORKER_NOT_RUNNING) in wq_worker_sleeping()
1438 pool = worker->pool; in wq_worker_sleeping()
1441 if (READ_ONCE(worker->sleeping)) in wq_worker_sleeping()
1444 WRITE_ONCE(worker->sleeping, 1); in wq_worker_sleeping()
1445 raw_spin_lock_irq(&pool->lock); in wq_worker_sleeping()
1448 * Recheck in case unbind_workers() preempted us. We don't in wq_worker_sleeping()
1452 if (worker->flags & WORKER_NOT_RUNNING) { in wq_worker_sleeping()
1453 raw_spin_unlock_irq(&pool->lock); in wq_worker_sleeping()
1457 pool->nr_running--; in wq_worker_sleeping()
1459 worker->current_pwq->stats[PWQ_STAT_CM_WAKEUP]++; in wq_worker_sleeping()
1461 raw_spin_unlock_irq(&pool->lock); in wq_worker_sleeping()
1465 * wq_worker_tick - a scheduler tick occurred while a kworker is running
1474 struct pool_workqueue *pwq = worker->current_pwq; in wq_worker_tick()
1475 struct worker_pool *pool = worker->pool; in wq_worker_tick()
1480 pwq->stats[PWQ_STAT_CPU_TIME] += TICK_USEC; in wq_worker_tick()
1488 * CPU_INTENSIVE to avoid stalling other concurrency-managed work items. in wq_worker_tick()
1490 * A set @worker->sleeping means that @worker is in the process of in wq_worker_tick()
1492 * @pool->nr_running until it wakes up. As wq_worker_sleeping() also in wq_worker_tick()
1493 * decrements ->nr_running, setting CPU_INTENSIVE here can lead to in wq_worker_tick()
1497 if ((worker->flags & WORKER_NOT_RUNNING) || READ_ONCE(worker->sleeping) || in wq_worker_tick()
1498 worker->task->se.sum_exec_runtime - worker->current_at < in wq_worker_tick()
1502 raw_spin_lock(&pool->lock); in wq_worker_tick()
1505 wq_cpu_intensive_report(worker->current_func); in wq_worker_tick()
1506 pwq->stats[PWQ_STAT_CPU_INTENSIVE]++; in wq_worker_tick()
1509 pwq->stats[PWQ_STAT_CM_WAKEUP]++; in wq_worker_tick()
1511 raw_spin_unlock(&pool->lock); in wq_worker_tick()
1515 * wq_worker_last_func - retrieve worker's last work function
1522 * raw_spin_lock_irq(rq->lock)
1526 * dequeuing, to allow periodic aggregation to shut off when that
1529 * As this function doesn't involve any workqueue-related locking, it
1542 return worker->last_func; in wq_worker_last_func()
1546 * wq_node_nr_active - Determine wq_node_nr_active to use
1552 * - %NULL for per-cpu workqueues as they don't need to use shared nr_active.
1554 * - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE.
1556 * - Otherwise, node_nr_active[@node].
1561 if (!(wq->flags & WQ_UNBOUND)) in wq_node_nr_active()
1567 return wq->node_nr_active[node]; in wq_node_nr_active()
1571 * wq_update_node_max_active - Update per-node max_actives to use
1573 * @off_cpu: CPU that's going down, -1 if a CPU is not going down
1575 * Update @wq->node_nr_active[]->max. @wq must be unbound. max_active is
1577 * cpus. The result is always between @wq->min_active and max_active.
1582 int min_active = READ_ONCE(wq->min_active); in wq_update_node_max_active()
1583 int max_active = READ_ONCE(wq->max_active); in wq_update_node_max_active()
1586 lockdep_assert_held(&wq->mutex); in wq_update_node_max_active()
1592 off_cpu = -1; in wq_update_node_max_active()
1596 total_cpus--; in wq_update_node_max_active()
1601 wq_node_nr_active(wq, node)->max = min_active; in wq_update_node_max_active()
1603 wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; in wq_update_node_max_active()
1612 node_cpus--; in wq_update_node_max_active()
1614 wq_node_nr_active(wq, node)->max = in wq_update_node_max_active()
1619 wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; in wq_update_node_max_active()
1623 * get_pwq - get an extra reference on the specified pool_workqueue
1627 * @pwq has positive refcnt and be holding the matching pool->lock.
1631 lockdep_assert_held(&pwq->pool->lock); in get_pwq()
1632 WARN_ON_ONCE(pwq->refcnt <= 0); in get_pwq()
1633 pwq->refcnt++; in get_pwq()
1637 * put_pwq - put a pool_workqueue reference
1641 * destruction. The caller should be holding the matching pool->lock.
1645 lockdep_assert_held(&pwq->pool->lock); in put_pwq()
1646 if (likely(--pwq->refcnt)) in put_pwq()
1649 * @pwq can't be released under pool->lock, bounce to a dedicated in put_pwq()
1650 * kthread_worker to avoid A-A deadlocks. in put_pwq()
1652 kthread_queue_work(pwq_release_worker, &pwq->release_work); in put_pwq()
1656 * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
1668 raw_spin_lock_irq(&pwq->pool->lock); in put_pwq_unlocked()
1670 raw_spin_unlock_irq(&pwq->pool->lock); in put_pwq_unlocked()
1676 return !pwq->nr_active && list_empty(&pwq->inactive_works); in pwq_is_empty()
1686 if (list_empty(&pwq->pool->worklist)) in __pwq_activate_work()
1687 pwq->pool->watchdog_ts = jiffies; in __pwq_activate_work()
1688 move_linked_works(work, &pwq->pool->worklist, NULL); in __pwq_activate_work()
1694 int max = READ_ONCE(nna->max); in tryinc_node_nr_active()
1695 int old = atomic_read(&nna->nr); in tryinc_node_nr_active()
1700 } while (!atomic_try_cmpxchg_relaxed(&nna->nr, &old, old + 1)); in tryinc_node_nr_active()
1706 * pwq_tryinc_nr_active - Try to increment nr_active for a pwq
1715 struct workqueue_struct *wq = pwq->wq; in pwq_tryinc_nr_active()
1716 struct worker_pool *pool = pwq->pool; in pwq_tryinc_nr_active()
1717 struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node); in pwq_tryinc_nr_active()
1720 lockdep_assert_held(&pool->lock); in pwq_tryinc_nr_active()
1723 /* BH or per-cpu workqueue, pwq->nr_active is sufficient */ in pwq_tryinc_nr_active()
1724 obtained = pwq->nr_active < READ_ONCE(wq->max_active); in pwq_tryinc_nr_active()
1728 if (unlikely(pwq->plugged)) in pwq_tryinc_nr_active()
1732 * Unbound workqueue uses per-node shared nr_active $nna. If @pwq is in pwq_tryinc_nr_active()
1740 if (!list_empty(&pwq->pending_node) && likely(!fill)) in pwq_tryinc_nr_active()
1748 * Lockless acquisition failed. Lock, add ourself to $nna->pending_pwqs in pwq_tryinc_nr_active()
1751 * we see the decremented $nna->nr or they see non-empty in pwq_tryinc_nr_active()
1752 * $nna->pending_pwqs. in pwq_tryinc_nr_active()
1754 raw_spin_lock(&nna->lock); in pwq_tryinc_nr_active()
1756 if (list_empty(&pwq->pending_node)) in pwq_tryinc_nr_active()
1757 list_add_tail(&pwq->pending_node, &nna->pending_pwqs); in pwq_tryinc_nr_active()
1770 list_del_init(&pwq->pending_node); in pwq_tryinc_nr_active()
1773 raw_spin_unlock(&nna->lock); in pwq_tryinc_nr_active()
1776 pwq->nr_active++; in pwq_tryinc_nr_active()
1781 * pwq_activate_first_inactive - Activate the first inactive work item on a pwq
1794 list_first_entry_or_null(&pwq->inactive_works, in pwq_activate_first_inactive()
1806 * unplug_oldest_pwq - unplug the oldest pool_workqueue
1813 * dfl_pwq --------------+ [P] - plugged
1816 * pwqs -> A -> B [P] -> C [P] (newest)
1824 * pwq's are linked into wq->pwqs with the oldest first, so the first one in
1831 lockdep_assert_held(&wq->mutex); in unplug_oldest_pwq()
1834 pwq = list_first_entry_or_null(&wq->pwqs, struct pool_workqueue, in unplug_oldest_pwq()
1836 raw_spin_lock_irq(&pwq->pool->lock); in unplug_oldest_pwq()
1837 if (pwq->plugged) { in unplug_oldest_pwq()
1838 pwq->plugged = false; in unplug_oldest_pwq()
1840 kick_pool(pwq->pool); in unplug_oldest_pwq()
1842 raw_spin_unlock_irq(&pwq->pool->lock); in unplug_oldest_pwq()
1846 * node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active
1850 * Activate a pwq in @nna->pending_pwqs. Called with @caller_pool locked.
1860 lockdep_assert_held(&caller_pool->lock); in node_activate_pending_pwq()
1862 raw_spin_lock(&nna->lock); in node_activate_pending_pwq()
1864 pwq = list_first_entry_or_null(&nna->pending_pwqs, in node_activate_pending_pwq()
1871 * @pwq->pool->lock. Let's trylock first. If unsuccessful, do the unlock in node_activate_pending_pwq()
1872 * / lock dance. For that, we also need to release @nna->lock as it's in node_activate_pending_pwq()
1875 if (pwq->pool != locked_pool) { in node_activate_pending_pwq()
1876 raw_spin_unlock(&locked_pool->lock); in node_activate_pending_pwq()
1877 locked_pool = pwq->pool; in node_activate_pending_pwq()
1878 if (!raw_spin_trylock(&locked_pool->lock)) { in node_activate_pending_pwq()
1879 raw_spin_unlock(&nna->lock); in node_activate_pending_pwq()
1880 raw_spin_lock(&locked_pool->lock); in node_activate_pending_pwq()
1881 raw_spin_lock(&nna->lock); in node_activate_pending_pwq()
1890 work = list_first_entry_or_null(&pwq->inactive_works, in node_activate_pending_pwq()
1893 list_del_init(&pwq->pending_node); in node_activate_pending_pwq()
1900 * pending_pwqs so that we round-robin through them. This means that in node_activate_pending_pwq()
1905 pwq->nr_active++; in node_activate_pending_pwq()
1908 if (list_empty(&pwq->inactive_works)) in node_activate_pending_pwq()
1909 list_del_init(&pwq->pending_node); in node_activate_pending_pwq()
1911 list_move_tail(&pwq->pending_node, &nna->pending_pwqs); in node_activate_pending_pwq()
1914 if (pwq->pool != caller_pool) in node_activate_pending_pwq()
1915 kick_pool(pwq->pool); in node_activate_pending_pwq()
1919 raw_spin_unlock(&nna->lock); in node_activate_pending_pwq()
1921 raw_spin_unlock(&locked_pool->lock); in node_activate_pending_pwq()
1922 raw_spin_lock(&caller_pool->lock); in node_activate_pending_pwq()
1927 * pwq_dec_nr_active - Retire an active count
1931 * For unbound workqueues, this function may temporarily drop @pwq->pool->lock.
1935 struct worker_pool *pool = pwq->pool; in pwq_dec_nr_active()
1936 struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pool->node); in pwq_dec_nr_active()
1938 lockdep_assert_held(&pool->lock); in pwq_dec_nr_active()
1941 * @pwq->nr_active should be decremented for both percpu and unbound in pwq_dec_nr_active()
1944 pwq->nr_active--; in pwq_dec_nr_active()
1959 * $nna->pending_pwqs. The following atomic_dec_return()'s implied in pwq_dec_nr_active()
1961 * guarantee that either we see non-empty pending_pwqs or they see in pwq_dec_nr_active()
1962 * decremented $nna->nr. in pwq_dec_nr_active()
1964 * $nna->max may change as CPUs come online/offline and @pwq->wq's in pwq_dec_nr_active()
1966 * larger than @pwq->wq->min_active which is above zero unless freezing. in pwq_dec_nr_active()
1969 if (atomic_dec_return(&nna->nr) >= READ_ONCE(nna->max)) in pwq_dec_nr_active()
1972 if (!list_empty(&nna->pending_pwqs)) in pwq_dec_nr_active()
1977 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
1985 * For unbound workqueues, this function may temporarily drop @pwq->pool->lock
1986 * and thus should be called after all other state updates for the in-flight
1990 * raw_spin_lock_irq(pool->lock).
1999 pwq->nr_in_flight[color]--; in pwq_dec_nr_in_flight()
2002 if (likely(pwq->flush_color != color)) in pwq_dec_nr_in_flight()
2005 /* are there still in-flight works? */ in pwq_dec_nr_in_flight()
2006 if (pwq->nr_in_flight[color]) in pwq_dec_nr_in_flight()
2010 pwq->flush_color = -1; in pwq_dec_nr_in_flight()
2016 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush)) in pwq_dec_nr_in_flight()
2017 complete(&pwq->wq->first_flusher->done); in pwq_dec_nr_in_flight()
2023 * try_to_grab_pending - steal work item from worklist and disable irq
2029 * stable state - idle, on timer or on worklist.
2036 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
2042 * disabled on entry. This, combined with delayed_work->timer being
2043 * irqsafe, ensures that we return -EAGAIN for finite short period of time.
2063 * dwork->timer is irqsafe. If timer_delete() fails, it's in try_to_grab_pending()
2067 if (likely(timer_delete(&dwork->timer))) in try_to_grab_pending()
2078 * steal it from ->worklist without clearing WORK_STRUCT_PENDING. in try_to_grab_pending()
2084 raw_spin_lock(&pool->lock); in try_to_grab_pending()
2086 * work->data is guaranteed to point to pwq only while the work in try_to_grab_pending()
2087 * item is queued on pwq->wq, and both updating work->data to point in try_to_grab_pending()
2089 * pwq->pool->lock. This in turn guarantees that, if work->data in try_to_grab_pending()
2094 if (pwq && pwq->pool == pool) { in try_to_grab_pending()
2101 * pwq->inactive_works since a queued barrier can't be in try_to_grab_pending()
2106 * on the inactive_works list, will confuse pwq->nr_active in try_to_grab_pending()
2114 move_linked_works(work, &pwq->pool->worklist, NULL); in try_to_grab_pending()
2116 list_del_init(&work->entry); in try_to_grab_pending()
2119 * work->data points to pwq iff queued. Let's point to pool. As in try_to_grab_pending()
2120 * this destroys work->data needed by the next step, stash it. in try_to_grab_pending()
2122 set_work_pool_and_keep_pending(work, pool->id, in try_to_grab_pending()
2128 raw_spin_unlock(&pool->lock); in try_to_grab_pending()
2132 raw_spin_unlock(&pool->lock); in try_to_grab_pending()
2136 return -EAGAIN; in try_to_grab_pending()
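/*
 * Sketch of the caller-side contract documented above: -EAGAIN is transient,
 * so callers that must win the PENDING bit simply busy-retry, mirroring the
 * loop used by the grab/cancel paths. "ex_steal" is an illustrative wrapper.
 */
static int ex_steal(struct work_struct *work, u32 cflags)
{
        unsigned long irq_flags;
        int ret;

        do {
                ret = try_to_grab_pending(work, cflags, &irq_flags);
        } while (unlikely(ret == -EAGAIN));

        /*
         * On success (>= 0), PENDING is owned and IRQs are disabled with the
         * previous state saved in irq_flags for the caller to restore.
         */
        return ret;
}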
2140 * work_grab_pending - steal work item from worklist and disable irq
2145 * Grab PENDING bit of @work. @work can be in any stable state - idle, on timer
2149 * stored in *@irq_flags. The caller is responsible for re-enabling it using
2168 * insert_work - insert a work into a pool
2178 * raw_spin_lock_irq(pool->lock).
2190 list_add_tail(&work->entry, head); in insert_work()
2207 return worker && worker->current_pwq->wq == wq; in is_chained_work()
2223 pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n"); in wq_select_unbound_cpu()
2245 * steal the PENDING will busy-loop waiting for it to either get in __queue_work()
2256 if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) && in __queue_work()
2258 work->func, wq->name))) { in __queue_work()
2265 if (wq->flags & WQ_UNBOUND) in __queue_work()
2271 pwq = rcu_dereference(*per_cpu_ptr(wq->cpu_pwq, cpu)); in __queue_work()
2272 pool = pwq->pool; in __queue_work()
2277 * pool to guarantee non-reentrancy. in __queue_work()
2281 * non-reentrancy. See the comments above unplug_oldest_pwq(). in __queue_work()
2284 if (last_pool && last_pool != pool && !(wq->flags & __WQ_ORDERED)) { in __queue_work()
2287 raw_spin_lock(&last_pool->lock); in __queue_work()
2291 if (worker && worker->current_pwq->wq == wq) { in __queue_work()
2292 pwq = worker->current_pwq; in __queue_work()
2293 pool = pwq->pool; in __queue_work()
2297 raw_spin_unlock(&last_pool->lock); in __queue_work()
2298 raw_spin_lock(&pool->lock); in __queue_work()
2301 raw_spin_lock(&pool->lock); in __queue_work()
2309 * on it, so the retrying is guaranteed to make forward-progress. in __queue_work()
2311 if (unlikely(!pwq->refcnt)) { in __queue_work()
2312 if (wq->flags & WQ_UNBOUND) { in __queue_work()
2313 raw_spin_unlock(&pool->lock); in __queue_work()
2318 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt", in __queue_work()
2319 wq->name, cpu); in __queue_work()
2325 if (WARN_ON(!list_empty(&work->entry))) in __queue_work()
2328 pwq->nr_in_flight[pwq->work_color]++; in __queue_work()
2329 work_flags = work_color_to_flags(pwq->work_color); in __queue_work()
2336 if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) { in __queue_work()
2337 if (list_empty(&pool->worklist)) in __queue_work()
2338 pool->watchdog_ts = jiffies; in __queue_work()
2341 insert_work(pwq, work, &pool->worklist, work_flags); in __queue_work()
2345 insert_work(pwq, work, &pwq->inactive_works, work_flags); in __queue_work()
2349 raw_spin_unlock(&pool->lock); in __queue_work()
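/*
 * Sketch of the user-facing wrappers around __queue_work() above: bind a work
 * item to a particular CPU with queue_work_on(), or let queue_work() pick
 * one. "ex_item" and "ex_handler" are illustrative names.
 */
#include <linux/workqueue.h>
#include <linux/printk.h>

static void ex_handler(struct work_struct *work)
{
        /* on a per-cpu workqueue this normally runs on the CPU it was queued on */
}

static DECLARE_WORK(ex_item, ex_handler);

static void ex_kick(int cpu)
{
        if (!queue_work_on(cpu, system_wq, &ex_item))
                pr_debug("ex_item was already pending\n");
}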
2369 * queue_work_on - queue work on specific cpu
2402 * select_numa_node_cpu - Select a CPU based on NUMA node
2431 * queue_work_node - queue work on a "random" cpu for a given NUMA node
2461 * If this is used with a per-cpu workqueue then the logic in in queue_work_node()
2465 WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)); in queue_work_node()
2487 __queue_work(dwork->cpu, dwork->wq, &dwork->work); in delayed_work_timer_fn()
2494 struct timer_list *timer = &dwork->timer; in __queue_delayed_work()
2495 struct work_struct *work = &dwork->work; in __queue_delayed_work()
2498 WARN_ON_ONCE(timer->function != delayed_work_timer_fn); in __queue_delayed_work()
2500 WARN_ON_ONCE(!list_empty(&work->entry)); in __queue_delayed_work()
2503 * If @delay is 0, queue @dwork->work immediately. This is for in __queue_delayed_work()
2509 __queue_work(cpu, wq, &dwork->work); in __queue_delayed_work()
2514 dwork->wq = wq; in __queue_delayed_work()
2515 dwork->cpu = cpu; in __queue_delayed_work()
2516 timer->expires = jiffies + delay; in __queue_delayed_work()
2533 * queue_delayed_work_on - queue work on specific CPU after delay
2539 * We queue the delayed_work to a specific CPU; for non-zero delays the
2541 * to ensure this may get @dwork->timer queued to an offlined CPU and
2542 * this will prevent queueing of @dwork->work unless the offlined CPU
2552 struct work_struct *work = &dwork->work; in queue_delayed_work_on()
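/*
 * Sketch of the delayed queueing API documented above: arm a work item to run
 * on a specific CPU roughly 100ms from now. "ex_dwork" and "ex_dfn" are
 * illustrative names.
 */
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static void ex_dfn(struct work_struct *work)
{
        /* executes about 100ms after ex_arm() */
}

static DECLARE_DELAYED_WORK(ex_dwork, ex_dfn);

static void ex_arm(int cpu)
{
        queue_delayed_work_on(cpu, system_wq, &ex_dwork, msecs_to_jiffies(100));
}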
2571 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
2594 ret = work_grab_pending(&dwork->work, WORK_CANCEL_DELAYED, &irq_flags); in mod_delayed_work_on()
2596 if (!clear_pending_if_disabled(&dwork->work)) in mod_delayed_work_on()
2610 __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work); in rcu_work_rcufn()
2615 * queue_rcu_work - queue work after a RCU grace period
2626 struct work_struct *work = &rwork->work; in queue_rcu_work()
2634 rwork->wq = wq; in queue_rcu_work()
2635 call_rcu_hurry(&rwork->rcu, rcu_work_rcufn); in queue_rcu_work()
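/*
 * Sketch of queue_rcu_work() usage: the handler runs from a workqueue only
 * after a full RCU grace period, which suits deferred teardown of objects
 * that RCU readers may still be traversing. Names are illustrative.
 */
#include <linux/workqueue.h>

static void ex_reclaim(struct work_struct *work)
{
        struct rcu_work *rwork = to_rcu_work(work);

        /* no pre-existing RCU reader can still see the object owning rwork */
        (void)rwork;
}

static struct rcu_work ex_rwork;

static void ex_defer_reclaim(void)
{
        INIT_RCU_WORK(&ex_rwork, ex_reclaim);
        queue_rcu_work(system_wq, &ex_rwork);   /* false if already pending */
}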
2649 INIT_LIST_HEAD(&worker->entry); in alloc_worker()
2650 INIT_LIST_HEAD(&worker->scheduled); in alloc_worker()
2651 INIT_LIST_HEAD(&worker->node); in alloc_worker()
2653 worker->flags = WORKER_PREP; in alloc_worker()
2660 if (pool->cpu < 0 && pool->attrs->affn_strict) in pool_allowed_cpus()
2661 return pool->attrs->__pod_cpumask; in pool_allowed_cpus()
2663 return pool->attrs->cpumask; in pool_allowed_cpus()
2667 * worker_attach_to_pool() - attach a worker to a pool
2672 * cpu-binding of @worker are kept coordinated with the pool across
2673 * cpu-[un]hotplugs.
2683 * details. BH workers are, while per-CPU, always DISASSOCIATED. in worker_attach_to_pool()
2685 if (pool->flags & POOL_DISASSOCIATED) { in worker_attach_to_pool()
2686 worker->flags |= WORKER_UNBOUND; in worker_attach_to_pool()
2688 WARN_ON_ONCE(pool->flags & POOL_BH); in worker_attach_to_pool()
2689 kthread_set_per_cpu(worker->task, pool->cpu); in worker_attach_to_pool()
2692 if (worker->rescue_wq) in worker_attach_to_pool()
2693 set_cpus_allowed_ptr(worker->task, pool_allowed_cpus(pool)); in worker_attach_to_pool()
2695 list_add_tail(&worker->node, &pool->workers); in worker_attach_to_pool()
2696 worker->pool = pool; in worker_attach_to_pool()
2705 kthread_set_per_cpu(worker->task, -1); in unbind_worker()
2707 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0); in unbind_worker()
2709 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0); in unbind_worker()
2718 list_del(&worker->node); in detach_worker()
2722 * worker_detach_from_pool() - detach a worker from its pool
2731 struct worker_pool *pool = worker->pool; in worker_detach_from_pool()
2734 WARN_ON_ONCE(pool->flags & POOL_BH); in worker_detach_from_pool()
2738 worker->pool = NULL; in worker_detach_from_pool()
2741 /* clear leftover flags without pool->lock after it is detached */ in worker_detach_from_pool()
2742 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND); in worker_detach_from_pool()
2748 if (worker->rescue_wq) in format_worker_id()
2749 return scnprintf(buf, size, "kworker/R-%s", in format_worker_id()
2750 worker->rescue_wq->name); in format_worker_id()
2753 if (pool->cpu >= 0) in format_worker_id()
2755 pool->cpu, worker->id, in format_worker_id()
2756 pool->attrs->nice < 0 ? "H" : ""); in format_worker_id()
2759 pool->id, worker->id); in format_worker_id()
2766 * create_worker - create a new workqueue worker
2783 id = ida_alloc(&pool->worker_ida, GFP_KERNEL); in create_worker()
2790 worker = alloc_worker(pool->node); in create_worker()
2796 worker->id = id; in create_worker()
2798 if (!(pool->flags & POOL_BH)) { in create_worker()
2802 worker->task = kthread_create_on_node(worker_thread, worker, in create_worker()
2803 pool->node, "%s", id_buf); in create_worker()
2804 if (IS_ERR(worker->task)) { in create_worker()
2805 if (PTR_ERR(worker->task) == -EINTR) { in create_worker()
2810 worker->task); in create_worker()
2815 set_user_nice(worker->task, pool->attrs->nice); in create_worker()
2816 kthread_bind_mask(worker->task, pool_allowed_cpus(pool)); in create_worker()
2823 raw_spin_lock_irq(&pool->lock); in create_worker()
2825 worker->pool->nr_workers++; in create_worker()
2833 if (worker->task) in create_worker()
2834 wake_up_process(worker->task); in create_worker()
2836 raw_spin_unlock_irq(&pool->lock); in create_worker()
2841 ida_free(&pool->worker_ida, id); in create_worker()
2859 list_del_init(&worker->entry); in reap_dying_workers()
2860 kthread_stop_put(worker->task); in reap_dying_workers()
2866 * set_worker_dying - Tag a worker for destruction
2868 * @list: transfer worker away from its pool->idle_list and into list
2874 * raw_spin_lock_irq(pool->lock).
2878 struct worker_pool *pool = worker->pool; in set_worker_dying()
2880 lockdep_assert_held(&pool->lock); in set_worker_dying()
2884 if (WARN_ON(worker->current_work) || in set_worker_dying()
2885 WARN_ON(!list_empty(&worker->scheduled)) || in set_worker_dying()
2886 WARN_ON(!(worker->flags & WORKER_IDLE))) in set_worker_dying()
2889 pool->nr_workers--; in set_worker_dying()
2890 pool->nr_idle--; in set_worker_dying()
2892 worker->flags |= WORKER_DIE; in set_worker_dying()
2894 list_move(&worker->entry, list); in set_worker_dying()
2897 get_task_struct(worker->task); in set_worker_dying()
2901 * idle_worker_timeout - check if some idle workers can now be deleted.
2908 * it expire and re-evaluate things from there.
2915 if (work_pending(&pool->idle_cull_work)) in idle_worker_timeout()
2918 raw_spin_lock_irq(&pool->lock); in idle_worker_timeout()
2925 worker = list_last_entry(&pool->idle_list, struct worker, entry); in idle_worker_timeout()
2926 expires = worker->last_active + IDLE_WORKER_TIMEOUT; in idle_worker_timeout()
2930 mod_timer(&pool->idle_timer, expires); in idle_worker_timeout()
2932 raw_spin_unlock_irq(&pool->lock); in idle_worker_timeout()
2935 queue_work(system_dfl_wq, &pool->idle_cull_work); in idle_worker_timeout()
2939 * idle_cull_fn - cull workers that have been idle for too long.
2955 * Grabbing wq_pool_attach_mutex here ensures an already-running worker in idle_cull_fn()
2956 * cannot proceed beyond set_pf_worker() in its self-destruct path. in idle_cull_fn()
2957 * This is required as a previously-preempted worker could run after in idle_cull_fn()
2961 raw_spin_lock_irq(&pool->lock); in idle_cull_fn()
2967 worker = list_last_entry(&pool->idle_list, struct worker, entry); in idle_cull_fn()
2968 expires = worker->last_active + IDLE_WORKER_TIMEOUT; in idle_cull_fn()
2971 mod_timer(&pool->idle_timer, expires); in idle_cull_fn()
2978 raw_spin_unlock_irq(&pool->lock); in idle_cull_fn()
2988 struct workqueue_struct *wq = pwq->wq; in send_mayday()
2992 if (!wq->rescuer) in send_mayday()
2996 if (list_empty(&pwq->mayday_node)) { in send_mayday()
3003 list_add_tail(&pwq->mayday_node, &wq->maydays); in send_mayday()
3004 wake_up_process(wq->rescuer->task); in send_mayday()
3005 pwq->stats[PWQ_STAT_MAYDAY]++; in send_mayday()
3014 raw_spin_lock_irq(&pool->lock); in pool_mayday_timeout()
3015 raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */ in pool_mayday_timeout()
3024 list_for_each_entry(work, &pool->worklist, entry) in pool_mayday_timeout()
3029 raw_spin_unlock_irq(&pool->lock); in pool_mayday_timeout()
3031 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); in pool_mayday_timeout()
3035 * maybe_create_worker - create a new worker if necessary
3048 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3053 __releases(&pool->lock) in maybe_create_worker()
3054 __acquires(&pool->lock) in maybe_create_worker()
3057 raw_spin_unlock_irq(&pool->lock); in maybe_create_worker()
3060 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); in maybe_create_worker()
3072 timer_delete_sync(&pool->mayday_timer); in maybe_create_worker()
3073 raw_spin_lock_irq(&pool->lock); in maybe_create_worker()
3076 * created as @pool->lock was dropped and the new worker might have in maybe_create_worker()
3086 spin_lock(&pool->cb_lock); in worker_lock_callback()
3091 spin_unlock(&pool->cb_lock); in worker_unlock_callback()
3096 spin_lock(&pool->cb_lock); in workqueue_callback_cancel_wait_running()
3097 spin_unlock(&pool->cb_lock); in workqueue_callback_cancel_wait_running()
3109 * manage_workers - manage worker pool
3121 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3132 struct worker_pool *pool = worker->pool; in manage_workers()
3134 if (pool->flags & POOL_MANAGER_ACTIVE) in manage_workers()
3137 pool->flags |= POOL_MANAGER_ACTIVE; in manage_workers()
3138 pool->manager = worker; in manage_workers()
3142 pool->manager = NULL; in manage_workers()
3143 pool->flags &= ~POOL_MANAGER_ACTIVE; in manage_workers()
3149 * process_one_work - process single work
3160 * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
3163 __releases(&pool->lock) in process_one_work()
3164 __acquires(&pool->lock) in process_one_work()
3167 struct worker_pool *pool = worker->pool; in process_one_work()
3170 bool bh_draining = pool->flags & POOL_BH_DRAINING; in process_one_work()
3177 * work->lockdep_map, make a copy and use that here. in process_one_work()
3181 lockdep_copy_map(&lockdep_map, &work->lockdep_map); in process_one_work()
3184 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && in process_one_work()
3185 raw_smp_processor_id() != pool->cpu); in process_one_work()
3189 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work); in process_one_work()
3190 worker->current_work = work; in process_one_work()
3191 worker->current_func = work->func; in process_one_work()
3192 worker->current_pwq = pwq; in process_one_work()
3193 if (worker->task) in process_one_work()
3194 worker->current_at = worker->task->se.sum_exec_runtime; in process_one_work()
3196 worker->current_color = get_work_color(work_data); in process_one_work()
3202 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); in process_one_work()
3204 list_del_init(&work->entry); in process_one_work()
3212 if (unlikely(pwq->wq->flags & WQ_CPU_INTENSIVE)) in process_one_work()
3216 * Kick @pool if necessary. It's always noop for per-cpu worker pools in process_one_work()
3225 * update to @work. Also, do this inside @pool->lock so that in process_one_work()
3229 set_work_pool_and_clear_pending(work, pool->id, pool_offq_flags(pool)); in process_one_work()
3231 pwq->stats[PWQ_STAT_STARTED]++; in process_one_work()
3232 raw_spin_unlock_irq(&pool->lock); in process_one_work()
3238 lock_map_acquire(pwq->wq->lockdep_map); in process_one_work()
3251 * Which would create W1->C->W1 dependencies, even though there is no in process_one_work()
3253 * read-recursive acquire on the work(queue) 'locks', but this will then in process_one_work()
3258 * flush_work() and complete() primitives (except for single-threaded in process_one_work()
3263 worker->current_func(work); in process_one_work()
3268 trace_workqueue_execute_end(work, worker->current_func); in process_one_work()
3272 lock_map_release(pwq->wq->lockdep_map); in process_one_work()
3274 if (unlikely((worker->task && in_atomic()) || in process_one_work()
3278 " preempt=0x%08x lock=%d->%d RCU=%d->%d workfn=%ps\n", in process_one_work()
3279 current->comm, task_pid_nr(current), preempt_count(), in process_one_work()
3282 worker->current_func); in process_one_work()
3293 * the same condition doesn't freeze RCU. in process_one_work()
3295 if (worker->task) in process_one_work()
3298 raw_spin_lock_irq(&pool->lock); in process_one_work()
3300 pwq->stats[PWQ_STAT_COMPLETED]++; in process_one_work()
3310 worker->last_func = worker->current_func; in process_one_work()
3313 hash_del(&worker->hentry); in process_one_work()
3314 worker->current_work = NULL; in process_one_work()
3315 worker->current_func = NULL; in process_one_work()
3316 worker->current_pwq = NULL; in process_one_work()
3317 worker->current_color = INT_MAX; in process_one_work()
3324 * process_scheduled_works - process scheduled works
3332 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3340 while ((work = list_first_entry_or_null(&worker->scheduled, in process_scheduled_works()
3343 worker->pool->watchdog_ts = jiffies; in process_scheduled_works()
3354 current->flags |= PF_WQ_WORKER; in set_pf_worker()
3356 current->flags &= ~PF_WQ_WORKER; in set_pf_worker()
3361 * worker_thread - the worker thread function
3364 * The worker thread function. All workers belong to a worker_pool -
3365 * either a per-cpu one or dynamic unbound one. These workers process all
3375 struct worker_pool *pool = worker->pool; in worker_thread()
3380 raw_spin_lock_irq(&pool->lock); in worker_thread()
3383 if (unlikely(worker->flags & WORKER_DIE)) { in worker_thread()
3384 raw_spin_unlock_irq(&pool->lock); in worker_thread()
3387 * The worker is dead and PF_WQ_WORKER is cleared, worker->pool in worker_thread()
3390 worker->pool = NULL; in worker_thread()
3391 ida_free(&pool->worker_ida, worker->id); in worker_thread()
3406 * ->scheduled list can only be filled while a worker is in worker_thread()
3410 WARN_ON_ONCE(!list_empty(&worker->scheduled)); in worker_thread()
3423 list_first_entry(&pool->worklist, in worker_thread()
3433 * pool->lock is held and there's no work to process and no need to in worker_thread()
3435 * pool->lock or from local cpu, so setting the current state in worker_thread()
3436 * before releasing pool->lock is enough to prevent losing any in worker_thread()
3441 raw_spin_unlock_irq(&pool->lock); in worker_thread()
3447 * rescuer_thread - the rescuer thread function
3470 struct workqueue_struct *wq = rescuer->rescue_wq; in rescuer_thread()
3485 * shouldn't have any work pending, but @wq->maydays may still have in rescuer_thread()
3486 * pwq(s) queued. This can happen by non-rescuer workers consuming in rescuer_thread()
3488 * @wq->maydays processing before acting on should_stop so that the in rescuer_thread()
3496 while (!list_empty(&wq->maydays)) { in rescuer_thread()
3497 struct pool_workqueue *pwq = list_first_entry(&wq->maydays, in rescuer_thread()
3499 struct worker_pool *pool = pwq->pool; in rescuer_thread()
3503 list_del_init(&pwq->mayday_node); in rescuer_thread()
3509 raw_spin_lock_irq(&pool->lock); in rescuer_thread()
3515 WARN_ON_ONCE(!list_empty(&rescuer->scheduled)); in rescuer_thread()
3516 list_for_each_entry_safe(work, n, &pool->worklist, entry) { in rescuer_thread()
3519 pwq->stats[PWQ_STAT_RESCUED]++; in rescuer_thread()
3522 if (!list_empty(&rescuer->scheduled)) { in rescuer_thread()
3530 * that such back-to-back work items, which may be in rescuer_thread()
3534 if (pwq->nr_active && need_to_create_worker(pool)) { in rescuer_thread()
3540 if (wq->rescuer && list_empty(&pwq->mayday_node)) { in rescuer_thread()
3542 list_add_tail(&pwq->mayday_node, &wq->maydays); in rescuer_thread()
3554 raw_spin_unlock_irq(&pool->lock); in rescuer_thread()
3576 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); in rescuer_thread()
3583 struct worker_pool *pool = worker->pool; in bh_worker()
3588 raw_spin_lock_irq(&pool->lock); in bh_worker()
3598 WARN_ON_ONCE(!list_empty(&worker->scheduled)); in bh_worker()
3603 list_first_entry(&pool->worklist, in bh_worker()
3609 --nr_restarts && time_before(jiffies, end)); in bh_worker()
3615 raw_spin_unlock_irq(&pool->lock); in bh_worker()
3627 * After full conversion, we'll add worker->softirq_action, directly use the
3635 bh_worker(list_first_entry(&pool->workers, struct worker, node)); in workqueue_softirq_action()
3648 struct worker_pool *pool = dead_work->pool; in drain_dead_softirq_workfn()
3658 raw_spin_lock_irq(&pool->lock); in drain_dead_softirq_workfn()
3659 pool->flags |= POOL_BH_DRAINING; in drain_dead_softirq_workfn()
3660 raw_spin_unlock_irq(&pool->lock); in drain_dead_softirq_workfn()
3662 bh_worker(list_first_entry(&pool->workers, struct worker, node)); in drain_dead_softirq_workfn()
3664 raw_spin_lock_irq(&pool->lock); in drain_dead_softirq_workfn()
3665 pool->flags &= ~POOL_BH_DRAINING; in drain_dead_softirq_workfn()
3667 raw_spin_unlock_irq(&pool->lock); in drain_dead_softirq_workfn()
3675 if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) in drain_dead_softirq_workfn()
3680 complete(&dead_work->done); in drain_dead_softirq_workfn()
3707 if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) in workqueue_softirq_dead()
3718 * check_flush_dependency - check for flush dependency sanity
3727 * on a workqueue which doesn't have %WQ_MEM_RECLAIM as that can break forward-
3737 if (from_cancel || target_wq->flags & WQ_MEM_RECLAIM) in check_flush_dependency()
3741 target_func = target_work ? target_work->func : NULL; in check_flush_dependency()
3743 WARN_ONCE(current->flags & PF_MEMALLOC, in check_flush_dependency()
3745 current->pid, current->comm, target_wq->name, target_func); in check_flush_dependency()
3746 WARN_ONCE(worker && ((worker->current_pwq->wq->flags & in check_flush_dependency()
3749 worker->current_pwq->wq->name, worker->current_func, in check_flush_dependency()
3750 target_wq->name, target_func); in check_flush_dependency()
3762 complete(&barr->done); in wq_barrier_func()
3766 * insert_wq_barrier - insert a barrier work
3783 * Note that when @worker is non-NULL, @target may be modified
3784 * underneath us, so we can't reliably determine pwq from @target.
3787 * raw_spin_lock_irq(pool->lock).
3799 * debugobject calls are safe here even with pool->lock locked in insert_wq_barrier()
3805 * spuriously triggering "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} in insert_wq_barrier()
3808 INIT_WORK_ONSTACK_KEY(&barr->work, wq_barrier_func, in insert_wq_barrier()
3809 (pwq->wq->flags & WQ_BH) ? &bh_key : &thr_key); in insert_wq_barrier()
3810 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); in insert_wq_barrier()
3812 init_completion_map(&barr->done, &target->lockdep_map); in insert_wq_barrier()
3814 barr->task = current; in insert_wq_barrier()
3824 head = worker->scheduled.next; in insert_wq_barrier()
3825 work_color = worker->current_color; in insert_wq_barrier()
3829 head = target->entry.next; in insert_wq_barrier()
3836 pwq->nr_in_flight[work_color]++; in insert_wq_barrier()
3839 insert_work(pwq, &barr->work, head, work_flags); in insert_wq_barrier()
3843 * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
3845 * @flush_color: new flush color, < 0 for no-op
3846 * @work_color: new work color, < 0 for no-op
3850 * If @flush_color is non-negative, flush_color on all pwqs should be
3851 * -1. If no pwq has in-flight commands at the specified color, all
3852 * pwq->flush_color's stay at -1 and %false is returned. If any pwq
3853 * has in-flight commands, its pwq->flush_color is set to
3854 * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
3857 * The caller should have initialized @wq->first_flusher prior to
3858 * calling this function with non-negative @flush_color. If
3862 * If @work_color is non-negative, all pwqs should have the same
3867 * mutex_lock(wq->mutex).
3881 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush)); in flush_workqueue_prep_pwqs()
3882 atomic_set(&wq->nr_pwqs_to_flush, 1); in flush_workqueue_prep_pwqs()
3888 * sequentially to wq->pwqs by cpu index. So in the majority in flush_workqueue_prep_pwqs()
3894 if (current_pool != pwq->pool) { in flush_workqueue_prep_pwqs()
3896 raw_spin_unlock_irq(¤t_pool->lock); in flush_workqueue_prep_pwqs()
3897 current_pool = pwq->pool; in flush_workqueue_prep_pwqs()
3898 raw_spin_lock_irq(¤t_pool->lock); in flush_workqueue_prep_pwqs()
3902 WARN_ON_ONCE(pwq->flush_color != -1); in flush_workqueue_prep_pwqs()
3904 if (pwq->nr_in_flight[flush_color]) { in flush_workqueue_prep_pwqs()
3905 pwq->flush_color = flush_color; in flush_workqueue_prep_pwqs()
3906 atomic_inc(&wq->nr_pwqs_to_flush); in flush_workqueue_prep_pwqs()
3912 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color)); in flush_workqueue_prep_pwqs()
3913 pwq->work_color = work_color; in flush_workqueue_prep_pwqs()
3919 raw_spin_unlock_irq(¤t_pool->lock); in flush_workqueue_prep_pwqs()
3921 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) in flush_workqueue_prep_pwqs()
3922 complete(&wq->first_flusher->done); in flush_workqueue_prep_pwqs()
3930 if (unlikely(!wq->lockdep_map)) in touch_wq_lockdep_map()
3933 if (wq->flags & WQ_BH) in touch_wq_lockdep_map()
3936 lock_map_acquire(wq->lockdep_map); in touch_wq_lockdep_map()
3937 lock_map_release(wq->lockdep_map); in touch_wq_lockdep_map()
3939 if (wq->flags & WQ_BH) in touch_wq_lockdep_map()
3948 if (wq->flags & WQ_BH) in touch_work_lockdep_map()
3951 lock_map_acquire(&work->lockdep_map); in touch_work_lockdep_map()
3952 lock_map_release(&work->lockdep_map); in touch_work_lockdep_map()
3954 if (wq->flags & WQ_BH) in touch_work_lockdep_map()
3960 * __flush_workqueue - ensure that any scheduled work has run to completion.
3970 .flush_color = -1, in __flush_workqueue()
3971 .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, (*wq->lockdep_map)), in __flush_workqueue()
3980 mutex_lock(&wq->mutex); in __flush_workqueue()
3983 * Start-to-wait phase in __flush_workqueue()
3985 next_color = work_next_color(wq->work_color); in __flush_workqueue()
3987 if (next_color != wq->flush_color) { in __flush_workqueue()
3993 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow)); in __flush_workqueue()
3994 this_flusher.flush_color = wq->work_color; in __flush_workqueue()
3995 wq->work_color = next_color; in __flush_workqueue()
3997 if (!wq->first_flusher) { in __flush_workqueue()
3999 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); in __flush_workqueue()
4001 wq->first_flusher = &this_flusher; in __flush_workqueue()
4003 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color, in __flush_workqueue()
4004 wq->work_color)) { in __flush_workqueue()
4006 wq->flush_color = next_color; in __flush_workqueue()
4007 wq->first_flusher = NULL; in __flush_workqueue()
4012 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color); in __flush_workqueue()
4013 list_add_tail(&this_flusher.list, &wq->flusher_queue); in __flush_workqueue()
4014 flush_workqueue_prep_pwqs(wq, -1, wq->work_color); in __flush_workqueue()
4019 * The next flush completion will assign us in __flush_workqueue()
4022 list_add_tail(&this_flusher.list, &wq->flusher_overflow); in __flush_workqueue()
4027 mutex_unlock(&wq->mutex); in __flush_workqueue()
4032 * Wake-up-and-cascade phase in __flush_workqueue()
4035 * handling overflow. Non-first flushers can simply return. in __flush_workqueue()
4037 if (READ_ONCE(wq->first_flusher) != &this_flusher) in __flush_workqueue()
4040 mutex_lock(&wq->mutex); in __flush_workqueue()
4043 if (wq->first_flusher != &this_flusher) in __flush_workqueue()
4046 WRITE_ONCE(wq->first_flusher, NULL); in __flush_workqueue()
4049 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); in __flush_workqueue()
4055 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) { in __flush_workqueue()
4056 if (next->flush_color != wq->flush_color) in __flush_workqueue()
4058 list_del_init(&next->list); in __flush_workqueue()
4059 complete(&next->done); in __flush_workqueue()
4062 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) && in __flush_workqueue()
4063 wq->flush_color != work_next_color(wq->work_color)); in __flush_workqueue()
4066 wq->flush_color = work_next_color(wq->flush_color); in __flush_workqueue()
4069 if (!list_empty(&wq->flusher_overflow)) { in __flush_workqueue()
4073 * flusher_queue. This is the start-to-wait in __flush_workqueue()
4076 list_for_each_entry(tmp, &wq->flusher_overflow, list) in __flush_workqueue()
4077 tmp->flush_color = wq->work_color; in __flush_workqueue()
4079 wq->work_color = work_next_color(wq->work_color); in __flush_workqueue()
4081 list_splice_tail_init(&wq->flusher_overflow, in __flush_workqueue()
4082 &wq->flusher_queue); in __flush_workqueue()
4083 flush_workqueue_prep_pwqs(wq, -1, wq->work_color); in __flush_workqueue()
4086 if (list_empty(&wq->flusher_queue)) { in __flush_workqueue()
4087 WARN_ON_ONCE(wq->flush_color != wq->work_color); in __flush_workqueue()
4095 WARN_ON_ONCE(wq->flush_color == wq->work_color); in __flush_workqueue()
4096 WARN_ON_ONCE(wq->flush_color != next->flush_color); in __flush_workqueue()
4098 list_del_init(&next->list); in __flush_workqueue()
4099 wq->first_flusher = next; in __flush_workqueue()
4101 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1)) in __flush_workqueue()
4108 wq->first_flusher = NULL; in __flush_workqueue()
4112 mutex_unlock(&wq->mutex); in __flush_workqueue()
4117 * drain_workqueue - drain a workqueue
4134 * hotter than drain_workqueue() and already looks at @wq->flags. in drain_workqueue()
4137 mutex_lock(&wq->mutex); in drain_workqueue()
4138 if (!wq->nr_drainers++) in drain_workqueue()
4139 wq->flags |= __WQ_DRAINING; in drain_workqueue()
4140 mutex_unlock(&wq->mutex); in drain_workqueue()
4144 mutex_lock(&wq->mutex); in drain_workqueue()
4149 raw_spin_lock_irq(&pwq->pool->lock); in drain_workqueue()
4151 raw_spin_unlock_irq(&pwq->pool->lock); in drain_workqueue()
4159 wq->name, __func__, flush_cnt); in drain_workqueue()
4161 mutex_unlock(&wq->mutex); in drain_workqueue()
4165 if (!--wq->nr_drainers) in drain_workqueue()
4166 wq->flags &= ~__WQ_DRAINING; in drain_workqueue()
4167 mutex_unlock(&wq->mutex); in drain_workqueue()
4186 raw_spin_lock_irq(&pool->lock); in start_flush_work()
4190 if (unlikely(pwq->pool != pool)) in start_flush_work()
4196 pwq = worker->current_pwq; in start_flush_work()
4199 wq = pwq->wq; in start_flush_work()
4203 raw_spin_unlock_irq(&pool->lock); in start_flush_work()
4209 * single-threaded or rescuer equipped workqueue. in start_flush_work()
4216 if (!from_cancel && (wq->saved_max_active == 1 || wq->rescuer)) in start_flush_work()
4222 raw_spin_unlock_irq(&pool->lock); in start_flush_work()
4234 if (WARN_ON(!work->func)) in __flush_work()
4245 * BH context and thus can be busy-waited. in __flush_work()
4281 * flush_work - wait for a work to finish executing the last queueing instance
4299 * flush_delayed_work - wait for a dwork to finish executing the last queueing
4313 if (timer_delete_sync(&dwork->timer)) in flush_delayed_work()
4314 __queue_work(dwork->cpu, dwork->wq, &dwork->work); in flush_delayed_work()
4316 return flush_work(&dwork->work); in flush_delayed_work()
4321 * flush_rcu_work - wait for a rwork to finish executing the last queueing
4330 if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) { in flush_rcu_work()
4332 flush_work(&rwork->work); in flush_rcu_work()
4335 return flush_work(&rwork->work); in flush_rcu_work()
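/*
 * Sketch tying the flush primitives above together: wait for the last
 * queueing instance of individual items, then for everything queued on the
 * workqueue so far. "ex_wq", "ex_work" and "ex_dwork" are illustrative.
 */
static void ex_quiesce(struct workqueue_struct *ex_wq,
                       struct work_struct *ex_work,
                       struct delayed_work *ex_dwork)
{
        flush_work(ex_work);            /* wait for ex_work's last queueing */
        flush_delayed_work(ex_dwork);   /* expire the timer, then wait */
        flush_workqueue(ex_wq);         /* wait for everything queued so far */
}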
4342 const unsigned long max = (1lu << WORK_OFFQ_DISABLE_BITS) - 1; in work_offqd_disable()
4344 if (likely(offqd->disable < max)) in work_offqd_disable()
4345 offqd->disable++; in work_offqd_disable()
4352 if (likely(offqd->disable > 0)) in work_offqd_enable()
4353 offqd->disable--; in work_offqd_enable()
4411 * cancel_work_sync - cancel a work and wait for it to finish
4415 * even if the work re-queues itself or migrates to another workqueue. On return
4419 * cancel_work_sync(&delayed_work->work) must not be used for delayed_work's.
4422 * Must be called from a sleepable context if @work was last queued on a non-BH
4423 * workqueue. Can also be called from non-hardirq atomic contexts including BH
4435 * cancel_delayed_work - cancel a delayed work
4445 * it returns %true and the work doesn't re-arm itself. Explicitly flush or
4452 return __cancel_work(&dwork->work, WORK_CANCEL_DELAYED); in cancel_delayed_work()
4457 * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
4467 return __cancel_work_sync(&dwork->work, WORK_CANCEL_DELAYED); in cancel_delayed_work_sync()
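/*
 * Sketch (editor's addition): canceling both kinds of work items during a
 * hypothetical teardown. The _sync variants also wait for an instance that
 * is already executing. Names are assumptions for the example.
 */
#include <linux/workqueue.h>

static void my_cancel_all(struct work_struct *work, struct delayed_work *dwork)
{
	cancel_work_sync(work);			/* plain work item */
	cancel_delayed_work_sync(dwork);	/* timer may still be pending */
}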
4472 * disable_work - Disable and cancel a work item
4476 * pending. As long as the disable count is non-zero, any attempt to queue @work
4490 * disable_work_sync - Disable, cancel and drain a work item
4496 * Must be called from a sleepable context if @work was last queued on a non-BH
4497 * workqueue. Can also be called from non-hardirq atomic contexts including BH
4509 * enable_work - Enable a work item
4536 * disable_delayed_work - Disable and cancel a delayed work item
4543 return __cancel_work(&dwork->work, in disable_delayed_work()
4549 * disable_delayed_work_sync - Disable, cancel and drain a delayed work item
4556 return __cancel_work_sync(&dwork->work, in disable_delayed_work_sync()
4562 * enable_delayed_work - Enable a delayed work item
4569 return enable_work(&dwork->work); in enable_delayed_work()
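/*
 * Sketch (editor's addition): pausing a hypothetical stats work item across
 * a reconfiguration window with the disable/enable pair described above.
 */
#include <linux/workqueue.h>

static void my_reconfigure(struct work_struct *stats_work)
{
	/* cancel, wait for a running instance and block further queueing */
	disable_work_sync(stats_work);

	/* ... reconfigure while nothing can queue @stats_work ... */

	/*
	 * Drop the disable count. Queueing works again but the item is not
	 * requeued automatically - requeue it here if needed.
	 */
	enable_work(stats_work);
}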
4574 * schedule_on_each_cpu - execute a function synchronously on each online CPU
4582 * 0 on success, -errno on failure.
4591 return -ENOMEM; in schedule_on_each_cpu()
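/*
 * Sketch (editor's addition): draining a hypothetical per-CPU cache. The
 * call is synchronous and heavyweight - it allocates one work item per
 * online CPU and flushes them all - and returns 0 or -errno.
 */
#include <linux/workqueue.h>

static void my_drain_cpu_cache(struct work_struct *unused)
{
	/* executes on the CPU the work item was scheduled on */
}

static int my_drain_all_cpus(void)
{
	return schedule_on_each_cpu(my_drain_cpu_cache);
}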
4611 * execute_in_process_context - reliably execute the routine with user context
4619 * Return: 0 - function was executed
4620 * 1 - function was scheduled for execution
4625 fn(&ew->work); in execute_in_process_context()
4629 INIT_WORK(&ew->work, fn); in execute_in_process_context()
4630 schedule_work(&ew->work); in execute_in_process_context()
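/*
 * Sketch (editor's addition): execute_in_process_context() runs @fn
 * directly when the caller already has process context and otherwise
 * defers it via the embedded execute_work. Names are assumptions made up
 * for the example.
 */
#include <linux/printk.h>
#include <linux/workqueue.h>

static struct execute_work my_ew;

static void my_late_cleanup(struct work_struct *work)
{
	/* always runs with process context, possibly deferred */
}

static void my_release(void)
{
	if (execute_in_process_context(my_late_cleanup, &my_ew))
		pr_debug("cleanup deferred to a workqueue\n");
}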
4637 * free_workqueue_attrs - free a workqueue_attrs
4645 free_cpumask_var(attrs->cpumask); in free_workqueue_attrs()
4646 free_cpumask_var(attrs->__pod_cpumask); in free_workqueue_attrs()
4652 * alloc_workqueue_attrs - allocate a workqueue_attrs
4666 if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL)) in alloc_workqueue_attrs_noprof()
4668 if (!alloc_cpumask_var(&attrs->__pod_cpumask, GFP_KERNEL)) in alloc_workqueue_attrs_noprof()
4671 cpumask_copy(attrs->cpumask, cpu_possible_mask); in alloc_workqueue_attrs_noprof()
4672 attrs->affn_scope = WQ_AFFN_DFL; in alloc_workqueue_attrs_noprof()
4682 to->nice = from->nice; in copy_workqueue_attrs()
4683 cpumask_copy(to->cpumask, from->cpumask); in copy_workqueue_attrs()
4684 cpumask_copy(to->__pod_cpumask, from->__pod_cpumask); in copy_workqueue_attrs()
4685 to->affn_strict = from->affn_strict; in copy_workqueue_attrs()
4688 * Unlike hash and equality test, copying shouldn't ignore wq-only in copy_workqueue_attrs()
4692 to->affn_scope = from->affn_scope; in copy_workqueue_attrs()
4693 to->ordered = from->ordered; in copy_workqueue_attrs()
4697 * Some attrs fields are workqueue-only. Clear them for worker_pool's. See the
4702 attrs->affn_scope = WQ_AFFN_NR_TYPES; in wqattrs_clear_for_pool()
4703 attrs->ordered = false; in wqattrs_clear_for_pool()
4704 if (attrs->affn_strict) in wqattrs_clear_for_pool()
4705 cpumask_copy(attrs->cpumask, cpu_possible_mask); in wqattrs_clear_for_pool()
4713 hash = jhash_1word(attrs->nice, hash); in wqattrs_hash()
4714 hash = jhash_1word(attrs->affn_strict, hash); in wqattrs_hash()
4715 hash = jhash(cpumask_bits(attrs->__pod_cpumask), in wqattrs_hash()
4717 if (!attrs->affn_strict) in wqattrs_hash()
4718 hash = jhash(cpumask_bits(attrs->cpumask), in wqattrs_hash()
4727 if (a->nice != b->nice) in wqattrs_equal()
4729 if (a->affn_strict != b->affn_strict) in wqattrs_equal()
4731 if (!cpumask_equal(a->__pod_cpumask, b->__pod_cpumask)) in wqattrs_equal()
4733 if (!a->affn_strict && !cpumask_equal(a->cpumask, b->cpumask)) in wqattrs_equal()
4744 * @attrs->cpumask doesn't overlap with @unbound_cpumask, we fall back to in wqattrs_actualize_cpumask()
4747 cpumask_and(attrs->cpumask, attrs->cpumask, unbound_cpumask); in wqattrs_actualize_cpumask()
4748 if (unlikely(cpumask_empty(attrs->cpumask))) in wqattrs_actualize_cpumask()
4749 cpumask_copy(attrs->cpumask, unbound_cpumask); in wqattrs_actualize_cpumask()
4762 if (attrs->affn_scope == WQ_AFFN_DFL) in wqattrs_pod_type()
4765 scope = attrs->affn_scope; in wqattrs_pod_type()
4769 if (!WARN_ON_ONCE(attrs->affn_scope == WQ_AFFN_NR_TYPES) && in wqattrs_pod_type()
4770 likely(pt->nr_pods)) in wqattrs_pod_type()
4778 BUG_ON(!pt->nr_pods); in wqattrs_pod_type()
4783 * init_worker_pool - initialize a newly zalloc'd worker_pool
4786 * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs.
4788 * Return: 0 on success, -errno on failure. Even on failure, all fields
4794 raw_spin_lock_init(&pool->lock); in init_worker_pool()
4795 pool->id = -1; in init_worker_pool()
4796 pool->cpu = -1; in init_worker_pool()
4797 pool->node = NUMA_NO_NODE; in init_worker_pool()
4798 pool->flags |= POOL_DISASSOCIATED; in init_worker_pool()
4799 pool->watchdog_ts = jiffies; in init_worker_pool()
4800 INIT_LIST_HEAD(&pool->worklist); in init_worker_pool()
4801 INIT_LIST_HEAD(&pool->idle_list); in init_worker_pool()
4802 hash_init(pool->busy_hash); in init_worker_pool()
4804 timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE); in init_worker_pool()
4805 INIT_WORK(&pool->idle_cull_work, idle_cull_fn); in init_worker_pool()
4807 timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0); in init_worker_pool()
4809 INIT_LIST_HEAD(&pool->workers); in init_worker_pool()
4811 ida_init(&pool->worker_ida); in init_worker_pool()
4812 INIT_HLIST_NODE(&pool->hash_node); in init_worker_pool()
4813 pool->refcnt = 1; in init_worker_pool()
4815 spin_lock_init(&pool->cb_lock); in init_worker_pool()
4819 pool->attrs = alloc_workqueue_attrs(); in init_worker_pool()
4820 if (!pool->attrs) in init_worker_pool()
4821 return -ENOMEM; in init_worker_pool()
4823 wqattrs_clear_for_pool(pool->attrs); in init_worker_pool()
4833 lockdep_register_key(&wq->key); in wq_init_lockdep()
4834 lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name); in wq_init_lockdep()
4836 lock_name = wq->name; in wq_init_lockdep()
4838 wq->lock_name = lock_name; in wq_init_lockdep()
4839 wq->lockdep_map = &wq->__lockdep_map; in wq_init_lockdep()
4840 lockdep_init_map(wq->lockdep_map, lock_name, &wq->key, 0); in wq_init_lockdep()
4845 if (wq->lockdep_map != &wq->__lockdep_map) in wq_unregister_lockdep()
4848 lockdep_unregister_key(&wq->key); in wq_unregister_lockdep()
4853 if (wq->lockdep_map != &wq->__lockdep_map) in wq_free_lockdep()
4856 if (wq->lock_name != wq->name) in wq_free_lockdep()
4857 kfree(wq->lock_name); in wq_free_lockdep()
4888 nna->max = WQ_DFL_MIN_ACTIVE; in init_node_nr_active()
4889 atomic_set(&nna->nr, 0); in init_node_nr_active()
4890 raw_spin_lock_init(&nna->lock); in init_node_nr_active()
4891 INIT_LIST_HEAD(&nna->pending_pwqs); in init_node_nr_active()
4922 return -ENOMEM; in alloc_node_nr_active()
4930 if (wq->flags & WQ_UNBOUND) in rcu_free_wq()
4931 free_node_nr_active(wq->node_nr_active); in rcu_free_wq()
4934 free_percpu(wq->cpu_pwq); in rcu_free_wq()
4935 free_workqueue_attrs(wq->unbound_attrs); in rcu_free_wq()
4943 ida_destroy(&pool->worker_ida); in rcu_free_pool()
4944 free_workqueue_attrs(pool->attrs); in rcu_free_pool()
4949 * put_unbound_pool - put a worker_pool
4966 if (--pool->refcnt) in put_unbound_pool()
4970 if (WARN_ON(!(pool->cpu < 0)) || in put_unbound_pool()
4971 WARN_ON(!list_empty(&pool->worklist))) in put_unbound_pool()
4975 if (pool->id >= 0) in put_unbound_pool()
4976 idr_remove(&worker_pool_idr, pool->id); in put_unbound_pool()
4977 hash_del(&pool->hash_node); in put_unbound_pool()
4986 * pwq->refcnt == pool->refcnt == 0 in put_unbound_pool()
4990 * drops pool->lock in put_unbound_pool()
4994 !(pool->flags & POOL_MANAGER_ACTIVE), in put_unbound_pool()
4998 raw_spin_lock_irq(&pool->lock); in put_unbound_pool()
4999 if (!(pool->flags & POOL_MANAGER_ACTIVE)) { in put_unbound_pool()
5000 pool->flags |= POOL_MANAGER_ACTIVE; in put_unbound_pool()
5003 raw_spin_unlock_irq(&pool->lock); in put_unbound_pool()
5009 WARN_ON(pool->nr_workers || pool->nr_idle); in put_unbound_pool()
5010 raw_spin_unlock_irq(&pool->lock); in put_unbound_pool()
5019 timer_delete_sync(&pool->idle_timer); in put_unbound_pool()
5020 cancel_work_sync(&pool->idle_cull_work); in put_unbound_pool()
5021 timer_delete_sync(&pool->mayday_timer); in put_unbound_pool()
5024 call_rcu(&pool->rcu, rcu_free_pool); in put_unbound_pool()
5028 * get_unbound_pool - get a worker_pool with the specified attributes
5052 if (wqattrs_equal(pool->attrs, attrs)) { in get_unbound_pool()
5053 pool->refcnt++; in get_unbound_pool()
5059 for (pod = 0; pod < pt->nr_pods; pod++) { in get_unbound_pool()
5060 if (cpumask_subset(attrs->__pod_cpumask, pt->pod_cpus[pod])) { in get_unbound_pool()
5061 node = pt->pod_node[pod]; in get_unbound_pool()
5071 pool->node = node; in get_unbound_pool()
5072 copy_workqueue_attrs(pool->attrs, attrs); in get_unbound_pool()
5073 wqattrs_clear_for_pool(pool->attrs); in get_unbound_pool()
5083 hash_add(unbound_pool_hash, &pool->hash_node, hash); in get_unbound_pool()
5100 struct workqueue_struct *wq = pwq->wq; in pwq_release_workfn()
5101 struct worker_pool *pool = pwq->pool; in pwq_release_workfn()
5108 if (!list_empty(&pwq->pwqs_node)) { in pwq_release_workfn()
5109 mutex_lock(&wq->mutex); in pwq_release_workfn()
5110 list_del_rcu(&pwq->pwqs_node); in pwq_release_workfn()
5111 is_last = list_empty(&wq->pwqs); in pwq_release_workfn()
5116 if (!is_last && (wq->flags & __WQ_ORDERED)) in pwq_release_workfn()
5119 mutex_unlock(&wq->mutex); in pwq_release_workfn()
5122 if (wq->flags & WQ_UNBOUND) { in pwq_release_workfn()
5128 if (!list_empty(&pwq->pending_node)) { in pwq_release_workfn()
5130 wq_node_nr_active(pwq->wq, pwq->pool->node); in pwq_release_workfn()
5132 raw_spin_lock_irq(&nna->lock); in pwq_release_workfn()
5133 list_del_init(&pwq->pending_node); in pwq_release_workfn()
5134 raw_spin_unlock_irq(&nna->lock); in pwq_release_workfn()
5145 call_rcu(&wq->rcu, rcu_free_wq); in pwq_release_workfn()
5157 pwq->pool = pool; in init_pwq()
5158 pwq->wq = wq; in init_pwq()
5159 pwq->flush_color = -1; in init_pwq()
5160 pwq->refcnt = 1; in init_pwq()
5161 INIT_LIST_HEAD(&pwq->inactive_works); in init_pwq()
5162 INIT_LIST_HEAD(&pwq->pending_node); in init_pwq()
5163 INIT_LIST_HEAD(&pwq->pwqs_node); in init_pwq()
5164 INIT_LIST_HEAD(&pwq->mayday_node); in init_pwq()
5165 kthread_init_work(&pwq->release_work, pwq_release_workfn); in init_pwq()
5171 struct workqueue_struct *wq = pwq->wq; in link_pwq()
5173 lockdep_assert_held(&wq->mutex); in link_pwq()
5176 if (!list_empty(&pwq->pwqs_node)) in link_pwq()
5180 pwq->work_color = wq->work_color; in link_pwq()
5183 list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); in link_pwq()
5199 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node); in alloc_unbound_pwq()
5220 * wq_calc_pod_cpumask - calculate a wq_attrs' cpumask for a pod
5225 * The result is stored in @attrs->__pod_cpumask.
5227 * If pod affinity is not enabled, @attrs->cpumask is always used. If enabled
5229 * intersection of the possible CPUs of @pod and @attrs->cpumask.
5236 int pod = pt->cpu_pod[cpu]; in wq_calc_pod_cpumask()
5239 cpumask_and(attrs->__pod_cpumask, pt->pod_cpus[pod], attrs->cpumask); in wq_calc_pod_cpumask()
5241 if (!cpumask_intersects(attrs->__pod_cpumask, wq_online_cpumask)) { in wq_calc_pod_cpumask()
5242 cpumask_copy(attrs->__pod_cpumask, attrs->cpumask); in wq_calc_pod_cpumask()
5255 lockdep_assert_held(&wq->mutex); in install_unbound_pwq()
5281 put_pwq_unlocked(ctx->pwq_tbl[cpu]); in apply_wqattrs_cleanup()
5282 put_pwq_unlocked(ctx->dfl_pwq); in apply_wqattrs_cleanup()
5284 free_workqueue_attrs(ctx->attrs); in apply_wqattrs_cleanup()
5302 if (WARN_ON(attrs->affn_scope < 0 || in apply_wqattrs_prepare()
5303 attrs->affn_scope >= WQ_AFFN_NR_TYPES)) in apply_wqattrs_prepare()
5304 return ERR_PTR(-EINVAL); in apply_wqattrs_prepare()
5314 * the default pwq covering whole @attrs->cpumask. Always create in apply_wqattrs_prepare()
5319 cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask); in apply_wqattrs_prepare()
5320 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs); in apply_wqattrs_prepare()
5321 if (!ctx->dfl_pwq) in apply_wqattrs_prepare()
5325 if (new_attrs->ordered) { in apply_wqattrs_prepare()
5326 ctx->dfl_pwq->refcnt++; in apply_wqattrs_prepare()
5327 ctx->pwq_tbl[cpu] = ctx->dfl_pwq; in apply_wqattrs_prepare()
5330 ctx->pwq_tbl[cpu] = alloc_unbound_pwq(wq, new_attrs); in apply_wqattrs_prepare()
5331 if (!ctx->pwq_tbl[cpu]) in apply_wqattrs_prepare()
5338 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); in apply_wqattrs_prepare()
5339 cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask); in apply_wqattrs_prepare()
5340 ctx->attrs = new_attrs; in apply_wqattrs_prepare()
5344 * (dfl_pwq). Set the plugged flag of ctx->dfl_pwq to suspend execution in apply_wqattrs_prepare()
5348 if ((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)) in apply_wqattrs_prepare()
5349 ctx->dfl_pwq->plugged = true; in apply_wqattrs_prepare()
5351 ctx->wq = wq; in apply_wqattrs_prepare()
5357 return ERR_PTR(-ENOMEM); in apply_wqattrs_prepare()
5366 mutex_lock(&ctx->wq->mutex); in apply_wqattrs_commit()
5368 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs); in apply_wqattrs_commit()
5372 ctx->pwq_tbl[cpu] = install_unbound_pwq(ctx->wq, cpu, in apply_wqattrs_commit()
5373 ctx->pwq_tbl[cpu]); in apply_wqattrs_commit()
5374 ctx->dfl_pwq = install_unbound_pwq(ctx->wq, -1, ctx->dfl_pwq); in apply_wqattrs_commit()
5376 /* update node_nr_active->max */ in apply_wqattrs_commit()
5377 wq_update_node_max_active(ctx->wq, -1); in apply_wqattrs_commit()
5380 if (ctx->wq->rescuer) in apply_wqattrs_commit()
5381 set_cpus_allowed_ptr(ctx->wq->rescuer->task, in apply_wqattrs_commit()
5382 unbound_effective_cpumask(ctx->wq)); in apply_wqattrs_commit()
5384 mutex_unlock(&ctx->wq->mutex); in apply_wqattrs_commit()
5393 if (WARN_ON(!(wq->flags & WQ_UNBOUND))) in apply_workqueue_attrs_locked()
5394 return -EINVAL; in apply_workqueue_attrs_locked()
5408 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
5413 * a separate pwq to each CPU pod with possible CPUs in @attrs->cpumask so that
5415 * in-flight work items finish. Note that a work item which repeatedly requeues
5416 * itself back-to-back will stay on its current pwq.
5420 * Return: 0 on success and -errno on failure.
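/*
 * Sketch (editor's addition, for in-kernel callers; these helpers are not
 * necessarily exported to loadable modules): steering a hypothetical
 * WQ_UNBOUND workqueue onto a given cpumask and nice level. my_tune_wq()
 * and the chosen values are assumptions for the example.
 */
#include <linux/cpumask.h>
#include <linux/workqueue.h>

static int my_tune_wq(struct workqueue_struct *wq, const struct cpumask *mask)
{
	struct workqueue_attrs *attrs;
	int ret;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;

	attrs->nice = -5;
	cpumask_copy(attrs->cpumask, mask);
	ret = apply_workqueue_attrs(wq, attrs);

	free_workqueue_attrs(attrs);
	return ret;
}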
5435 * unbound_wq_update_pwq - update a pwq slot for CPU hot[un]plug
5444 * back to @wq->dfl_pwq which may not be optimal but is always correct.
5449 * may execute on any CPU. This is similar to how per-cpu workqueues behave on
5460 if (!(wq->flags & WQ_UNBOUND) || wq->unbound_attrs->ordered) in unbound_wq_update_pwq()
5470 copy_workqueue_attrs(target_attrs, wq->unbound_attrs); in unbound_wq_update_pwq()
5475 if (wqattrs_equal(target_attrs, unbound_pwq(wq, cpu)->pool->attrs)) in unbound_wq_update_pwq()
5482 wq->name); in unbound_wq_update_pwq()
5487 mutex_lock(&wq->mutex); in unbound_wq_update_pwq()
5492 mutex_lock(&wq->mutex); in unbound_wq_update_pwq()
5493 pwq = unbound_pwq(wq, -1); in unbound_wq_update_pwq()
5494 raw_spin_lock_irq(&pwq->pool->lock); in unbound_wq_update_pwq()
5496 raw_spin_unlock_irq(&pwq->pool->lock); in unbound_wq_update_pwq()
5499 mutex_unlock(&wq->mutex); in unbound_wq_update_pwq()
5505 bool highpri = wq->flags & WQ_HIGHPRI; in alloc_and_link_pwqs()
5510 wq->cpu_pwq = alloc_percpu(struct pool_workqueue *); in alloc_and_link_pwqs()
5511 if (!wq->cpu_pwq) in alloc_and_link_pwqs()
5514 if (!(wq->flags & WQ_UNBOUND)) { in alloc_and_link_pwqs()
5517 if (wq->flags & WQ_BH) in alloc_and_link_pwqs()
5527 pwq_p = per_cpu_ptr(wq->cpu_pwq, cpu); in alloc_and_link_pwqs()
5530 pool->node); in alloc_and_link_pwqs()
5536 mutex_lock(&wq->mutex); in alloc_and_link_pwqs()
5538 mutex_unlock(&wq->mutex); in alloc_and_link_pwqs()
5543 if (wq->flags & __WQ_ORDERED) { in alloc_and_link_pwqs()
5548 dfl_pwq = rcu_access_pointer(wq->dfl_pwq); in alloc_and_link_pwqs()
5549 WARN(!ret && (wq->pwqs.next != &dfl_pwq->pwqs_node || in alloc_and_link_pwqs()
5550 wq->pwqs.prev != &dfl_pwq->pwqs_node), in alloc_and_link_pwqs()
5551 "ordering guarantee broken for workqueue %s\n", wq->name); in alloc_and_link_pwqs()
5559 if (wq->cpu_pwq) { in alloc_and_link_pwqs()
5561 struct pool_workqueue *pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); in alloc_and_link_pwqs()
5566 free_percpu(wq->cpu_pwq); in alloc_and_link_pwqs()
5567 wq->cpu_pwq = NULL; in alloc_and_link_pwqs()
5569 return -ENOMEM; in alloc_and_link_pwqs()
5594 if (!(wq->flags & WQ_MEM_RECLAIM)) in init_rescuer()
5600 wq->name); in init_rescuer()
5601 return -ENOMEM; in init_rescuer()
5604 rescuer->rescue_wq = wq; in init_rescuer()
5607 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf); in init_rescuer()
5608 if (IS_ERR(rescuer->task)) { in init_rescuer()
5609 ret = PTR_ERR(rescuer->task); in init_rescuer()
5611 wq->name, ERR_PTR(ret)); in init_rescuer()
5616 wq->rescuer = rescuer; in init_rescuer()
5617 if (wq->flags & WQ_UNBOUND) in init_rescuer()
5618 kthread_bind_mask(rescuer->task, unbound_effective_cpumask(wq)); in init_rescuer()
5620 kthread_bind_mask(rescuer->task, cpu_possible_mask); in init_rescuer()
5621 wake_up_process(rescuer->task); in init_rescuer()
5627 * wq_adjust_max_active - update a wq's max_active to the current setting
5630 * If @wq isn't freezing, set @wq->max_active to the saved_max_active and
5631 * activate inactive work items accordingly. If @wq is freezing, clear
5632 * @wq->max_active to zero.
5639 lockdep_assert_held(&wq->mutex); in wq_adjust_max_active()
5641 if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) { in wq_adjust_max_active()
5645 new_max = wq->saved_max_active; in wq_adjust_max_active()
5646 new_min = wq->saved_min_active; in wq_adjust_max_active()
5649 if (wq->max_active == new_max && wq->min_active == new_min) in wq_adjust_max_active()
5653 * Update @wq->max/min_active and then kick inactive work items if more in wq_adjust_max_active()
5658 WRITE_ONCE(wq->max_active, new_max); in wq_adjust_max_active()
5659 WRITE_ONCE(wq->min_active, new_min); in wq_adjust_max_active()
5661 if (wq->flags & WQ_UNBOUND) in wq_adjust_max_active()
5662 wq_update_node_max_active(wq, -1); in wq_adjust_max_active()
5668 * Round-robin through pwq's activating the first inactive work item in wq_adjust_max_active()
5679 raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags); in wq_adjust_max_active()
5682 kick_pool(pwq->pool); in wq_adjust_max_active()
5684 raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags); in wq_adjust_max_active()
5720 wq->unbound_attrs = alloc_workqueue_attrs_noprof(); in __alloc_workqueue()
5721 if (!wq->unbound_attrs) in __alloc_workqueue()
5725 name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args); in __alloc_workqueue()
5729 wq->name); in __alloc_workqueue()
5739 max_active = wq_clamp_max_active(max_active, flags, wq->name); in __alloc_workqueue()
5743 wq->flags = flags; in __alloc_workqueue()
5744 wq->max_active = max_active; in __alloc_workqueue()
5745 wq->min_active = min(max_active, WQ_DFL_MIN_ACTIVE); in __alloc_workqueue()
5746 wq->saved_max_active = wq->max_active; in __alloc_workqueue()
5747 wq->saved_min_active = wq->min_active; in __alloc_workqueue()
5748 mutex_init(&wq->mutex); in __alloc_workqueue()
5749 atomic_set(&wq->nr_pwqs_to_flush, 0); in __alloc_workqueue()
5750 INIT_LIST_HEAD(&wq->pwqs); in __alloc_workqueue()
5751 INIT_LIST_HEAD(&wq->flusher_queue); in __alloc_workqueue()
5752 INIT_LIST_HEAD(&wq->flusher_overflow); in __alloc_workqueue()
5753 INIT_LIST_HEAD(&wq->maydays); in __alloc_workqueue()
5755 INIT_LIST_HEAD(&wq->list); in __alloc_workqueue()
5758 if (alloc_node_nr_active(wq->node_nr_active) < 0) in __alloc_workqueue()
5764 * and the global freeze state. in __alloc_workqueue()
5771 mutex_lock(&wq->mutex); in __alloc_workqueue()
5773 mutex_unlock(&wq->mutex); in __alloc_workqueue()
5775 list_add_tail_rcu(&wq->list, &workqueues); in __alloc_workqueue()
5782 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq)) in __alloc_workqueue()
5790 * Failed alloc_and_link_pwqs() may leave pending pwq->release_work, in __alloc_workqueue()
5794 if (wq->flags & WQ_UNBOUND) { in __alloc_workqueue()
5796 free_node_nr_active(wq->node_nr_active); in __alloc_workqueue()
5799 free_workqueue_attrs(wq->unbound_attrs); in __alloc_workqueue()
5844 wq->lockdep_map = lockdep_map; in alloc_workqueue_lockdep_map()
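/*
 * Sketch (editor's addition): creating a hypothetical unbound workqueue
 * that participates in memory reclaim. The name, flags and max_active of 0
 * (use the default limit) are assumptions for the example.
 */
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;

static int my_init(void)
{
	my_wq = alloc_workqueue("my_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
	if (!my_wq)
		return -ENOMEM;
	return 0;
}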
5856 if (pwq->nr_in_flight[i]) in pwq_busy()
5859 if ((pwq != rcu_access_pointer(pwq->wq->dfl_pwq)) && (pwq->refcnt > 1)) in pwq_busy()
5868 * destroy_workqueue - safely terminate a workqueue
5873 * This function does NOT guarantee that non-pending work that has been
5876 * the workqueue has no way of accessing non-pending delayed_work. delayed_work
5877 * is only linked on the timer-side. All delayed_work must, therefore, be
5881 * destroy_workqueue() would cleanly cancel all pending and non-pending
5896 mutex_lock(&wq->mutex); in destroy_workqueue()
5897 wq->flags |= __WQ_DESTROYING; in destroy_workqueue()
5898 mutex_unlock(&wq->mutex); in destroy_workqueue()
5904 if (wq->rescuer) { in destroy_workqueue()
5905 struct worker *rescuer = wq->rescuer; in destroy_workqueue()
5909 wq->rescuer = NULL; in destroy_workqueue()
5913 kthread_stop(rescuer->task); in destroy_workqueue()
5918 * Sanity checks - grab all the locks so that we wait for all in destroy_workqueue()
5919 * in-flight operations which may do put_pwq(). in destroy_workqueue()
5922 mutex_lock(&wq->mutex); in destroy_workqueue()
5924 raw_spin_lock_irq(&pwq->pool->lock); in destroy_workqueue()
5927 __func__, wq->name); in destroy_workqueue()
5929 raw_spin_unlock_irq(&pwq->pool->lock); in destroy_workqueue()
5930 mutex_unlock(&wq->mutex); in destroy_workqueue()
5935 raw_spin_unlock_irq(&pwq->pool->lock); in destroy_workqueue()
5937 mutex_unlock(&wq->mutex); in destroy_workqueue()
5940 * wq list is used to freeze wq, remove from list after in destroy_workqueue()
5941 * flushing is complete in case freeze races us. in destroy_workqueue()
5943 list_del_rcu(&wq->list); in destroy_workqueue()
5948 * to put the base refs. @wq will be auto-destroyed from the last in destroy_workqueue()
5949 * pwq_put. RCU read lock prevents @wq from going away from under us. in destroy_workqueue()
5958 put_pwq_unlocked(unbound_pwq(wq, -1)); in destroy_workqueue()
5959 RCU_INIT_POINTER(*unbound_pwq_slot(wq, -1), NULL); in destroy_workqueue()
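/*
 * Sketch (editor's addition): per the comment above destroy_workqueue(),
 * delayed work items must be canceled by the caller first - the workqueue
 * cannot reach work that is still only armed on its timer. Names are
 * assumptions for the example.
 */
#include <linux/workqueue.h>

static void my_shutdown(struct workqueue_struct *wq, struct delayed_work *dwork)
{
	cancel_delayed_work_sync(dwork);	/* timer-side item first */
	destroy_workqueue(wq);			/* then drain and free @wq */
}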
5966 * workqueue_set_max_active - adjust max_active of a workqueue
5979 if (WARN_ON(wq->flags & WQ_BH)) in workqueue_set_max_active()
5982 if (WARN_ON(wq->flags & __WQ_ORDERED)) in workqueue_set_max_active()
5985 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); in workqueue_set_max_active()
5987 mutex_lock(&wq->mutex); in workqueue_set_max_active()
5989 wq->saved_max_active = max_active; in workqueue_set_max_active()
5990 if (wq->flags & WQ_UNBOUND) in workqueue_set_max_active()
5991 wq->saved_min_active = min(wq->saved_min_active, max_active); in workqueue_set_max_active()
5995 mutex_unlock(&wq->mutex); in workqueue_set_max_active()
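/*
 * Sketch (editor's addition): adjusting the concurrency limit of a
 * hypothetical non-ordered, non-BH workqueue at runtime. The value 16 is
 * an arbitrary assumption.
 */
#include <linux/workqueue.h>

static void my_bump_limit(struct workqueue_struct *wq)
{
	workqueue_set_max_active(wq, 16);
}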
6000 * workqueue_set_min_active - adjust min_active of an unbound workqueue
6015 /* min_active is only meaningful for non-ordered unbound workqueues */ in workqueue_set_min_active()
6016 if (WARN_ON((wq->flags & (WQ_BH | WQ_UNBOUND | __WQ_ORDERED)) != in workqueue_set_min_active()
6020 mutex_lock(&wq->mutex); in workqueue_set_min_active()
6021 wq->saved_min_active = clamp(min_active, 0, wq->saved_max_active); in workqueue_set_min_active()
6023 mutex_unlock(&wq->mutex); in workqueue_set_min_active()
6027 * current_work - retrieve %current task's work struct
6038 return worker ? worker->current_work : NULL; in current_work()
6043 * current_is_workqueue_rescuer - is %current workqueue rescuer?
6054 return worker && worker->rescue_wq; in current_is_workqueue_rescuer()
6058 * workqueue_congested - test whether a workqueue is congested
6068 * With the exception of ordered workqueues, all workqueues have per-cpu
6086 pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); in workqueue_congested()
6087 ret = !list_empty(&pwq->inactive_works); in workqueue_congested()
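/*
 * Sketch (editor's addition): using the congestion hint to throttle a
 * hypothetical producer. The result is advisory only and may be stale as
 * soon as it is returned.
 */
#include <linux/workqueue.h>

static bool my_should_throttle(struct workqueue_struct *wq)
{
	/* WORK_CPU_UNBOUND tests the pwq used by the local CPU */
	return workqueue_congested(WORK_CPU_UNBOUND, wq);
}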
6096 * work_busy - test whether a work is currently pending or running
6118 raw_spin_lock_irqsave(&pool->lock, irq_flags); in work_busy()
6121 raw_spin_unlock_irqrestore(&pool->lock, irq_flags); in work_busy()
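/*
 * Sketch (editor's addition): work_busy() returns an unsynchronized
 * snapshot, so it is only suitable as a debugging hint.
 */
#include <linux/printk.h>
#include <linux/workqueue.h>

static void my_dump_work_state(struct work_struct *work)
{
	unsigned int busy = work_busy(work);

	pr_debug("pending=%d running=%d\n",
		 !!(busy & WORK_BUSY_PENDING), !!(busy & WORK_BUSY_RUNNING));
}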
6130 * set_worker_desc - set description for the current work item
6131 * @fmt: printf-style format string
6146 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args); in set_worker_desc()
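/*
 * Sketch (editor's addition): a work function tagging its worker so the
 * description shows up in dumps such as sysrq-t. The device name is an
 * assumption for the example.
 */
#include <linux/workqueue.h>

static void my_flush_work_fn(struct work_struct *work)
{
	set_worker_desc("flush %s", "my-dev0");
	/* ... perform the flush ... */
}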
6153 * print_worker_info - print out worker information and description
6174 if (!(task->flags & PF_WQ_WORKER)) in print_worker_info()
6187 copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn)); in print_worker_info()
6188 copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq)); in print_worker_info()
6189 copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq)); in print_worker_info()
6190 copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1); in print_worker_info()
6191 copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1); in print_worker_info()
6203 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask); in pr_cont_pool_info()
6204 if (pool->node != NUMA_NO_NODE) in pr_cont_pool_info()
6205 pr_cont(" node=%d", pool->node); in pr_cont_pool_info()
6206 pr_cont(" flags=0x%x", pool->flags); in pr_cont_pool_info()
6207 if (pool->flags & POOL_BH) in pr_cont_pool_info()
6209 pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); in pr_cont_pool_info()
6211 pr_cont(" nice=%d", pool->attrs->nice); in pr_cont_pool_info()
6216 struct worker_pool *pool = worker->pool; in pr_cont_worker_id()
6218 if (pool->flags & WQ_BH) in pr_cont_worker_id()
6220 pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); in pr_cont_worker_id()
6222 pr_cont("%d%s", task_pid_nr(worker->task), in pr_cont_worker_id()
6223 worker->rescue_wq ? "(RESCUER)" : ""); in pr_cont_worker_id()
6234 if (!pcwsp->ctr) in pr_cont_work_flush()
6236 if (func == pcwsp->func) { in pr_cont_work_flush()
6237 pcwsp->ctr++; in pr_cont_work_flush()
6240 if (pcwsp->ctr == 1) in pr_cont_work_flush()
6241 pr_cont("%s %ps", pcwsp->comma ? "," : "", pcwsp->func); in pr_cont_work_flush()
6243 pr_cont("%s %ld*%ps", pcwsp->comma ? "," : "", pcwsp->ctr, pcwsp->func); in pr_cont_work_flush()
6244 pcwsp->ctr = 0; in pr_cont_work_flush()
6246 if ((long)func == -1L) in pr_cont_work_flush()
6248 pcwsp->comma = comma; in pr_cont_work_flush()
6249 pcwsp->func = func; in pr_cont_work_flush()
6250 pcwsp->ctr = 1; in pr_cont_work_flush()
6255 if (work->func == wq_barrier_func) { in pr_cont_work()
6260 pr_cont_work_flush(comma, (work_func_t)-1, pcwsp); in pr_cont_work()
6262 task_pid_nr(barr->task)); in pr_cont_work()
6265 pr_cont_work_flush(comma, (work_func_t)-1, pcwsp); in pr_cont_work()
6266 pr_cont_work_flush(comma, work->func, pcwsp); in pr_cont_work()
6273 struct worker_pool *pool = pwq->pool; in show_pwq()
6279 pr_info(" pwq %d:", pool->id); in show_pwq()
6283 pwq->nr_active, pwq->refcnt, in show_pwq()
6284 !list_empty(&pwq->mayday_node) ? " MAYDAY" : ""); in show_pwq()
6286 hash_for_each(pool->busy_hash, bkt, worker, hentry) { in show_pwq()
6287 if (worker->current_pwq == pwq) { in show_pwq()
6295 pr_info(" in-flight:"); in show_pwq()
6296 hash_for_each(pool->busy_hash, bkt, worker, hentry) { in show_pwq()
6297 if (worker->current_pwq != pwq) in show_pwq()
6302 pr_cont(":%ps", worker->current_func); in show_pwq()
6303 list_for_each_entry(work, &worker->scheduled, entry) in show_pwq()
6305 pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); in show_pwq()
6311 list_for_each_entry(work, &pool->worklist, entry) { in show_pwq()
6321 list_for_each_entry(work, &pool->worklist, entry) { in show_pwq()
6328 pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); in show_pwq()
6332 if (!list_empty(&pwq->inactive_works)) { in show_pwq()
6336 list_for_each_entry(work, &pwq->inactive_works, entry) { in show_pwq()
6340 pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); in show_pwq()
6346 * show_one_workqueue - dump state of specified workqueue
6364 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); in show_one_workqueue()
6367 raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags); in show_one_workqueue()
6378 raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags); in show_one_workqueue()
6381 * sysrq-t -> show_all_workqueues(). Avoid triggering in show_one_workqueue()
6390 * show_one_worker_pool - dump state of specified worker pool
6400 raw_spin_lock_irqsave(&pool->lock, irq_flags); in show_one_worker_pool()
6401 if (pool->nr_workers == pool->nr_idle) in show_one_worker_pool()
6405 if (!list_empty(&pool->worklist)) in show_one_worker_pool()
6406 hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000; in show_one_worker_pool()
6414 pr_info("pool %d:", pool->id); in show_one_worker_pool()
6416 pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers); in show_one_worker_pool()
6417 if (pool->manager) in show_one_worker_pool()
6419 task_pid_nr(pool->manager->task)); in show_one_worker_pool()
6420 list_for_each_entry(worker, &pool->idle_list, entry) { in show_one_worker_pool()
6428 raw_spin_unlock_irqrestore(&pool->lock, irq_flags); in show_one_worker_pool()
6431 * sysrq-t -> show_all_workqueues(). Avoid triggering in show_one_worker_pool()
6439 * show_all_workqueues - dump workqueue state
6463 * show_freezable_workqueues - dump freezable workqueue state
6477 if (!(wq->flags & WQ_FREEZABLE)) in show_freezable_workqueues()
6491 if (task->flags & PF_WQ_WORKER) { in wq_worker_comm()
6493 struct worker_pool *pool = worker->pool; in wq_worker_comm()
6499 raw_spin_lock_irq(&pool->lock); in wq_worker_comm()
6501 * ->desc tracks information (wq name or in wq_worker_comm()
6503 * current, prepend '+', otherwise '-'. in wq_worker_comm()
6505 if (worker->desc[0] != '\0') { in wq_worker_comm()
6506 if (worker->current_work) in wq_worker_comm()
6507 scnprintf(buf + off, size - off, "+%s", in wq_worker_comm()
6508 worker->desc); in wq_worker_comm()
6510 scnprintf(buf + off, size - off, "-%s", in wq_worker_comm()
6511 worker->desc); in wq_worker_comm()
6513 raw_spin_unlock_irq(&pool->lock); in wq_worker_comm()
6516 strscpy(buf, task->comm, size); in wq_worker_comm()
6546 raw_spin_lock_irq(&pool->lock); in unbind_workers()
6557 worker->flags |= WORKER_UNBOUND; in unbind_workers()
6559 pool->flags |= POOL_DISASSOCIATED; in unbind_workers()
6569 pool->nr_running = 0; in unbind_workers()
6578 raw_spin_unlock_irq(&pool->lock); in unbind_workers()
6588 * rebind_workers - rebind all workers of a pool to the associated CPU
6591 * @pool->cpu is coming online. Rebind all workers to the CPU.
6601 * be on the run-queue of the associated CPU before any local in rebind_workers()
6602 * wake-ups for concurrency management happen, restore CPU affinity in rebind_workers()
6607 kthread_set_per_cpu(worker->task, pool->cpu); in rebind_workers()
6608 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, in rebind_workers()
6612 raw_spin_lock_irq(&pool->lock); in rebind_workers()
6614 pool->flags &= ~POOL_DISASSOCIATED; in rebind_workers()
6617 unsigned int worker_flags = worker->flags; in rebind_workers()
6628 * WRITE_ONCE() is necessary because @worker->flags may be in rebind_workers()
6637 WRITE_ONCE(worker->flags, worker_flags); in rebind_workers()
6640 raw_spin_unlock_irq(&pool->lock); in rebind_workers()
6644 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
6661 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) in restore_unbound_workers_cpumask()
6664 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask); in restore_unbound_workers_cpumask()
6668 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0); in restore_unbound_workers_cpumask()
6676 if (pool->nr_workers) in workqueue_prepare_cpu()
6679 return -ENOMEM; in workqueue_prepare_cpu()
6696 if (pool->flags & POOL_BH) in workqueue_online_cpu()
6700 if (pool->cpu == cpu) in workqueue_online_cpu()
6702 else if (pool->cpu < 0) in workqueue_online_cpu()
6709 struct workqueue_attrs *attrs = wq->unbound_attrs; in workqueue_online_cpu()
6715 for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) in workqueue_online_cpu()
6718 mutex_lock(&wq->mutex); in workqueue_online_cpu()
6719 wq_update_node_max_active(wq, -1); in workqueue_online_cpu()
6720 mutex_unlock(&wq->mutex); in workqueue_online_cpu()
6732 /* unbinding per-cpu workers should happen on the local CPU */ in workqueue_offline_cpu()
6734 return -1; in workqueue_offline_cpu()
6744 struct workqueue_attrs *attrs = wq->unbound_attrs; in workqueue_offline_cpu()
6750 for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) in workqueue_offline_cpu()
6753 mutex_lock(&wq->mutex); in workqueue_offline_cpu()
6755 mutex_unlock(&wq->mutex); in workqueue_offline_cpu()
6774 wfc->ret = wfc->fn(wfc->arg); in work_for_cpu_fn()
6778 * work_on_cpu_key - run a function in thread context on a particular cpu
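/*
 * Sketch (editor's addition, assumes an SMP build): running a hypothetical
 * helper on a specific CPU; the caller sleeps until the function has
 * returned on that CPU. Names are assumptions for the example.
 */
#include <linux/workqueue.h>

static long my_read_cpu_reg(void *arg)
{
	/* executes in a workqueue worker bound to the requested CPU */
	return 0;
}

static long my_query_cpu(int cpu)
{
	return work_on_cpu(cpu, my_read_cpu_reg, NULL);
}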
6806 * freeze_workqueues_begin - begin freezing workqueues
6810 * pool->worklist.
6813 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
6825 mutex_lock(&wq->mutex); in freeze_workqueues_begin()
6827 mutex_unlock(&wq->mutex); in freeze_workqueues_begin()
6834 * freeze_workqueues_busy - are freezable workqueues still busy?
6857 if (!(wq->flags & WQ_FREEZABLE)) in freeze_workqueues_busy()
6865 WARN_ON_ONCE(pwq->nr_active < 0); in freeze_workqueues_busy()
6866 if (pwq->nr_active) { in freeze_workqueues_busy()
6880 * thaw_workqueues - thaw workqueues
6886 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
6901 mutex_lock(&wq->mutex); in thaw_workqueues()
6903 mutex_unlock(&wq->mutex); in thaw_workqueues()
6921 if (!(wq->flags & WQ_UNBOUND) || (wq->flags & __WQ_DESTROYING)) in workqueue_apply_unbound_cpumask()
6924 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask); in workqueue_apply_unbound_cpumask()
6930 list_add_tail(&ctx->list, &ctxs); in workqueue_apply_unbound_cpumask()
6948 * workqueue_unbound_exclude_cpumask - Exclude given CPUs from unbound cpumask
6960 return -ENOMEM; in workqueue_unbound_exclude_cpumask()
6992 return -EINVAL; in parse_affn_scope()
7004 return -EINVAL; in wq_affn_dfl_set()
7040 * per_cpu RO bool : whether the workqueue is per-cpu or unbound
7041 * max_active RW int : maximum number of in-flight work items
7059 return wq_dev->wq; in dev_to_wq()
7067 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); in per_cpu_show()
7076 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); in max_active_show()
7087 return -EINVAL; in max_active_store()
7107 mutex_lock(&wq->mutex); in wq_nice_show()
7108 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice); in wq_nice_show()
7109 mutex_unlock(&wq->mutex); in wq_nice_show()
7125 copy_workqueue_attrs(attrs, wq->unbound_attrs); in wq_sysfs_prep_attrs()
7134 int ret = -ENOMEM; in wq_nice_store()
7142 if (sscanf(buf, "%d", &attrs->nice) == 1 && in wq_nice_store()
7143 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) in wq_nice_store()
7146 ret = -EINVAL; in wq_nice_store()
7160 mutex_lock(&wq->mutex); in wq_cpumask_show()
7162 cpumask_pr_args(wq->unbound_attrs->cpumask)); in wq_cpumask_show()
7163 mutex_unlock(&wq->mutex); in wq_cpumask_show()
7173 int ret = -ENOMEM; in wq_cpumask_store()
7181 ret = cpumask_parse(buf, attrs->cpumask); in wq_cpumask_store()
7197 mutex_lock(&wq->mutex); in wq_affn_scope_show()
7198 if (wq->unbound_attrs->affn_scope == WQ_AFFN_DFL) in wq_affn_scope_show()
7204 wq_affn_names[wq->unbound_attrs->affn_scope]); in wq_affn_scope_show()
7205 mutex_unlock(&wq->mutex); in wq_affn_scope_show()
7216 int affn, ret = -ENOMEM; in wq_affn_scope_store()
7225 attrs->affn_scope = affn; in wq_affn_scope_store()
7239 wq->unbound_attrs->affn_strict); in wq_affinity_strict_show()
7248 int v, ret = -ENOMEM; in wq_affinity_strict_store()
7251 return -EINVAL; in wq_affinity_strict_store()
7256 attrs->affn_strict = (bool)v; in wq_affinity_strict_store()
7278 * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
7281 * The low-level workqueues cpumask is a global cpumask that limits
7285 * Return: 0 - Success
7286 * -EINVAL - Invalid @cpumask
7287 * -ENOMEM - Failed to allocate memory for attrs or pwqs.
7291 int ret = -EINVAL; in workqueue_set_unbound_cpumask()
7350 return -ENOMEM; in cpumask_store()
7383 * workqueue_sysfs_register - make a workqueue visible in sysfs
7395 * Return: 0 on success, -errno on failure.
7406 if (WARN_ON(wq->flags & __WQ_ORDERED)) in workqueue_sysfs_register()
7407 return -EINVAL; in workqueue_sysfs_register()
7409 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); in workqueue_sysfs_register()
7411 return -ENOMEM; in workqueue_sysfs_register()
7413 wq_dev->wq = wq; in workqueue_sysfs_register()
7414 wq_dev->dev.bus = &wq_subsys; in workqueue_sysfs_register()
7415 wq_dev->dev.release = wq_device_release; in workqueue_sysfs_register()
7416 dev_set_name(&wq_dev->dev, "%s", wq->name); in workqueue_sysfs_register()
7422 dev_set_uevent_suppress(&wq_dev->dev, true); in workqueue_sysfs_register()
7424 ret = device_register(&wq_dev->dev); in workqueue_sysfs_register()
7426 put_device(&wq_dev->dev); in workqueue_sysfs_register()
7427 wq->wq_dev = NULL; in workqueue_sysfs_register()
7431 if (wq->flags & WQ_UNBOUND) { in workqueue_sysfs_register()
7434 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { in workqueue_sysfs_register()
7435 ret = device_create_file(&wq_dev->dev, attr); in workqueue_sysfs_register()
7437 device_unregister(&wq_dev->dev); in workqueue_sysfs_register()
7438 wq->wq_dev = NULL; in workqueue_sysfs_register()
7444 dev_set_uevent_suppress(&wq_dev->dev, false); in workqueue_sysfs_register()
7445 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); in workqueue_sysfs_register()
7450 * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
7457 struct wq_device *wq_dev = wq->wq_dev; in workqueue_sysfs_unregister()
7459 if (!wq->wq_dev) in workqueue_sysfs_unregister()
7462 wq->wq_dev = NULL; in workqueue_sysfs_unregister()
7463 device_unregister(&wq_dev->dev); in workqueue_sysfs_unregister()
7472 * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
7480 * forward progress is defined as the first item on ->worklist changing.
7499 * The only candidates are CPU-bound workers in the running state.
7509 raw_spin_lock_irqsave(&pool->lock, irq_flags); in show_cpu_pool_hog()
7511 hash_for_each(pool->busy_hash, bkt, worker, hentry) { in show_cpu_pool_hog()
7512 if (task_is_running(worker->task)) { in show_cpu_pool_hog()
7520 pr_info("pool %d:\n", pool->id); in show_cpu_pool_hog()
7521 sched_show_task(worker->task); in show_cpu_pool_hog()
7527 raw_spin_unlock_irqrestore(&pool->lock, irq_flags); in show_cpu_pool_hog()
7535 pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n"); in show_cpu_pools_hogs()
7540 if (pool->cpu_stall) in show_cpu_pools_hogs()
7582 pool->cpu_stall = false; in wq_watchdog_timer_fn()
7583 if (list_empty(&pool->worklist)) in wq_watchdog_timer_fn()
7593 if (pool->cpu >= 0) in wq_watchdog_timer_fn()
7594 touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu)); in wq_watchdog_timer_fn()
7597 pool_ts = READ_ONCE(pool->watchdog_ts); in wq_watchdog_timer_fn()
7607 if (pool->cpu >= 0 && !(pool->flags & POOL_BH)) { in wq_watchdog_timer_fn()
7608 pool->cpu_stall = true; in wq_watchdog_timer_fn()
7611 pr_emerg("BUG: workqueue lockup - pool"); in wq_watchdog_timer_fn()
7613 pr_cont(" stuck for %us!\n", in wq_watchdog_timer_fn()
7614 jiffies_to_msecs(now - pool_ts) / 1000); in wq_watchdog_timer_fn()
7723 pool->cpu = cpu; in init_cpu_worker_pool()
7724 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); in init_cpu_worker_pool()
7725 cpumask_copy(pool->attrs->__pod_cpumask, cpumask_of(cpu)); in init_cpu_worker_pool()
7726 pool->attrs->nice = nice; in init_cpu_worker_pool()
7727 pool->attrs->affn_strict = true; in init_cpu_worker_pool()
7728 pool->node = cpu_to_node(cpu); in init_cpu_worker_pool()
7737 * workqueue_init_early - early init for workqueue subsystem
7739 * This is the first step of three-staged workqueue subsystem initialization and
7740 * invoked as soon as the bare basics - memory allocation, cpumasks and idr are
7784 pt->pod_cpus = kcalloc(1, sizeof(pt->pod_cpus[0]), GFP_KERNEL); in workqueue_init_early()
7785 pt->pod_node = kcalloc(1, sizeof(pt->pod_node[0]), GFP_KERNEL); in workqueue_init_early()
7786 pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); in workqueue_init_early()
7787 BUG_ON(!pt->pod_cpus || !pt->pod_node || !pt->cpu_pod); in workqueue_init_early()
7789 BUG_ON(!zalloc_cpumask_var_node(&pt->pod_cpus[0], GFP_KERNEL, NUMA_NO_NODE)); in workqueue_init_early()
7791 pt->nr_pods = 1; in workqueue_init_early()
7792 cpumask_copy(pt->pod_cpus[0], cpu_possible_mask); in workqueue_init_early()
7793 pt->pod_node[0] = NUMA_NO_NODE; in workqueue_init_early()
7794 pt->cpu_pod[0] = 0; in workqueue_init_early()
7803 pool->flags |= POOL_BH; in workqueue_init_early()
7818 attrs->nice = std_nice[i]; in workqueue_init_early()
7826 attrs->nice = std_nice[i]; in workqueue_init_early()
7827 attrs->ordered = true; in workqueue_init_early()
7869 * most consider human-perceivable. However, the kernel also runs on a in wq_cpu_intensive_thresh_init()
7882 /* see init/calibrate.c for lpj -> BogoMIPS calculation */ in wq_cpu_intensive_thresh_init()
7894 * workqueue_init - bring workqueue subsystem fully online
7896 * This is the second step of three-staged workqueue subsystem initialization
7913 * Per-cpu pools created earlier could be missing node hint. Fix them in workqueue_init()
7918 pool->node = cpu_to_node(cpu); in workqueue_init()
7920 pool->node = cpu_to_node(cpu); in workqueue_init()
7926 wq->name); in workqueue_init()
7943 pool->flags &= ~POOL_DISASSOCIATED; in workqueue_init()
7956 * Initialize @pt by first initializing @pt->cpu_pod[] with pod IDs according to
7965 pt->nr_pods = 0; in init_pod_type()
7967 /* init @pt->cpu_pod[] according to @cpus_share_pod() */ in init_pod_type()
7968 pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); in init_pod_type()
7969 BUG_ON(!pt->cpu_pod); in init_pod_type()
7974 pt->cpu_pod[cur] = pt->nr_pods++; in init_pod_type()
7978 pt->cpu_pod[cur] = pt->cpu_pod[pre]; in init_pod_type()
7984 /* init the rest to match @pt->cpu_pod[] */ in init_pod_type()
7985 pt->pod_cpus = kcalloc(pt->nr_pods, sizeof(pt->pod_cpus[0]), GFP_KERNEL); in init_pod_type()
7986 pt->pod_node = kcalloc(pt->nr_pods, sizeof(pt->pod_node[0]), GFP_KERNEL); in init_pod_type()
7987 BUG_ON(!pt->pod_cpus || !pt->pod_node); in init_pod_type()
7989 for (pod = 0; pod < pt->nr_pods; pod++) in init_pod_type()
7990 BUG_ON(!zalloc_cpumask_var(&pt->pod_cpus[pod], GFP_KERNEL)); in init_pod_type()
7993 cpumask_set_cpu(cpu, pt->pod_cpus[pt->cpu_pod[cpu]]); in init_pod_type()
7994 pt->pod_node[pt->cpu_pod[cpu]] = cpu_to_node(cpu); in init_pod_type()
8018 * workqueue_init_topology - initialize CPU pods for unbound workqueues
8020 * This is the third step of three-staged workqueue subsystem initialization and
8041 * and CPU combinations to apply per-pod sharing. in workqueue_init_topology()
8046 if (wq->flags & WQ_UNBOUND) { in workqueue_init_topology()
8047 mutex_lock(&wq->mutex); in workqueue_init_topology()
8048 wq_update_node_max_active(wq, -1); in workqueue_init_topology()
8049 mutex_unlock(&wq->mutex); in workqueue_init_topology()
8058 pr_warn("WARNING: Flushing system-wide workqueues will be prohibited in near future.\n"); in __warn_flushing_systemwide_wq()