1 // SPDX-License-Identifier: GPL-2.0-only
3 * kernel/workqueue.c - generic async execution with shared worker pool
10 * Kai Petzke <wpp@marie.physik.tu-berlin.de>
22 * pools for workqueues which are not bound to any specific CPU - the
25 * Please read Documentation/core-api/workqueue.rst for details.
79 * BH pool is per-CPU and always DISASSOCIATED.
108 UNBOUND_POOL_HASH_ORDER = 6, /* hashed by pool->attrs */
128 WORKER_ID_LEN = 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */
142 * I: Modifiable by initialization/destruction paths and read-only for
148 * L: pool->lock protected. Access with pool->lock held.
150 * LN: pool->lock and wq_node_nr_active->lock protected for writes. Either for
153 * K: Only modified by worker while holding pool->lock. Can be safely read by
154 * self, while holding pool->lock or from IRQ context if %current is the
165 * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
167 * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
170 * WQ: wq->mutex protected.
172 * WR: wq->mutex protected for writes. RCU protected for reads.
174 * WO: wq->mutex protected for writes. Updated with WRITE_ONCE() and can be read
197 * but w/ pool->lock held. The readers grab pool->lock and are
234 * Per-pool_workqueue statistics. These can be monitored using
242 PWQ_STAT_CM_WAKEUP, /* concurrency-management worker wakeups */
251 * The per-pool workqueue. While queued, bits below WORK_PWQ_SHIFT
252 * of work_struct->data are used for flags and the remaining high bits
269 * When pwq->nr_active >= max_active, new work item is queued to
270 * pwq->inactive_works instead of pool->worklist and marked with
274 * nr_active and all work items in pwq->inactive_works are marked with
276 * in pwq->inactive_works. Some of them are ready to run in
277 * pool->worklist or worker->scheduled. Those work items are only struct
279 * in nr_active. For non-barrier work item, it is marked with
280 * WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
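As a driver-side illustration of the max_active accounting described above, here is a minimal, hypothetical sketch (the demo_* names are not from this file): with max_active capped at 2, a third work item queued from the same CPU sits on pwq->inactive_works until one of the first two retires its active count.

	#include <linux/module.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *demo_wq;	/* hypothetical */
	static struct work_struct demo_works[3];

	static void demo_fn(struct work_struct *work)
	{
		/* long-running payload */
	}

	static int __init demo_init(void)
	{
		int i;

		demo_wq = alloc_workqueue("demo_wq", 0, 2);	/* max_active == 2 */
		if (!demo_wq)
			return -ENOMEM;

		for (i = 0; i < 3; i++) {
			INIT_WORK(&demo_works[i], demo_fn);
			queue_work(demo_wq, &demo_works[i]);	/* the third goes inactive */
		}
		return 0;
	}

	static void __exit demo_exit(void)
	{
		destroy_workqueue(demo_wq);	/* drains pending work first */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");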
284 struct list_head pending_node; /* LN: node on wq_node_nr_active->pending_pwqs */
285 struct list_head pwqs_node; /* WR: node on wq->pwqs */
286 struct list_head mayday_node; /* MD: node on wq->maydays */
294 * grabbing wq->mutex.
312 * Unlike in a per-cpu workqueue where max_active limits its concurrency level
317 * The following struct is used to enforce per-node max_active. When a pwq wants
318 * to start executing a work item, it should increment ->nr using
319 * tryinc_node_nr_active(). If acquisition fails due to ->nr already being over
320 * ->max, the pwq is queued on ->pending_pwqs. As in-flight work items finish
321 * and decrement ->nr, node_activate_pending_pwq() activates the pending pwqs in
322 * round-robin order.
325 int max; /* per-node max_active */
326 atomic_t nr; /* per-node nr_active */
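A minimal sketch of the lockless acquisition the comment above describes, reduced to a bare atomic counter and a cap. This is an illustration of the pattern only, not the in-tree tryinc_node_nr_active() (whose cmpxchg loop appears further below).

	#include <linux/atomic.h>

	/* Try to take one slot while staying below @max. */
	static bool try_inc_below_max(atomic_t *nr, int max)
	{
		int old = atomic_read(nr);

		while (old < max) {
			int tmp = atomic_cmpxchg_relaxed(nr, old, old + 1);

			if (tmp == old)
				return true;	/* slot taken, counter incremented */
			old = tmp;		/* lost the race, retry with the fresh value */
		}
		return false;			/* over budget; caller falls back to pending_pwqs */
	}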
381 struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */
382 struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
387 * See the comment above workqueue_attrs->affn_scope.
391 cpumask_var_t *pod_cpus; /* pod -> cpus */
392 int *pod_node; /* pod -> node */
393 int *cpu_pod; /* cpu -> pod */
412 * Per-cpu work items which run for longer than the following threshold are
414 * management to prevent them from noticeably delaying other per-cpu work items.
442 static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
469 * following always forces round-robin CPU selection on unbound work items
485 /* the per-cpu worker pools */
490 /* PL: hash of all unbound pools keyed by pool->attrs */
502 * worker to avoid A-A deadlocks.
540 !lockdep_is_held(&wq->mutex) && \
542 "RCU, wq->mutex or wq_pool_mutex should be held")
555 * for_each_pool - iterate through all worker_pools in the system
572 * for_each_pool_worker - iterate through all workers of a worker_pool
582 list_for_each_entry((worker), &(pool)->workers, node) \
587 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
591 * This must be called either with wq->mutex held or RCU read locked.
599 list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
600 lockdep_is_held(&(wq->mutex)))
608 return ((struct work_struct *) addr)->func; in work_debug_hint()
620 * - an active object is initialized
638 * - an active object is freed
689 destroy_timer_on_stack(&work->timer); in destroy_delayed_work_on_stack()
690 debug_object_free(&work->work, &work_debug_descr); in destroy_delayed_work_on_stack()
700 * worker_pool_assign_id - allocate ID and assign it to @pool
704 * successfully, -errno on failure.
715 pool->id = ret; in worker_pool_assign_id()
725 return per_cpu_ptr(wq->cpu_pwq, cpu); in unbound_pwq_slot()
727 return &wq->dfl_pwq; in unbound_pwq_slot()
735 lockdep_is_held(&wq->mutex)); in unbound_pwq()
739 * unbound_effective_cpumask - effective cpumask of an unbound workqueue
742 * @wq->unbound_attrs->cpumask contains the cpumask requested by the user which
748 return unbound_pwq(wq, -1)->pool->attrs->__pod_cpumask; in unbound_effective_cpumask()
759 ((1 << WORK_STRUCT_COLOR_BITS) - 1); in get_work_color()
769 return (pool->flags & POOL_BH) ? WORK_OFFQ_BH : 0; in pool_offq_flags()
778 * can be used to set the pwq, pool or clear work->data. These functions should
779 * only be called while the work is owned - ie. while the PENDING bit is set.
789 atomic_long_set(&work->data, data | work_static(work)); in set_work_data()
821 * work->current_func, which is executed afterwards. This possible in set_work_pool_and_clear_pending()
826 * ---------------------------- -------------------------------- in set_work_pool_and_clear_pending()
834 * 7 work->current_func() { in set_work_pool_and_clear_pending()
838 * Without an explicit full barrier speculative LOAD on line 8 can in set_work_pool_and_clear_pending()
856 unsigned long data = atomic_long_read(&work->data); in get_work_pwq()
865 * get_work_pool - return the worker_pool a given work was associated with
881 unsigned long data = atomic_long_read(&work->data); in get_work_pool()
887 return work_struct_pwq(data)->pool; in get_work_pool()
898 return (v >> shift) & ((1U << bits) - 1); in shift_and_mask()
905 offqd->pool_id = shift_and_mask(data, WORK_OFFQ_POOL_SHIFT, in work_offqd_unpack()
907 offqd->disable = shift_and_mask(data, WORK_OFFQ_DISABLE_SHIFT, in work_offqd_unpack()
909 offqd->flags = data & WORK_OFFQ_FLAG_MASK; in work_offqd_unpack()
914 return ((unsigned long)offqd->disable << WORK_OFFQ_DISABLE_SHIFT) | in work_offqd_pack_flags()
915 ((unsigned long)offqd->flags); in work_offqd_pack_flags()
921 * they're being called with pool->lock held.
934 return !list_empty(&pool->worklist) && !pool->nr_running; in need_more_worker()
940 return pool->nr_idle; in may_start_working()
946 return !list_empty(&pool->worklist) && (pool->nr_running <= 1); in keep_working()
958 bool managing = pool->flags & POOL_MANAGER_ACTIVE; in too_many_workers()
959 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ in too_many_workers()
960 int nr_busy = pool->nr_workers - nr_idle; in too_many_workers()
962 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; in too_many_workers()
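As a worked example, assuming MAX_IDLE_WORKERS_RATIO is 4, a pool with 10 idle and 30 busy workers satisfies (10 - 2) * 4 = 32 >= 30 and is considered to have too many idle workers, while the same pool with 33 busy workers is not.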
966 * worker_set_flags - set worker flags and adjust nr_running accordingly
970 * Set @flags in @worker->flags and adjust nr_running accordingly.
974 struct worker_pool *pool = worker->pool; in worker_set_flags()
976 lockdep_assert_held(&pool->lock); in worker_set_flags()
980 !(worker->flags & WORKER_NOT_RUNNING)) { in worker_set_flags()
981 pool->nr_running--; in worker_set_flags()
984 worker->flags |= flags; in worker_set_flags()
988 * worker_clr_flags - clear worker flags and adjust nr_running accordingly
992 * Clear @flags in @worker->flags and adjust nr_running accordingly.
996 struct worker_pool *pool = worker->pool; in worker_clr_flags()
997 unsigned int oflags = worker->flags; in worker_clr_flags()
999 lockdep_assert_held(&pool->lock); in worker_clr_flags()
1001 worker->flags &= ~flags; in worker_clr_flags()
1009 if (!(worker->flags & WORKER_NOT_RUNNING)) in worker_clr_flags()
1010 pool->nr_running++; in worker_clr_flags()
1013 /* Return the first idle worker. Called with pool->lock held. */
1016 if (unlikely(list_empty(&pool->idle_list))) in first_idle_worker()
1019 return list_first_entry(&pool->idle_list, struct worker, entry); in first_idle_worker()
1023 * worker_enter_idle - enter idle state
1030 * raw_spin_lock_irq(pool->lock).
1034 struct worker_pool *pool = worker->pool; in worker_enter_idle()
1036 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) || in worker_enter_idle()
1037 WARN_ON_ONCE(!list_empty(&worker->entry) && in worker_enter_idle()
1038 (worker->hentry.next || worker->hentry.pprev))) in worker_enter_idle()
1042 worker->flags |= WORKER_IDLE; in worker_enter_idle()
1043 pool->nr_idle++; in worker_enter_idle()
1044 worker->last_active = jiffies; in worker_enter_idle()
1047 list_add(&worker->entry, &pool->idle_list); in worker_enter_idle()
1049 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) in worker_enter_idle()
1050 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); in worker_enter_idle()
1053 WARN_ON_ONCE(pool->nr_workers == pool->nr_idle && pool->nr_running); in worker_enter_idle()
1057 * worker_leave_idle - leave idle state
1063 * raw_spin_lock_irq(pool->lock).
1067 struct worker_pool *pool = worker->pool; in worker_leave_idle()
1069 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE))) in worker_leave_idle()
1072 pool->nr_idle--; in worker_leave_idle()
1073 list_del_init(&worker->entry); in worker_leave_idle()
1077 * find_worker_executing_work - find worker which is executing a work
1082 * @pool->busy_hash which is keyed by the address of @work. For a worker
1103 * raw_spin_lock_irq(pool->lock).
1114 hash_for_each_possible(pool->busy_hash, worker, hentry, in find_worker_executing_work()
1116 if (worker->current_work == work && in find_worker_executing_work()
1117 worker->current_func == work->func) in find_worker_executing_work()
1124 * move_linked_works - move linked works to a list
1135 * raw_spin_lock_irq(pool->lock).
1147 list_move_tail(&work->entry, head); in move_linked_works()
1162 * assign_work - assign a work item and its linked work items to a worker
1180 struct worker_pool *pool = worker->pool; in assign_work()
1183 lockdep_assert_held(&pool->lock); in assign_work()
1195 move_linked_works(work, &collision->scheduled, nextp); in assign_work()
1199 move_linked_works(work, &worker->scheduled, nextp); in assign_work()
1205 int high = pool->attrs->nice == HIGHPRI_NICE_LEVEL ? 1 : 0; in bh_pool_irq_work()
1207 return &per_cpu(bh_pool_irq_works, pool->cpu)[high]; in bh_pool_irq_work()
1214 if (unlikely(pool->cpu != smp_processor_id() && in kick_bh_pool()
1215 !(pool->flags & POOL_BH_DRAINING))) { in kick_bh_pool()
1216 irq_work_queue_on(bh_pool_irq_work(pool), pool->cpu); in kick_bh_pool()
1220 if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) in kick_bh_pool()
1227 * kick_pool - wake up an idle worker if necessary
1238 lockdep_assert_held(&pool->lock); in kick_pool()
1243 if (pool->flags & POOL_BH) { in kick_pool()
1248 p = worker->task; in kick_pool()
1259 * so, setting the wake_cpu won't do anything. As this is a best-effort in kick_pool()
1260 * optimization and the race window is narrow, let's leave as-is for in kick_pool()
1264 * If @pool has non-strict affinity, @worker might have ended up outside in kick_pool()
1267 if (!pool->attrs->affn_strict && in kick_pool()
1268 !cpumask_test_cpu(p->wake_cpu, pool->attrs->__pod_cpumask)) { in kick_pool()
1269 struct work_struct *work = list_first_entry(&pool->worklist, in kick_pool()
1271 int wake_cpu = cpumask_any_and_distribute(pool->attrs->__pod_cpumask, in kick_pool()
1274 p->wake_cpu = wake_cpu; in kick_pool()
1275 get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++; in kick_pool()
1286 * Concurrency-managed per-cpu work items that hog CPU for longer than
1288 * which prevents them from stalling other concurrency-managed work items. If a
1316 if (ent->func == func) in wci_find_ent()
1335 cnt = atomic64_inc_return_relaxed(&ent->cnt); in wq_cpu_intensive_report()
1338 is_power_of_2(cnt + 1 - wq_cpu_intensive_warning_thresh)) in wq_cpu_intensive_report()
1340 ent->func, wq_cpu_intensive_thresh_us, in wq_cpu_intensive_report()
1341 atomic64_read(&ent->cnt)); in wq_cpu_intensive_report()
1366 ent->func = func; in wq_cpu_intensive_report()
1367 atomic64_set(&ent->cnt, 0); in wq_cpu_intensive_report()
1368 hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func); in wq_cpu_intensive_report()
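For work items that are already known to hog the CPU, a caller can opt out of concurrency management up front instead of relying on the auto-detection and reporting above. A hypothetical sketch:

	static struct workqueue_struct *crunch_wq;	/* hypothetical */

	static int __init crunch_init(void)
	{
		/* items on this wq never participate in concurrency management */
		crunch_wq = alloc_workqueue("crunch", WQ_CPU_INTENSIVE, 0);
		return crunch_wq ? 0 : -ENOMEM;
	}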
1380 * wq_worker_running - a worker is running again
1389 if (!READ_ONCE(worker->sleeping)) in wq_worker_running()
1395 * and leave with an unexpected pool->nr_running == 1 on the newly unbound in wq_worker_running()
1399 if (!(worker->flags & WORKER_NOT_RUNNING)) in wq_worker_running()
1400 worker->pool->nr_running++; in wq_worker_running()
1404 * CPU intensive auto-detection cares about how long a work item hogged in wq_worker_running()
1407 worker->current_at = worker->task->se.sum_exec_runtime; in wq_worker_running()
1409 WRITE_ONCE(worker->sleeping, 0); in wq_worker_running()
1413 * wq_worker_sleeping - a worker is going to sleep
1429 if (worker->flags & WORKER_NOT_RUNNING) in wq_worker_sleeping()
1432 pool = worker->pool; in wq_worker_sleeping()
1435 if (READ_ONCE(worker->sleeping)) in wq_worker_sleeping()
1438 WRITE_ONCE(worker->sleeping, 1); in wq_worker_sleeping()
1439 raw_spin_lock_irq(&pool->lock); in wq_worker_sleeping()
1446 if (worker->flags & WORKER_NOT_RUNNING) { in wq_worker_sleeping()
1447 raw_spin_unlock_irq(&pool->lock); in wq_worker_sleeping()
1451 pool->nr_running--; in wq_worker_sleeping()
1453 worker->current_pwq->stats[PWQ_STAT_CM_WAKEUP]++; in wq_worker_sleeping()
1455 raw_spin_unlock_irq(&pool->lock); in wq_worker_sleeping()
1459 * wq_worker_tick - a scheduler tick occurred while a kworker is running
1468 struct pool_workqueue *pwq = worker->current_pwq; in wq_worker_tick()
1469 struct worker_pool *pool = worker->pool; in wq_worker_tick()
1474 pwq->stats[PWQ_STAT_CPU_TIME] += TICK_USEC; in wq_worker_tick()
1482 * CPU_INTENSIVE to avoid stalling other concurrency-managed work items. in wq_worker_tick()
1484 * Set @worker->sleeping means that @worker is in the process of in wq_worker_tick()
1486 * @pool->nr_running until it wakes up. As wq_worker_sleeping() also in wq_worker_tick()
1487 * decrements ->nr_running, setting CPU_INTENSIVE here can lead to in wq_worker_tick()
1491 if ((worker->flags & WORKER_NOT_RUNNING) || READ_ONCE(worker->sleeping) || in wq_worker_tick()
1492 worker->task->se.sum_exec_runtime - worker->current_at < in wq_worker_tick()
1496 raw_spin_lock(&pool->lock); in wq_worker_tick()
1499 wq_cpu_intensive_report(worker->current_func); in wq_worker_tick()
1500 pwq->stats[PWQ_STAT_CPU_INTENSIVE]++; in wq_worker_tick()
1503 pwq->stats[PWQ_STAT_CM_WAKEUP]++; in wq_worker_tick()
1505 raw_spin_unlock(&pool->lock); in wq_worker_tick()
1509 * wq_worker_last_func - retrieve worker's last work function
1516 * raw_spin_lock_irq(rq->lock)
1520 * dequeuing, to allow periodic aggregation to shut-off when that
1523 * As this function doesn't involve any workqueue-related locking, it
1536 return worker->last_func; in wq_worker_last_func()
1540 * wq_node_nr_active - Determine wq_node_nr_active to use
1546 * - %NULL for per-cpu workqueues as they don't need to use shared nr_active.
1548 * - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE.
1550 * - Otherwise, node_nr_active[@node].
1555 if (!(wq->flags & WQ_UNBOUND)) in wq_node_nr_active()
1561 return wq->node_nr_active[node]; in wq_node_nr_active()
1565 * wq_update_node_max_active - Update per-node max_actives to use
1567 * @off_cpu: CPU that's going down, -1 if a CPU is not going down
1569 * Update @wq->node_nr_active[]->max. @wq must be unbound. max_active is
1571 * cpus. The result is always between @wq->min_active and max_active.
1576 int min_active = READ_ONCE(wq->min_active); in wq_update_node_max_active()
1577 int max_active = READ_ONCE(wq->max_active); in wq_update_node_max_active()
1580 lockdep_assert_held(&wq->mutex); in wq_update_node_max_active()
1586 off_cpu = -1; in wq_update_node_max_active()
1590 total_cpus--; in wq_update_node_max_active()
1595 wq_node_nr_active(wq, node)->max = min_active; in wq_update_node_max_active()
1597 wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; in wq_update_node_max_active()
1606 node_cpus--; in wq_update_node_max_active()
1608 wq_node_nr_active(wq, node)->max = in wq_update_node_max_active()
1613 wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; in wq_update_node_max_active()
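As a worked example, assuming the default max_active of 256 and min_active of 8, an unbound workqueue on a two-node system with 12 and 4 effective CPUs would end up with per-node maxes of roughly 256 * 12/16 = 192 and 256 * 4/16 = 64, each clamped into [8, 256], while the NUMA_NO_NODE slot keeps the full 256. The proportional split is what the comment above means by distributing max_active according to online CPU counts.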
1617 * get_pwq - get an extra reference on the specified pool_workqueue
1621 * @pwq has positive refcnt and be holding the matching pool->lock.
1625 lockdep_assert_held(&pwq->pool->lock); in get_pwq()
1626 WARN_ON_ONCE(pwq->refcnt <= 0); in get_pwq()
1627 pwq->refcnt++; in get_pwq()
1631 * put_pwq - put a pool_workqueue reference
1635 * destruction. The caller should be holding the matching pool->lock.
1639 lockdep_assert_held(&pwq->pool->lock); in put_pwq()
1640 if (likely(--pwq->refcnt)) in put_pwq()
1643 * @pwq can't be released under pool->lock, bounce to a dedicated in put_pwq()
1644 * kthread_worker to avoid A-A deadlocks. in put_pwq()
1646 kthread_queue_work(pwq_release_worker, &pwq->release_work); in put_pwq()
1650 * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
1662 raw_spin_lock_irq(&pwq->pool->lock); in put_pwq_unlocked()
1664 raw_spin_unlock_irq(&pwq->pool->lock); in put_pwq_unlocked()
1670 return !pwq->nr_active && list_empty(&pwq->inactive_works); in pwq_is_empty()
1680 if (list_empty(&pwq->pool->worklist)) in __pwq_activate_work()
1681 pwq->pool->watchdog_ts = jiffies; in __pwq_activate_work()
1682 move_linked_works(work, &pwq->pool->worklist, NULL); in __pwq_activate_work()
1688 int max = READ_ONCE(nna->max); in tryinc_node_nr_active()
1693 old = atomic_read(&nna->nr); in tryinc_node_nr_active()
1696 tmp = atomic_cmpxchg_relaxed(&nna->nr, old, old + 1); in tryinc_node_nr_active()
1703 * pwq_tryinc_nr_active - Try to increment nr_active for a pwq
1712 struct workqueue_struct *wq = pwq->wq; in pwq_tryinc_nr_active()
1713 struct worker_pool *pool = pwq->pool; in pwq_tryinc_nr_active()
1714 struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node); in pwq_tryinc_nr_active()
1717 lockdep_assert_held(&pool->lock); in pwq_tryinc_nr_active()
1720 /* BH or per-cpu workqueue, pwq->nr_active is sufficient */ in pwq_tryinc_nr_active()
1721 obtained = pwq->nr_active < READ_ONCE(wq->max_active); in pwq_tryinc_nr_active()
1725 if (unlikely(pwq->plugged)) in pwq_tryinc_nr_active()
1729 * Unbound workqueue uses per-node shared nr_active $nna. If @pwq is in pwq_tryinc_nr_active()
1737 if (!list_empty(&pwq->pending_node) && likely(!fill)) in pwq_tryinc_nr_active()
1745 * Lockless acquisition failed. Lock, add ourself to $nna->pending_pwqs in pwq_tryinc_nr_active()
1748 * we see the decremented $nna->nr or they see non-empty in pwq_tryinc_nr_active()
1749 * $nna->pending_pwqs. in pwq_tryinc_nr_active()
1751 raw_spin_lock(&nna->lock); in pwq_tryinc_nr_active()
1753 if (list_empty(&pwq->pending_node)) in pwq_tryinc_nr_active()
1754 list_add_tail(&pwq->pending_node, &nna->pending_pwqs); in pwq_tryinc_nr_active()
1767 list_del_init(&pwq->pending_node); in pwq_tryinc_nr_active()
1770 raw_spin_unlock(&nna->lock); in pwq_tryinc_nr_active()
1773 pwq->nr_active++; in pwq_tryinc_nr_active()
1778 * pwq_activate_first_inactive - Activate the first inactive work item on a pwq
1791 list_first_entry_or_null(&pwq->inactive_works, in pwq_activate_first_inactive()
1803 * unplug_oldest_pwq - unplug the oldest pool_workqueue
1810 * dfl_pwq --------------+ [P] - plugged
1813 * pwqs -> A -> B [P] -> C [P] (newest)
1821 * pwq's are linked into wq->pwqs with the oldest first, so the first one in
1828 lockdep_assert_held(&wq->mutex); in unplug_oldest_pwq()
1831 pwq = list_first_entry_or_null(&wq->pwqs, struct pool_workqueue, in unplug_oldest_pwq()
1833 raw_spin_lock_irq(&pwq->pool->lock); in unplug_oldest_pwq()
1834 if (pwq->plugged) { in unplug_oldest_pwq()
1835 pwq->plugged = false; in unplug_oldest_pwq()
1837 kick_pool(pwq->pool); in unplug_oldest_pwq()
1839 raw_spin_unlock_irq(&pwq->pool->lock); in unplug_oldest_pwq()
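The plugging above exists to serve ordered workqueues; from the caller's side this is simply the following (hypothetical sketch):

	static struct workqueue_struct *seq_wq;	/* hypothetical */

	static int __init seq_init(void)
	{
		/* at most one work item executes at a time, in queueing order */
		seq_wq = alloc_ordered_workqueue("seq_wq", 0);
		return seq_wq ? 0 : -ENOMEM;
	}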
1843 * node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active
1847 * Activate a pwq in @nna->pending_pwqs. Called with @caller_pool locked.
1857 lockdep_assert_held(&caller_pool->lock); in node_activate_pending_pwq()
1859 raw_spin_lock(&nna->lock); in node_activate_pending_pwq()
1861 pwq = list_first_entry_or_null(&nna->pending_pwqs, in node_activate_pending_pwq()
1868 * @pwq->pool->lock. Let's trylock first. If unsuccessful, do the unlock in node_activate_pending_pwq()
1869 * / lock dance. For that, we also need to release @nna->lock as it's in node_activate_pending_pwq()
1872 if (pwq->pool != locked_pool) { in node_activate_pending_pwq()
1873 raw_spin_unlock(&locked_pool->lock); in node_activate_pending_pwq()
1874 locked_pool = pwq->pool; in node_activate_pending_pwq()
1875 if (!raw_spin_trylock(&locked_pool->lock)) { in node_activate_pending_pwq()
1876 raw_spin_unlock(&nna->lock); in node_activate_pending_pwq()
1877 raw_spin_lock(&locked_pool->lock); in node_activate_pending_pwq()
1878 raw_spin_lock(&nna->lock); in node_activate_pending_pwq()
1887 work = list_first_entry_or_null(&pwq->inactive_works, in node_activate_pending_pwq()
1890 list_del_init(&pwq->pending_node); in node_activate_pending_pwq()
1897 * pending_pwqs so that we round-robin through them. This means that in node_activate_pending_pwq()
1902 pwq->nr_active++; in node_activate_pending_pwq()
1905 if (list_empty(&pwq->inactive_works)) in node_activate_pending_pwq()
1906 list_del_init(&pwq->pending_node); in node_activate_pending_pwq()
1908 list_move_tail(&pwq->pending_node, &nna->pending_pwqs); in node_activate_pending_pwq()
1911 if (pwq->pool != caller_pool) in node_activate_pending_pwq()
1912 kick_pool(pwq->pool); in node_activate_pending_pwq()
1916 raw_spin_unlock(&nna->lock); in node_activate_pending_pwq()
1918 raw_spin_unlock(&locked_pool->lock); in node_activate_pending_pwq()
1919 raw_spin_lock(&caller_pool->lock); in node_activate_pending_pwq()
1924 * pwq_dec_nr_active - Retire an active count
1928 * For unbound workqueues, this function may temporarily drop @pwq->pool->lock.
1932 struct worker_pool *pool = pwq->pool; in pwq_dec_nr_active()
1933 struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pool->node); in pwq_dec_nr_active()
1935 lockdep_assert_held(&pool->lock); in pwq_dec_nr_active()
1938 * @pwq->nr_active should be decremented for both percpu and unbound in pwq_dec_nr_active()
1941 pwq->nr_active--; in pwq_dec_nr_active()
1956 * $nna->pending_pwqs. The following atomic_dec_return()'s implied in pwq_dec_nr_active()
1958 * guarantee that either we see non-empty pending_pwqs or they see in pwq_dec_nr_active()
1959 * decremented $nna->nr. in pwq_dec_nr_active()
1961 * $nna->max may change as CPUs come online/offline and @pwq->wq's in pwq_dec_nr_active()
1963 * larger than @pwq->wq->min_active which is above zero unless freezing. in pwq_dec_nr_active()
1966 if (atomic_dec_return(&nna->nr) >= READ_ONCE(nna->max)) in pwq_dec_nr_active()
1969 if (!list_empty(&nna->pending_pwqs)) in pwq_dec_nr_active()
1974 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
1982 * For unbound workqueues, this function may temporarily drop @pwq->pool->lock
1983 * and thus should be called after all other state updates for the in-flight
1987 * raw_spin_lock_irq(pool->lock).
1996 pwq->nr_in_flight[color]--; in pwq_dec_nr_in_flight()
1999 if (likely(pwq->flush_color != color)) in pwq_dec_nr_in_flight()
2002 /* are there still in-flight works? */ in pwq_dec_nr_in_flight()
2003 if (pwq->nr_in_flight[color]) in pwq_dec_nr_in_flight()
2007 pwq->flush_color = -1; in pwq_dec_nr_in_flight()
2013 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush)) in pwq_dec_nr_in_flight()
2014 complete(&pwq->wq->first_flusher->done); in pwq_dec_nr_in_flight()
2020 * try_to_grab_pending - steal work item from worklist and disable irq
2026 * stable state - idle, on timer or on worklist.
2033 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
2039 * disabled on entry. This, combined with delayed_work->timer being
2040 * irqsafe, ensures that we return -EAGAIN for finite short period of time.
2060 * dwork->timer is irqsafe. If del_timer() fails, it's in try_to_grab_pending()
2064 if (likely(del_timer(&dwork->timer))) in try_to_grab_pending()
2075 * steal it from ->worklist without clearing WORK_STRUCT_PENDING. in try_to_grab_pending()
2081 raw_spin_lock(&pool->lock); in try_to_grab_pending()
2083 * work->data is guaranteed to point to pwq only while the work in try_to_grab_pending()
2084 * item is queued on pwq->wq, and both updating work->data to point in try_to_grab_pending()
2086 * pwq->pool->lock. This in turn guarantees that, if work->data in try_to_grab_pending()
2091 if (pwq && pwq->pool == pool) { in try_to_grab_pending()
2098 * pwq->inactive_works since a queued barrier can't be in try_to_grab_pending()
2103 * on the inactive_works list, will confuse pwq->nr_active in try_to_grab_pending()
2111 move_linked_works(work, &pwq->pool->worklist, NULL); in try_to_grab_pending()
2113 list_del_init(&work->entry); in try_to_grab_pending()
2116 * work->data points to pwq iff queued. Let's point to pool. As in try_to_grab_pending()
2117 * this destroys work->data needed by the next step, stash it. in try_to_grab_pending()
2119 set_work_pool_and_keep_pending(work, pool->id, in try_to_grab_pending()
2125 raw_spin_unlock(&pool->lock); in try_to_grab_pending()
2129 raw_spin_unlock(&pool->lock); in try_to_grab_pending()
2133 return -EAGAIN; in try_to_grab_pending()
2137 * work_grab_pending - steal work item from worklist and disable irq
2142 * Grab PENDING bit of @work. @work can be in any stable state - idle, on timer
2146 * stored in *@irq_flags. The caller is responsible for re-enabling it using
2165 * insert_work - insert a work into a pool
2175 * raw_spin_lock_irq(pool->lock).
2187 list_add_tail(&work->entry, head); in insert_work()
2204 return worker && worker->current_pwq->wq == wq; in is_chained_work()
2220 pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n"); in wq_select_unbound_cpu()
2245 * steal the PENDING will busy-loop waiting for it to either get in __queue_work()
2256 if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) && in __queue_work()
2258 work->func, wq->name))) { in __queue_work()
2265 if (wq->flags & WQ_UNBOUND) in __queue_work()
2271 pwq = rcu_dereference(*per_cpu_ptr(wq->cpu_pwq, cpu)); in __queue_work()
2272 pool = pwq->pool; in __queue_work()
2277 * pool to guarantee non-reentrancy. in __queue_work()
2281 * non-reentrancy. See the comments above unplug_oldest_pwq(). in __queue_work()
2284 if (last_pool && last_pool != pool && !(wq->flags & __WQ_ORDERED)) { in __queue_work()
2287 raw_spin_lock(&last_pool->lock); in __queue_work()
2291 if (worker && worker->current_pwq->wq == wq) { in __queue_work()
2292 pwq = worker->current_pwq; in __queue_work()
2293 pool = pwq->pool; in __queue_work()
2297 raw_spin_unlock(&last_pool->lock); in __queue_work()
2298 raw_spin_lock(&pool->lock); in __queue_work()
2301 raw_spin_lock(&pool->lock); in __queue_work()
2309 * on it, so the retrying is guaranteed to make forward-progress. in __queue_work()
2311 if (unlikely(!pwq->refcnt)) { in __queue_work()
2312 if (wq->flags & WQ_UNBOUND) { in __queue_work()
2313 raw_spin_unlock(&pool->lock); in __queue_work()
2318 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt", in __queue_work()
2319 wq->name, cpu); in __queue_work()
2325 if (WARN_ON(!list_empty(&work->entry))) in __queue_work()
2328 pwq->nr_in_flight[pwq->work_color]++; in __queue_work()
2329 work_flags = work_color_to_flags(pwq->work_color); in __queue_work()
2336 if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) { in __queue_work()
2337 if (list_empty(&pool->worklist)) in __queue_work()
2338 pool->watchdog_ts = jiffies; in __queue_work()
2341 insert_work(pwq, work, &pool->worklist, work_flags); in __queue_work()
2345 insert_work(pwq, work, &pwq->inactive_works, work_flags); in __queue_work()
2349 raw_spin_unlock(&pool->lock); in __queue_work()
2369 * queue_work_on - queue work on specific cpu
2402 * select_numa_node_cpu - Select a CPU based on NUMA node
2431 * queue_work_node - queue work on a "random" cpu for a given NUMA node
2461 * If this is used with a per-cpu workqueue then the logic in in queue_work_node()
2465 WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)); in queue_work_node()
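A minimal usage sketch for the two queueing entry points above. The demo_* names and the fixed CPU number are hypothetical, and both work items are assumed to have been INIT_WORK()ed already.

	#include <linux/topology.h>
	#include <linux/workqueue.h>

	static struct work_struct cpu2_work, node_work;	/* hypothetical */

	static void demo_queue(void)
	{
		/* run on CPU 2 specifically */
		queue_work_on(2, system_wq, &cpu2_work);
		/* run on some CPU of the local NUMA node; wq must be WQ_UNBOUND */
		queue_work_node(numa_node_id(), system_unbound_wq, &node_work);
	}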
2487 __queue_work(dwork->cpu, dwork->wq, &dwork->work); in delayed_work_timer_fn()
2494 struct timer_list *timer = &dwork->timer; in __queue_delayed_work()
2495 struct work_struct *work = &dwork->work; in __queue_delayed_work()
2498 WARN_ON_ONCE(timer->function != delayed_work_timer_fn); in __queue_delayed_work()
2500 WARN_ON_ONCE(!list_empty(&work->entry)); in __queue_delayed_work()
2503 * If @delay is 0, queue @dwork->work immediately. This is for in __queue_delayed_work()
2509 __queue_work(cpu, wq, &dwork->work); in __queue_delayed_work()
2514 dwork->wq = wq; in __queue_delayed_work()
2515 dwork->cpu = cpu; in __queue_delayed_work()
2516 timer->expires = jiffies + delay; in __queue_delayed_work()
2533 * queue_delayed_work_on - queue work on specific CPU after delay
2539 * We queue the delayed_work to a specific CPU, for non-zero delays the
2541 * to ensure this, may get @dwork->timer queued to an offlined CPU and
2542 * this will prevent queueing of @dwork->work unless the offlined CPU
2552 struct work_struct *work = &dwork->work; in queue_delayed_work_on()
2571 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
2594 ret = work_grab_pending(&dwork->work, WORK_CANCEL_DELAYED, &irq_flags); in mod_delayed_work_on()
2596 if (!clear_pending_if_disabled(&dwork->work)) in mod_delayed_work_on()
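A hypothetical watchdog-style sketch of the delayed-work entry points above: queue_delayed_work() arms the timeout once, and mod_delayed_work() pushes it back on every sign of activity.

	static struct delayed_work wd_work;	/* hypothetical, INIT_DELAYED_WORK()ed elsewhere */

	static void wd_arm(void)
	{
		queue_delayed_work(system_wq, &wd_work, 5 * HZ);	/* no-op if already pending */
	}

	static void wd_kick(void)
	{
		/* re-arm the timer whether or not the work was pending */
		mod_delayed_work(system_wq, &wd_work, 5 * HZ);
	}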
2610 __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work); in rcu_work_rcufn()
2615 * queue_rcu_work - queue work after a RCU grace period
2620 * that a full RCU grace period is guaranteed only after a %true return.
2622 * execution may happen before a full RCU grace period has passed.
2626 struct work_struct *work = &rwork->work; in queue_rcu_work()
2634 rwork->wq = wq; in queue_rcu_work()
2635 call_rcu_hurry(&rwork->rcu, rcu_work_rcufn); in queue_rcu_work()
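A hypothetical sketch of queue_rcu_work() above: free an object from process context only after a full RCU grace period has elapsed.

	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct demo_obj {				/* hypothetical */
		struct rcu_work rwork;
		/* payload ... */
	};

	static void demo_free_workfn(struct work_struct *work)
	{
		struct demo_obj *obj =
			container_of(to_rcu_work(work), struct demo_obj, rwork);

		kfree(obj);	/* runs in a worker, after the grace period */
	}

	static void demo_free(struct demo_obj *obj)
	{
		INIT_RCU_WORK(&obj->rwork, demo_free_workfn);
		queue_rcu_work(system_unbound_wq, &obj->rwork);
	}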
2649 INIT_LIST_HEAD(&worker->entry); in alloc_worker()
2650 INIT_LIST_HEAD(&worker->scheduled); in alloc_worker()
2651 INIT_LIST_HEAD(&worker->node); in alloc_worker()
2653 worker->flags = WORKER_PREP; in alloc_worker()
2660 if (pool->cpu < 0 && pool->attrs->affn_strict) in pool_allowed_cpus()
2661 return pool->attrs->__pod_cpumask; in pool_allowed_cpus()
2663 return pool->attrs->cpumask; in pool_allowed_cpus()
2667 * worker_attach_to_pool() - attach a worker to a pool
2672 * cpu-binding of @worker are kept coordinated with the pool across
2673 * cpu-[un]hotplugs.
2683 * details. BH workers are, while per-CPU, always DISASSOCIATED. in worker_attach_to_pool()
2685 if (pool->flags & POOL_DISASSOCIATED) { in worker_attach_to_pool()
2686 worker->flags |= WORKER_UNBOUND; in worker_attach_to_pool()
2688 WARN_ON_ONCE(pool->flags & POOL_BH); in worker_attach_to_pool()
2689 kthread_set_per_cpu(worker->task, pool->cpu); in worker_attach_to_pool()
2692 if (worker->rescue_wq) in worker_attach_to_pool()
2693 set_cpus_allowed_ptr(worker->task, pool_allowed_cpus(pool)); in worker_attach_to_pool()
2695 list_add_tail(&worker->node, &pool->workers); in worker_attach_to_pool()
2696 worker->pool = pool; in worker_attach_to_pool()
2705 kthread_set_per_cpu(worker->task, -1); in unbind_worker()
2707 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0); in unbind_worker()
2709 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0); in unbind_worker()
2718 list_del(&worker->node); in detach_worker()
2722 * worker_detach_from_pool() - detach a worker from its pool
2731 struct worker_pool *pool = worker->pool; in worker_detach_from_pool()
2734 WARN_ON_ONCE(pool->flags & POOL_BH); in worker_detach_from_pool()
2738 worker->pool = NULL; in worker_detach_from_pool()
2741 /* clear leftover flags without pool->lock after it is detached */ in worker_detach_from_pool()
2742 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND); in worker_detach_from_pool()
2748 if (worker->rescue_wq) in format_worker_id()
2749 return scnprintf(buf, size, "kworker/R-%s", in format_worker_id()
2750 worker->rescue_wq->name); in format_worker_id()
2753 if (pool->cpu >= 0) in format_worker_id()
2755 pool->cpu, worker->id, in format_worker_id()
2756 pool->attrs->nice < 0 ? "H" : ""); in format_worker_id()
2759 pool->id, worker->id); in format_worker_id()
2766 * create_worker - create a new workqueue worker
2783 id = ida_alloc(&pool->worker_ida, GFP_KERNEL); in create_worker()
2790 worker = alloc_worker(pool->node); in create_worker()
2796 worker->id = id; in create_worker()
2798 if (!(pool->flags & POOL_BH)) { in create_worker()
2802 worker->task = kthread_create_on_node(worker_thread, worker, in create_worker()
2803 pool->node, "%s", id_buf); in create_worker()
2804 if (IS_ERR(worker->task)) { in create_worker()
2805 if (PTR_ERR(worker->task) == -EINTR) { in create_worker()
2810 worker->task); in create_worker()
2815 set_user_nice(worker->task, pool->attrs->nice); in create_worker()
2816 kthread_bind_mask(worker->task, pool_allowed_cpus(pool)); in create_worker()
2823 raw_spin_lock_irq(&pool->lock); in create_worker()
2825 worker->pool->nr_workers++; in create_worker()
2833 if (worker->task) in create_worker()
2834 wake_up_process(worker->task); in create_worker()
2836 raw_spin_unlock_irq(&pool->lock); in create_worker()
2841 ida_free(&pool->worker_ida, id); in create_worker()
2859 list_del_init(&worker->entry); in reap_dying_workers()
2860 kthread_stop_put(worker->task); in reap_dying_workers()
2866 * set_worker_dying - Tag a worker for destruction
2868 * @list: transfer worker away from its pool->idle_list and into list
2874 * raw_spin_lock_irq(pool->lock).
2878 struct worker_pool *pool = worker->pool; in set_worker_dying()
2880 lockdep_assert_held(&pool->lock); in set_worker_dying()
2884 if (WARN_ON(worker->current_work) || in set_worker_dying()
2885 WARN_ON(!list_empty(&worker->scheduled)) || in set_worker_dying()
2886 WARN_ON(!(worker->flags & WORKER_IDLE))) in set_worker_dying()
2889 pool->nr_workers--; in set_worker_dying()
2890 pool->nr_idle--; in set_worker_dying()
2892 worker->flags |= WORKER_DIE; in set_worker_dying()
2894 list_move(&worker->entry, list); in set_worker_dying()
2897 get_task_struct(worker->task); in set_worker_dying()
2901 * idle_worker_timeout - check if some idle workers can now be deleted.
2908 * it expire and re-evaluate things from there.
2915 if (work_pending(&pool->idle_cull_work)) in idle_worker_timeout()
2918 raw_spin_lock_irq(&pool->lock); in idle_worker_timeout()
2925 worker = list_last_entry(&pool->idle_list, struct worker, entry); in idle_worker_timeout()
2926 expires = worker->last_active + IDLE_WORKER_TIMEOUT; in idle_worker_timeout()
2930 mod_timer(&pool->idle_timer, expires); in idle_worker_timeout()
2932 raw_spin_unlock_irq(&pool->lock); in idle_worker_timeout()
2935 queue_work(system_unbound_wq, &pool->idle_cull_work); in idle_worker_timeout()
2939 * idle_cull_fn - cull workers that have been idle for too long.
2955 * Grabbing wq_pool_attach_mutex here ensures an already-running worker in idle_cull_fn()
2956 * cannot proceed beyond set_pf_worker() in its self-destruct path. in idle_cull_fn()
2957 * This is required as a previously-preempted worker could run after in idle_cull_fn()
2961 raw_spin_lock_irq(&pool->lock); in idle_cull_fn()
2967 worker = list_last_entry(&pool->idle_list, struct worker, entry); in idle_cull_fn()
2968 expires = worker->last_active + IDLE_WORKER_TIMEOUT; in idle_cull_fn()
2971 mod_timer(&pool->idle_timer, expires); in idle_cull_fn()
2978 raw_spin_unlock_irq(&pool->lock); in idle_cull_fn()
2988 struct workqueue_struct *wq = pwq->wq; in send_mayday()
2992 if (!wq->rescuer) in send_mayday()
2996 if (list_empty(&pwq->mayday_node)) { in send_mayday()
3003 list_add_tail(&pwq->mayday_node, &wq->maydays); in send_mayday()
3004 wake_up_process(wq->rescuer->task); in send_mayday()
3005 pwq->stats[PWQ_STAT_MAYDAY]++; in send_mayday()
3014 raw_spin_lock_irq(&pool->lock); in pool_mayday_timeout()
3015 raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */ in pool_mayday_timeout()
3024 list_for_each_entry(work, &pool->worklist, entry) in pool_mayday_timeout()
3029 raw_spin_unlock_irq(&pool->lock); in pool_mayday_timeout()
3031 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); in pool_mayday_timeout()
3035 * maybe_create_worker - create a new worker if necessary
3048 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3053 __releases(&pool->lock) in maybe_create_worker()
3054 __acquires(&pool->lock) in maybe_create_worker()
3057 raw_spin_unlock_irq(&pool->lock); in maybe_create_worker()
3060 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); in maybe_create_worker()
3072 del_timer_sync(&pool->mayday_timer); in maybe_create_worker()
3073 raw_spin_lock_irq(&pool->lock); in maybe_create_worker()
3076 * created as @pool->lock was dropped and the new worker might have in maybe_create_worker()
3084 * manage_workers - manage worker pool
3096 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3107 struct worker_pool *pool = worker->pool; in manage_workers()
3109 if (pool->flags & POOL_MANAGER_ACTIVE) in manage_workers()
3112 pool->flags |= POOL_MANAGER_ACTIVE; in manage_workers()
3113 pool->manager = worker; in manage_workers()
3117 pool->manager = NULL; in manage_workers()
3118 pool->flags &= ~POOL_MANAGER_ACTIVE; in manage_workers()
3124 * process_one_work - process single work
3135 * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
3138 __releases(&pool->lock) in process_one_work()
3139 __acquires(&pool->lock) in process_one_work()
3142 struct worker_pool *pool = worker->pool; in process_one_work()
3145 bool bh_draining = pool->flags & POOL_BH_DRAINING; in process_one_work()
3152 * work->lockdep_map, make a copy and use that here. in process_one_work()
3156 lockdep_copy_map(&lockdep_map, &work->lockdep_map); in process_one_work()
3159 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && in process_one_work()
3160 raw_smp_processor_id() != pool->cpu); in process_one_work()
3164 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work); in process_one_work()
3165 worker->current_work = work; in process_one_work()
3166 worker->current_func = work->func; in process_one_work()
3167 worker->current_pwq = pwq; in process_one_work()
3168 if (worker->task) in process_one_work()
3169 worker->current_at = worker->task->se.sum_exec_runtime; in process_one_work()
3171 worker->current_color = get_work_color(work_data); in process_one_work()
3177 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); in process_one_work()
3179 list_del_init(&work->entry); in process_one_work()
3187 if (unlikely(pwq->wq->flags & WQ_CPU_INTENSIVE)) in process_one_work()
3191 * Kick @pool if necessary. It's always noop for per-cpu worker pools in process_one_work()
3200 * update to @work. Also, do this inside @pool->lock so that in process_one_work()
3204 set_work_pool_and_clear_pending(work, pool->id, pool_offq_flags(pool)); in process_one_work()
3206 pwq->stats[PWQ_STAT_STARTED]++; in process_one_work()
3207 raw_spin_unlock_irq(&pool->lock); in process_one_work()
3213 lock_map_acquire(pwq->wq->lockdep_map); in process_one_work()
3226 * Which would create W1->C->W1 dependencies, even though there is no in process_one_work()
3228 * read-recursive acquire on the work(queue) 'locks', but this will then in process_one_work()
3233 * flush_work() and complete() primitives (except for single-threaded in process_one_work()
3238 worker->current_func(work); in process_one_work()
3243 trace_workqueue_execute_end(work, worker->current_func); in process_one_work()
3244 pwq->stats[PWQ_STAT_COMPLETED]++; in process_one_work()
3247 lock_map_release(pwq->wq->lockdep_map); in process_one_work()
3249 if (unlikely((worker->task && in_atomic()) || in process_one_work()
3253 " preempt=0x%08x lock=%d->%d RCU=%d->%d workfn=%ps\n", in process_one_work()
3254 current->comm, task_pid_nr(current), preempt_count(), in process_one_work()
3257 worker->current_func); in process_one_work()
3270 if (worker->task) in process_one_work()
3273 raw_spin_lock_irq(&pool->lock); in process_one_work()
3283 worker->last_func = worker->current_func; in process_one_work()
3286 hash_del(&worker->hentry); in process_one_work()
3287 worker->current_work = NULL; in process_one_work()
3288 worker->current_func = NULL; in process_one_work()
3289 worker->current_pwq = NULL; in process_one_work()
3290 worker->current_color = INT_MAX; in process_one_work()
3297 * process_scheduled_works - process scheduled works
3305 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3313 while ((work = list_first_entry_or_null(&worker->scheduled, in process_scheduled_works()
3316 worker->pool->watchdog_ts = jiffies; in process_scheduled_works()
3327 current->flags |= PF_WQ_WORKER; in set_pf_worker()
3329 current->flags &= ~PF_WQ_WORKER; in set_pf_worker()
3334 * worker_thread - the worker thread function
3337 * The worker thread function. All workers belong to a worker_pool -
3338 * either a per-cpu one or dynamic unbound one. These workers process all
3348 struct worker_pool *pool = worker->pool; in worker_thread()
3353 raw_spin_lock_irq(&pool->lock); in worker_thread()
3356 if (unlikely(worker->flags & WORKER_DIE)) { in worker_thread()
3357 raw_spin_unlock_irq(&pool->lock); in worker_thread()
3360 * The worker is dead and PF_WQ_WORKER is cleared, worker->pool in worker_thread()
3363 worker->pool = NULL; in worker_thread()
3364 ida_free(&pool->worker_ida, worker->id); in worker_thread()
3379 * ->scheduled list can only be filled while a worker is in worker_thread()
3383 WARN_ON_ONCE(!list_empty(&worker->scheduled)); in worker_thread()
3396 list_first_entry(&pool->worklist, in worker_thread()
3406 * pool->lock is held and there's no work to process and no need to in worker_thread()
3408 * pool->lock or from local cpu, so setting the current state in worker_thread()
3409 * before releasing pool->lock is enough to prevent losing any in worker_thread()
3414 raw_spin_unlock_irq(&pool->lock); in worker_thread()
3420 * rescuer_thread - the rescuer thread function
3443 struct workqueue_struct *wq = rescuer->rescue_wq; in rescuer_thread()
3458 * shouldn't have any work pending, but @wq->maydays may still have in rescuer_thread()
3459 * pwq(s) queued. This can happen by non-rescuer workers consuming in rescuer_thread()
3461 * @wq->maydays processing before acting on should_stop so that the in rescuer_thread()
3469 while (!list_empty(&wq->maydays)) { in rescuer_thread()
3470 struct pool_workqueue *pwq = list_first_entry(&wq->maydays, in rescuer_thread()
3472 struct worker_pool *pool = pwq->pool; in rescuer_thread()
3476 list_del_init(&pwq->mayday_node); in rescuer_thread()
3482 raw_spin_lock_irq(&pool->lock); in rescuer_thread()
3488 WARN_ON_ONCE(!list_empty(&rescuer->scheduled)); in rescuer_thread()
3489 list_for_each_entry_safe(work, n, &pool->worklist, entry) { in rescuer_thread()
3492 pwq->stats[PWQ_STAT_RESCUED]++; in rescuer_thread()
3495 if (!list_empty(&rescuer->scheduled)) { in rescuer_thread()
3503 * that such back-to-back work items, which may be in rescuer_thread()
3507 if (pwq->nr_active && need_to_create_worker(pool)) { in rescuer_thread()
3513 if (wq->rescuer && list_empty(&pwq->mayday_node)) { in rescuer_thread()
3515 list_add_tail(&pwq->mayday_node, &wq->maydays); in rescuer_thread()
3527 raw_spin_unlock_irq(&pool->lock); in rescuer_thread()
3549 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); in rescuer_thread()
3556 struct worker_pool *pool = worker->pool; in bh_worker()
3560 raw_spin_lock_irq(&pool->lock); in bh_worker()
3570 WARN_ON_ONCE(!list_empty(&worker->scheduled)); in bh_worker()
3575 list_first_entry(&pool->worklist, in bh_worker()
3581 --nr_restarts && time_before(jiffies, end)); in bh_worker()
3587 raw_spin_unlock_irq(&pool->lock); in bh_worker()
3598 * After full conversion, we'll add worker->softirq_action, directly use the
3606 bh_worker(list_first_entry(&pool->workers, struct worker, node)); in workqueue_softirq_action()
3619 struct worker_pool *pool = dead_work->pool; in drain_dead_softirq_workfn()
3629 raw_spin_lock_irq(&pool->lock); in drain_dead_softirq_workfn()
3630 pool->flags |= POOL_BH_DRAINING; in drain_dead_softirq_workfn()
3631 raw_spin_unlock_irq(&pool->lock); in drain_dead_softirq_workfn()
3633 bh_worker(list_first_entry(&pool->workers, struct worker, node)); in drain_dead_softirq_workfn()
3635 raw_spin_lock_irq(&pool->lock); in drain_dead_softirq_workfn()
3636 pool->flags &= ~POOL_BH_DRAINING; in drain_dead_softirq_workfn()
3638 raw_spin_unlock_irq(&pool->lock); in drain_dead_softirq_workfn()
3646 if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) in drain_dead_softirq_workfn()
3651 complete(&dead_work->done); in drain_dead_softirq_workfn()
3678 if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) in workqueue_softirq_dead()
3689 * check_flush_dependency - check for flush dependency sanity
3698 * on a workqueue which doesn't have %WQ_MEM_RECLAIM as that can break forward-
3708 if (from_cancel || target_wq->flags & WQ_MEM_RECLAIM) in check_flush_dependency()
3712 target_func = target_work ? target_work->func : NULL; in check_flush_dependency()
3714 WARN_ONCE(current->flags & PF_MEMALLOC, in check_flush_dependency()
3716 current->pid, current->comm, target_wq->name, target_func); in check_flush_dependency()
3717 WARN_ONCE(worker && ((worker->current_pwq->wq->flags & in check_flush_dependency()
3720 worker->current_pwq->wq->name, worker->current_func, in check_flush_dependency()
3721 target_wq->name, target_func); in check_flush_dependency()
3733 complete(&barr->done); in wq_barrier_func()
3737 * insert_wq_barrier - insert a barrier work
3754 * Note that when @worker is non-NULL, @target may be modified
3758 * raw_spin_lock_irq(pool->lock).
3770 * debugobject calls are safe here even with pool->lock locked in insert_wq_barrier()
3776 * spuriously triggering "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} in insert_wq_barrier()
3779 INIT_WORK_ONSTACK_KEY(&barr->work, wq_barrier_func, in insert_wq_barrier()
3780 (pwq->wq->flags & WQ_BH) ? &bh_key : &thr_key); in insert_wq_barrier()
3781 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); in insert_wq_barrier()
3783 init_completion_map(&barr->done, &target->lockdep_map); in insert_wq_barrier()
3785 barr->task = current; in insert_wq_barrier()
3795 head = worker->scheduled.next; in insert_wq_barrier()
3796 work_color = worker->current_color; in insert_wq_barrier()
3800 head = target->entry.next; in insert_wq_barrier()
3807 pwq->nr_in_flight[work_color]++; in insert_wq_barrier()
3810 insert_work(pwq, &barr->work, head, work_flags); in insert_wq_barrier()
3814 * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
3816 * @flush_color: new flush color, < 0 for no-op
3817 * @work_color: new work color, < 0 for no-op
3821 * If @flush_color is non-negative, flush_color on all pwqs should be
3822 * -1. If no pwq has in-flight commands at the specified color, all
3823 * pwq->flush_color's stay at -1 and %false is returned. If any pwq
3824 * has in flight commands, its pwq->flush_color is set to
3825 * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
3828 * The caller should have initialized @wq->first_flusher prior to
3829 * calling this function with non-negative @flush_color. If
3833 * If @work_color is non-negative, all pwqs should have the same
3838 * mutex_lock(wq->mutex).
3852 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush)); in flush_workqueue_prep_pwqs()
3853 atomic_set(&wq->nr_pwqs_to_flush, 1); in flush_workqueue_prep_pwqs()
3859 * sequentially to wq->pwqs by cpu index. So in the majority in flush_workqueue_prep_pwqs()
3865 if (current_pool != pwq->pool) { in flush_workqueue_prep_pwqs()
3867 raw_spin_unlock_irq(&current_pool->lock); in flush_workqueue_prep_pwqs()
3868 current_pool = pwq->pool; in flush_workqueue_prep_pwqs()
3869 raw_spin_lock_irq(&current_pool->lock); in flush_workqueue_prep_pwqs()
3873 WARN_ON_ONCE(pwq->flush_color != -1); in flush_workqueue_prep_pwqs()
3875 if (pwq->nr_in_flight[flush_color]) { in flush_workqueue_prep_pwqs()
3876 pwq->flush_color = flush_color; in flush_workqueue_prep_pwqs()
3877 atomic_inc(&wq->nr_pwqs_to_flush); in flush_workqueue_prep_pwqs()
3883 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color)); in flush_workqueue_prep_pwqs()
3884 pwq->work_color = work_color; in flush_workqueue_prep_pwqs()
3890 raw_spin_unlock_irq(&current_pool->lock); in flush_workqueue_prep_pwqs()
3892 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) in flush_workqueue_prep_pwqs()
3893 complete(&wq->first_flusher->done); in flush_workqueue_prep_pwqs()
3901 if (unlikely(!wq->lockdep_map)) in touch_wq_lockdep_map()
3904 if (wq->flags & WQ_BH) in touch_wq_lockdep_map()
3907 lock_map_acquire(wq->lockdep_map); in touch_wq_lockdep_map()
3908 lock_map_release(wq->lockdep_map); in touch_wq_lockdep_map()
3910 if (wq->flags & WQ_BH) in touch_wq_lockdep_map()
3919 if (wq->flags & WQ_BH) in touch_work_lockdep_map()
3922 lock_map_acquire(&work->lockdep_map); in touch_work_lockdep_map()
3923 lock_map_release(&work->lockdep_map); in touch_work_lockdep_map()
3925 if (wq->flags & WQ_BH) in touch_work_lockdep_map()
3931 * __flush_workqueue - ensure that any scheduled work has run to completion.
3941 .flush_color = -1, in __flush_workqueue()
3942 .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, (*wq->lockdep_map)), in __flush_workqueue()
3951 mutex_lock(&wq->mutex); in __flush_workqueue()
3954 * Start-to-wait phase in __flush_workqueue()
3956 next_color = work_next_color(wq->work_color); in __flush_workqueue()
3958 if (next_color != wq->flush_color) { in __flush_workqueue()
3960 * Color space is not full. The current work_color in __flush_workqueue()
3964 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow)); in __flush_workqueue()
3965 this_flusher.flush_color = wq->work_color; in __flush_workqueue()
3966 wq->work_color = next_color; in __flush_workqueue()
3968 if (!wq->first_flusher) { in __flush_workqueue()
3970 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); in __flush_workqueue()
3972 wq->first_flusher = &this_flusher; in __flush_workqueue()
3974 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color, in __flush_workqueue()
3975 wq->work_color)) { in __flush_workqueue()
3977 wq->flush_color = next_color; in __flush_workqueue()
3978 wq->first_flusher = NULL; in __flush_workqueue()
3983 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color); in __flush_workqueue()
3984 list_add_tail(&this_flusher.list, &wq->flusher_queue); in __flush_workqueue()
3985 flush_workqueue_prep_pwqs(wq, -1, wq->work_color); in __flush_workqueue()
3989 * Oops, color space is full, wait on overflow queue. in __flush_workqueue()
3993 list_add_tail(&this_flusher.list, &wq->flusher_overflow); in __flush_workqueue()
3998 mutex_unlock(&wq->mutex); in __flush_workqueue()
4003 * Wake-up-and-cascade phase in __flush_workqueue()
4006 * handling overflow. Non-first flushers can simply return. in __flush_workqueue()
4008 if (READ_ONCE(wq->first_flusher) != &this_flusher) in __flush_workqueue()
4011 mutex_lock(&wq->mutex); in __flush_workqueue()
4014 if (wq->first_flusher != &this_flusher) in __flush_workqueue()
4017 WRITE_ONCE(wq->first_flusher, NULL); in __flush_workqueue()
4020 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); in __flush_workqueue()
4026 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) { in __flush_workqueue()
4027 if (next->flush_color != wq->flush_color) in __flush_workqueue()
4029 list_del_init(&next->list); in __flush_workqueue()
4030 complete(&next->done); in __flush_workqueue()
4033 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) && in __flush_workqueue()
4034 wq->flush_color != work_next_color(wq->work_color)); in __flush_workqueue()
4037 wq->flush_color = work_next_color(wq->flush_color); in __flush_workqueue()
4040 if (!list_empty(&wq->flusher_overflow)) { in __flush_workqueue()
4044 * flusher_queue. This is the start-to-wait in __flush_workqueue()
4047 list_for_each_entry(tmp, &wq->flusher_overflow, list) in __flush_workqueue()
4048 tmp->flush_color = wq->work_color; in __flush_workqueue()
4050 wq->work_color = work_next_color(wq->work_color); in __flush_workqueue()
4052 list_splice_tail_init(&wq->flusher_overflow, in __flush_workqueue()
4053 &wq->flusher_queue); in __flush_workqueue()
4054 flush_workqueue_prep_pwqs(wq, -1, wq->work_color); in __flush_workqueue()
4057 if (list_empty(&wq->flusher_queue)) { in __flush_workqueue()
4058 WARN_ON_ONCE(wq->flush_color != wq->work_color); in __flush_workqueue()
4066 WARN_ON_ONCE(wq->flush_color == wq->work_color); in __flush_workqueue()
4067 WARN_ON_ONCE(wq->flush_color != next->flush_color); in __flush_workqueue()
4069 list_del_init(&next->list); in __flush_workqueue()
4070 wq->first_flusher = next; in __flush_workqueue()
4072 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1)) in __flush_workqueue()
4079 wq->first_flusher = NULL; in __flush_workqueue()
4083 mutex_unlock(&wq->mutex); in __flush_workqueue()
4088 * drain_workqueue - drain a workqueue
4105 * hotter than drain_workqueue() and already looks at @wq->flags. in drain_workqueue()
4108 mutex_lock(&wq->mutex); in drain_workqueue()
4109 if (!wq->nr_drainers++) in drain_workqueue()
4110 wq->flags |= __WQ_DRAINING; in drain_workqueue()
4111 mutex_unlock(&wq->mutex); in drain_workqueue()
4115 mutex_lock(&wq->mutex); in drain_workqueue()
4120 raw_spin_lock_irq(&pwq->pool->lock); in drain_workqueue()
4122 raw_spin_unlock_irq(&pwq->pool->lock); in drain_workqueue()
4130 wq->name, __func__, flush_cnt); in drain_workqueue()
4132 mutex_unlock(&wq->mutex); in drain_workqueue()
4136 if (!--wq->nr_drainers) in drain_workqueue()
4137 wq->flags &= ~__WQ_DRAINING; in drain_workqueue()
4138 mutex_unlock(&wq->mutex); in drain_workqueue()
4157 raw_spin_lock_irq(&pool->lock); in start_flush_work()
4161 if (unlikely(pwq->pool != pool)) in start_flush_work()
4167 pwq = worker->current_pwq; in start_flush_work()
4170 wq = pwq->wq; in start_flush_work()
4174 raw_spin_unlock_irq(&pool->lock); in start_flush_work()
4180 * single-threaded or rescuer equipped workqueue. in start_flush_work()
4187 if (!from_cancel && (wq->saved_max_active == 1 || wq->rescuer)) in start_flush_work()
4193 raw_spin_unlock_irq(&pool->lock); in start_flush_work()
4205 if (WARN_ON(!work->func)) in __flush_work()
4216 * BH context and thus can be busy-waited. in __flush_work()
4252 * flush_work - wait for a work to finish executing the last queueing instance
4270 * flush_delayed_work - wait for a dwork to finish executing the last queueing
4284 if (del_timer_sync(&dwork->timer)) in flush_delayed_work()
4285 __queue_work(dwork->cpu, dwork->wq, &dwork->work); in flush_delayed_work()
4287 return flush_work(&dwork->work); in flush_delayed_work()
4292 * flush_rcu_work - wait for a rwork to finish executing the last queueing
4301 if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) { in flush_rcu_work()
4303 flush_work(&rwork->work); in flush_rcu_work()
4306 return flush_work(&rwork->work); in flush_rcu_work()
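A hypothetical device-teardown sketch for the flush helpers above: flush_delayed_work() queues the work immediately if its timer has not fired yet and then waits for the last queued instance to finish.

	struct demo_dev {			/* hypothetical */
		struct delayed_work stats_work;
	};

	static void demo_dev_sync_stats(struct demo_dev *dev)
	{
		/* make the pending statistics update happen now and wait for it */
		flush_delayed_work(&dev->stats_work);
	}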
4313 const unsigned long max = (1lu << WORK_OFFQ_DISABLE_BITS) - 1; in work_offqd_disable()
4315 if (likely(offqd->disable < max)) in work_offqd_disable()
4316 offqd->disable++; in work_offqd_disable()
4323 if (likely(offqd->disable > 0)) in work_offqd_enable()
4324 offqd->disable--; in work_offqd_enable()
4382 * cancel_work_sync - cancel a work and wait for it to finish
4386 * even if the work re-queues itself or migrates to another workqueue. On return
4390 * cancel_work_sync(&delayed_work->work) must not be used for delayed_work's.
4393 * Must be called from a sleepable context if @work was last queued on a non-BH
4394 * workqueue. Can also be called from non-hardirq atomic contexts including BH
4406 * cancel_delayed_work - cancel a delayed work
4416 * it returns %true and the work doesn't re-arm itself. Explicitly flush or
4423 return __cancel_work(&dwork->work, WORK_CANCEL_DELAYED); in cancel_delayed_work()
4428 * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
4438 return __cancel_work_sync(&dwork->work, WORK_CANCEL_DELAYED); in cancel_delayed_work_sync()
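/*
 * Hedged sketch (my_poll is an assumed delayed_work): the non-_sync variant
 * is usable from atomic context but the callback may still be running when
 * it returns; the _sync variant waits, and needs a sleepable context unless
 * the work runs on a BH workqueue.
 */
#include <linux/workqueue.h>

static struct delayed_work my_poll;

static void my_poll_stop(bool may_sleep)
{
	if (may_sleep)
		cancel_delayed_work_sync(&my_poll);
	else
		cancel_delayed_work(&my_poll);
}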
4443 * disable_work - Disable and cancel a work item
4447 * pending. As long as the disable count is non-zero, any attempt to queue @work
4461 * disable_work_sync - Disable, cancel and drain a work item
4467 * Must be called from a sleepable context if @work was last queued on a non-BH
4468 * workqueue. Can also be called from non-hardirq atomic contexts including BH
4480 * enable_work - Enable a work item
4507 * disable_delayed_work - Disable and cancel a delayed work item
4514 return __cancel_work(&dwork->work, in disable_delayed_work()
4520 * disable_delayed_work_sync - Disable, cancel and drain a delayed work item
4527 return __cancel_work_sync(&dwork->work, in disable_delayed_work_sync()
4533 * enable_delayed_work - Enable a delayed work item
4540 return enable_work(&dwork->work); in enable_delayed_work()
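/*
 * Hedged sketch (my_rx_work assumed, queued from e.g. an interrupt handler):
 * while the disable count is elevated, queueing the work is a no-op, so the
 * window between disable_work_sync() and enable_work() is quiescent.
 */
#include <linux/workqueue.h>

static struct work_struct my_rx_work;

static void my_reconfigure(void)
{
	disable_work_sync(&my_rx_work);	/* cancel, wait, block re-queueing */
	/* ... rewrite the state my_rx_work consumes ... */
	enable_work(&my_rx_work);	/* queueing is accepted again */
}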
4545 * schedule_on_each_cpu - execute a function synchronously on each online CPU
4553 * 0 on success, -errno on failure.
4562 return -ENOMEM; in schedule_on_each_cpu()
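/*
 * Hedged sketch (example_percpu_fn assumed): schedule_on_each_cpu() queues
 * the function on every online CPU's per-cpu workqueue and flushes them all
 * before returning.
 */
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

static void example_percpu_fn(struct work_struct *work)
{
	pr_info("workqueue sketch: ran on CPU %d\n", raw_smp_processor_id());
}

static int example_run_everywhere(void)
{
	return schedule_on_each_cpu(example_percpu_fn);	/* 0 or -ENOMEM */
}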
4582 * execute_in_process_context - reliably execute the routine with user context
4590 * Return: 0 - function was executed
4591 * 1 - function was scheduled for execution
4596 fn(&ew->work); in execute_in_process_context()
4600 INIT_WORK(&ew->work, fn); in execute_in_process_context()
4601 schedule_work(&ew->work); in execute_in_process_context()
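/*
 * Hedged sketch (my_release/my_ew assumed): callers that may be in interrupt
 * context hand over an execute_work so the function can be deferred when
 * needed and run inline otherwise.
 */
#include <linux/workqueue.h>

static struct execute_work my_ew;

static void my_release(struct work_struct *work)
{
	/* always reaches here in process context, sleeping is allowed */
}

static void my_put_last_reference(void)
{
	/* returns 0 if run inline, 1 if it was scheduled */
	execute_in_process_context(my_release, &my_ew);
}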
4608 * free_workqueue_attrs - free a workqueue_attrs
4616 free_cpumask_var(attrs->cpumask); in free_workqueue_attrs()
4617 free_cpumask_var(attrs->__pod_cpumask); in free_workqueue_attrs()
4623 * alloc_workqueue_attrs - allocate a workqueue_attrs
4637 if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL)) in alloc_workqueue_attrs()
4639 if (!alloc_cpumask_var(&attrs->__pod_cpumask, GFP_KERNEL)) in alloc_workqueue_attrs()
4642 cpumask_copy(attrs->cpumask, cpu_possible_mask); in alloc_workqueue_attrs()
4643 attrs->affn_scope = WQ_AFFN_DFL; in alloc_workqueue_attrs()
4653 to->nice = from->nice; in copy_workqueue_attrs()
4654 cpumask_copy(to->cpumask, from->cpumask); in copy_workqueue_attrs()
4655 cpumask_copy(to->__pod_cpumask, from->__pod_cpumask); in copy_workqueue_attrs()
4656 to->affn_strict = from->affn_strict; in copy_workqueue_attrs()
4659 * Unlike hash and equality test, copying shouldn't ignore wq-only in copy_workqueue_attrs()
4663 to->affn_scope = from->affn_scope; in copy_workqueue_attrs()
4664 to->ordered = from->ordered; in copy_workqueue_attrs()
4668 * Some attrs fields are workqueue-only. Clear them for worker_pool's. See the
4673 attrs->affn_scope = WQ_AFFN_NR_TYPES; in wqattrs_clear_for_pool()
4674 attrs->ordered = false; in wqattrs_clear_for_pool()
4675 if (attrs->affn_strict) in wqattrs_clear_for_pool()
4676 cpumask_copy(attrs->cpumask, cpu_possible_mask); in wqattrs_clear_for_pool()
4684 hash = jhash_1word(attrs->nice, hash); in wqattrs_hash()
4685 hash = jhash_1word(attrs->affn_strict, hash); in wqattrs_hash()
4686 hash = jhash(cpumask_bits(attrs->__pod_cpumask), in wqattrs_hash()
4688 if (!attrs->affn_strict) in wqattrs_hash()
4689 hash = jhash(cpumask_bits(attrs->cpumask), in wqattrs_hash()
4698 if (a->nice != b->nice) in wqattrs_equal()
4700 if (a->affn_strict != b->affn_strict) in wqattrs_equal()
4702 if (!cpumask_equal(a->__pod_cpumask, b->__pod_cpumask)) in wqattrs_equal()
4704 if (!a->affn_strict && !cpumask_equal(a->cpumask, b->cpumask)) in wqattrs_equal()
4715 * @attrs->cpumask doesn't overlap with @unbound_cpumask, we fallback to in wqattrs_actualize_cpumask()
4718 cpumask_and(attrs->cpumask, attrs->cpumask, unbound_cpumask); in wqattrs_actualize_cpumask()
4719 if (unlikely(cpumask_empty(attrs->cpumask))) in wqattrs_actualize_cpumask()
4720 cpumask_copy(attrs->cpumask, unbound_cpumask); in wqattrs_actualize_cpumask()
4733 if (attrs->affn_scope == WQ_AFFN_DFL) in wqattrs_pod_type()
4736 scope = attrs->affn_scope; in wqattrs_pod_type()
4740 if (!WARN_ON_ONCE(attrs->affn_scope == WQ_AFFN_NR_TYPES) && in wqattrs_pod_type()
4741 likely(pt->nr_pods)) in wqattrs_pod_type()
4749 BUG_ON(!pt->nr_pods); in wqattrs_pod_type()
4754 * init_worker_pool - initialize a newly zalloc'd worker_pool
4757 * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs.
4759 * Return: 0 on success, -errno on failure. Even on failure, all fields
4765 raw_spin_lock_init(&pool->lock); in init_worker_pool()
4766 pool->id = -1; in init_worker_pool()
4767 pool->cpu = -1; in init_worker_pool()
4768 pool->node = NUMA_NO_NODE; in init_worker_pool()
4769 pool->flags |= POOL_DISASSOCIATED; in init_worker_pool()
4770 pool->watchdog_ts = jiffies; in init_worker_pool()
4771 INIT_LIST_HEAD(&pool->worklist); in init_worker_pool()
4772 INIT_LIST_HEAD(&pool->idle_list); in init_worker_pool()
4773 hash_init(pool->busy_hash); in init_worker_pool()
4775 timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE); in init_worker_pool()
4776 INIT_WORK(&pool->idle_cull_work, idle_cull_fn); in init_worker_pool()
4778 timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0); in init_worker_pool()
4780 INIT_LIST_HEAD(&pool->workers); in init_worker_pool()
4782 ida_init(&pool->worker_ida); in init_worker_pool()
4783 INIT_HLIST_NODE(&pool->hash_node); in init_worker_pool()
4784 pool->refcnt = 1; in init_worker_pool()
4787 pool->attrs = alloc_workqueue_attrs(); in init_worker_pool()
4788 if (!pool->attrs) in init_worker_pool()
4789 return -ENOMEM; in init_worker_pool()
4791 wqattrs_clear_for_pool(pool->attrs); in init_worker_pool()
4801 lockdep_register_key(&wq->key); in wq_init_lockdep()
4802 lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name); in wq_init_lockdep()
4804 lock_name = wq->name; in wq_init_lockdep()
4806 wq->lock_name = lock_name; in wq_init_lockdep()
4807 wq->lockdep_map = &wq->__lockdep_map; in wq_init_lockdep()
4808 lockdep_init_map(wq->lockdep_map, lock_name, &wq->key, 0); in wq_init_lockdep()
4813 if (wq->lockdep_map != &wq->__lockdep_map) in wq_unregister_lockdep()
4816 lockdep_unregister_key(&wq->key); in wq_unregister_lockdep()
4821 if (wq->lockdep_map != &wq->__lockdep_map) in wq_free_lockdep()
4824 if (wq->lock_name != wq->name) in wq_free_lockdep()
4825 kfree(wq->lock_name); in wq_free_lockdep()
4856 nna->max = WQ_DFL_MIN_ACTIVE; in init_node_nr_active()
4857 atomic_set(&nna->nr, 0); in init_node_nr_active()
4858 raw_spin_lock_init(&nna->lock); in init_node_nr_active()
4859 INIT_LIST_HEAD(&nna->pending_pwqs); in init_node_nr_active()
4890 return -ENOMEM; in alloc_node_nr_active()
4898 if (wq->flags & WQ_UNBOUND) in rcu_free_wq()
4899 free_node_nr_active(wq->node_nr_active); in rcu_free_wq()
4902 free_percpu(wq->cpu_pwq); in rcu_free_wq()
4903 free_workqueue_attrs(wq->unbound_attrs); in rcu_free_wq()
4911 ida_destroy(&pool->worker_ida); in rcu_free_pool()
4912 free_workqueue_attrs(pool->attrs); in rcu_free_pool()
4917 * put_unbound_pool - put a worker_pool
4934 if (--pool->refcnt) in put_unbound_pool()
4938 if (WARN_ON(!(pool->cpu < 0)) || in put_unbound_pool()
4939 WARN_ON(!list_empty(&pool->worklist))) in put_unbound_pool()
4943 if (pool->id >= 0) in put_unbound_pool()
4944 idr_remove(&worker_pool_idr, pool->id); in put_unbound_pool()
4945 hash_del(&pool->hash_node); in put_unbound_pool()
4954 * pwq->refcnt == pool->refcnt == 0 in put_unbound_pool()
4958 * drops pool->lock in put_unbound_pool()
4962 !(pool->flags & POOL_MANAGER_ACTIVE), in put_unbound_pool()
4966 raw_spin_lock_irq(&pool->lock); in put_unbound_pool()
4967 if (!(pool->flags & POOL_MANAGER_ACTIVE)) { in put_unbound_pool()
4968 pool->flags |= POOL_MANAGER_ACTIVE; in put_unbound_pool()
4971 raw_spin_unlock_irq(&pool->lock); in put_unbound_pool()
4977 WARN_ON(pool->nr_workers || pool->nr_idle); in put_unbound_pool()
4978 raw_spin_unlock_irq(&pool->lock); in put_unbound_pool()
4987 del_timer_sync(&pool->idle_timer); in put_unbound_pool()
4988 cancel_work_sync(&pool->idle_cull_work); in put_unbound_pool()
4989 del_timer_sync(&pool->mayday_timer); in put_unbound_pool()
4992 call_rcu(&pool->rcu, rcu_free_pool); in put_unbound_pool()
4996 * get_unbound_pool - get a worker_pool with the specified attributes
5020 if (wqattrs_equal(pool->attrs, attrs)) { in get_unbound_pool()
5021 pool->refcnt++; in get_unbound_pool()
5027 for (pod = 0; pod < pt->nr_pods; pod++) { in get_unbound_pool()
5028 if (cpumask_subset(attrs->__pod_cpumask, pt->pod_cpus[pod])) { in get_unbound_pool()
5029 node = pt->pod_node[pod]; in get_unbound_pool()
5039 pool->node = node; in get_unbound_pool()
5040 copy_workqueue_attrs(pool->attrs, attrs); in get_unbound_pool()
5041 wqattrs_clear_for_pool(pool->attrs); in get_unbound_pool()
5051 hash_add(unbound_pool_hash, &pool->hash_node, hash); in get_unbound_pool()
5068 struct workqueue_struct *wq = pwq->wq; in pwq_release_workfn()
5069 struct worker_pool *pool = pwq->pool; in pwq_release_workfn()
5076 if (!list_empty(&pwq->pwqs_node)) { in pwq_release_workfn()
5077 mutex_lock(&wq->mutex); in pwq_release_workfn()
5078 list_del_rcu(&pwq->pwqs_node); in pwq_release_workfn()
5079 is_last = list_empty(&wq->pwqs); in pwq_release_workfn()
5084 if (!is_last && (wq->flags & __WQ_ORDERED)) in pwq_release_workfn()
5087 mutex_unlock(&wq->mutex); in pwq_release_workfn()
5090 if (wq->flags & WQ_UNBOUND) { in pwq_release_workfn()
5096 if (!list_empty(&pwq->pending_node)) { in pwq_release_workfn()
5098 wq_node_nr_active(pwq->wq, pwq->pool->node); in pwq_release_workfn()
5100 raw_spin_lock_irq(&nna->lock); in pwq_release_workfn()
5101 list_del_init(&pwq->pending_node); in pwq_release_workfn()
5102 raw_spin_unlock_irq(&nna->lock); in pwq_release_workfn()
5113 call_rcu(&wq->rcu, rcu_free_wq); in pwq_release_workfn()
5125 pwq->pool = pool; in init_pwq()
5126 pwq->wq = wq; in init_pwq()
5127 pwq->flush_color = -1; in init_pwq()
5128 pwq->refcnt = 1; in init_pwq()
5129 INIT_LIST_HEAD(&pwq->inactive_works); in init_pwq()
5130 INIT_LIST_HEAD(&pwq->pending_node); in init_pwq()
5131 INIT_LIST_HEAD(&pwq->pwqs_node); in init_pwq()
5132 INIT_LIST_HEAD(&pwq->mayday_node); in init_pwq()
5133 kthread_init_work(&pwq->release_work, pwq_release_workfn); in init_pwq()
5139 struct workqueue_struct *wq = pwq->wq; in link_pwq()
5141 lockdep_assert_held(&wq->mutex); in link_pwq()
5144 if (!list_empty(&pwq->pwqs_node)) in link_pwq()
5148 pwq->work_color = wq->work_color; in link_pwq()
5151 list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); in link_pwq()
5167 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node); in alloc_unbound_pwq()
5188 * wq_calc_pod_cpumask - calculate a wq_attrs' cpumask for a pod
5193 * The result is stored in @attrs->__pod_cpumask.
5195 * If pod affinity is not enabled, @attrs->cpumask is always used. If enabled
5197 * intersection of the possible CPUs of @pod and @attrs->cpumask.
5204 int pod = pt->cpu_pod[cpu]; in wq_calc_pod_cpumask()
5207 cpumask_and(attrs->__pod_cpumask, pt->pod_cpus[pod], attrs->cpumask); in wq_calc_pod_cpumask()
5209 if (!cpumask_intersects(attrs->__pod_cpumask, wq_online_cpumask)) { in wq_calc_pod_cpumask()
5210 cpumask_copy(attrs->__pod_cpumask, attrs->cpumask); in wq_calc_pod_cpumask()
5223 lockdep_assert_held(&wq->mutex); in install_unbound_pwq()
5249 put_pwq_unlocked(ctx->pwq_tbl[cpu]); in apply_wqattrs_cleanup()
5250 put_pwq_unlocked(ctx->dfl_pwq); in apply_wqattrs_cleanup()
5252 free_workqueue_attrs(ctx->attrs); in apply_wqattrs_cleanup()
5270 if (WARN_ON(attrs->affn_scope < 0 || in apply_wqattrs_prepare()
5271 attrs->affn_scope >= WQ_AFFN_NR_TYPES)) in apply_wqattrs_prepare()
5272 return ERR_PTR(-EINVAL); in apply_wqattrs_prepare()
5282 * the default pwq covering whole @attrs->cpumask. Always create in apply_wqattrs_prepare()
5287 cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask); in apply_wqattrs_prepare()
5288 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs); in apply_wqattrs_prepare()
5289 if (!ctx->dfl_pwq) in apply_wqattrs_prepare()
5293 if (new_attrs->ordered) { in apply_wqattrs_prepare()
5294 ctx->dfl_pwq->refcnt++; in apply_wqattrs_prepare()
5295 ctx->pwq_tbl[cpu] = ctx->dfl_pwq; in apply_wqattrs_prepare()
5298 ctx->pwq_tbl[cpu] = alloc_unbound_pwq(wq, new_attrs); in apply_wqattrs_prepare()
5299 if (!ctx->pwq_tbl[cpu]) in apply_wqattrs_prepare()
5306 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); in apply_wqattrs_prepare()
5307 cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask); in apply_wqattrs_prepare()
5308 ctx->attrs = new_attrs; in apply_wqattrs_prepare()
5312 * (dfl_pwq). Set the plugged flag of ctx->dfl_pwq to suspend execution in apply_wqattrs_prepare()
5316 if ((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)) in apply_wqattrs_prepare()
5317 ctx->dfl_pwq->plugged = true; in apply_wqattrs_prepare()
5319 ctx->wq = wq; in apply_wqattrs_prepare()
5325 return ERR_PTR(-ENOMEM); in apply_wqattrs_prepare()
5334 mutex_lock(&ctx->wq->mutex); in apply_wqattrs_commit()
5336 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs); in apply_wqattrs_commit()
5340 ctx->pwq_tbl[cpu] = install_unbound_pwq(ctx->wq, cpu, in apply_wqattrs_commit()
5341 ctx->pwq_tbl[cpu]); in apply_wqattrs_commit()
5342 ctx->dfl_pwq = install_unbound_pwq(ctx->wq, -1, ctx->dfl_pwq); in apply_wqattrs_commit()
5344 /* update node_nr_active->max */ in apply_wqattrs_commit()
5345 wq_update_node_max_active(ctx->wq, -1); in apply_wqattrs_commit()
5348 if (ctx->wq->rescuer) in apply_wqattrs_commit()
5349 set_cpus_allowed_ptr(ctx->wq->rescuer->task, in apply_wqattrs_commit()
5350 unbound_effective_cpumask(ctx->wq)); in apply_wqattrs_commit()
5352 mutex_unlock(&ctx->wq->mutex); in apply_wqattrs_commit()
5361 if (WARN_ON(!(wq->flags & WQ_UNBOUND))) in apply_workqueue_attrs_locked()
5362 return -EINVAL; in apply_workqueue_attrs_locked()
5376 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
5381 * a separate pwq to each CPU pod with possible CPUs in @attrs->cpumask so that
5383 * in-flight work items finish. Note that a work item which repeatedly requeues
5384 * itself back-to-back will stay on its current pwq.
5388 * Return: 0 on success and -errno on failure.
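/*
 * Hedged sketch for built-in users of apply_workqueue_attrs(); my_unbound_wq
 * is an assumption and only the nice level is changed here, the cpumask keeps
 * the default set by alloc_workqueue_attrs().
 */
#include <linux/workqueue.h>

static int example_raise_priority(struct workqueue_struct *my_unbound_wq)
{
	struct workqueue_attrs *attrs;
	int ret;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;

	attrs->nice = -10;	/* higher-priority workers for this wq */
	ret = apply_workqueue_attrs(my_unbound_wq, attrs);
	free_workqueue_attrs(attrs);
	return ret;
}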
5403 * unbound_wq_update_pwq - update a pwq slot for CPU hot[un]plug
5412 * back to @wq->dfl_pwq which may not be optimal but is always correct.
5417 * may execute on any CPU. This is similar to how per-cpu workqueues behave on
5428 if (!(wq->flags & WQ_UNBOUND) || wq->unbound_attrs->ordered) in unbound_wq_update_pwq()
5438 copy_workqueue_attrs(target_attrs, wq->unbound_attrs); in unbound_wq_update_pwq()
5443 if (wqattrs_equal(target_attrs, unbound_pwq(wq, cpu)->pool->attrs)) in unbound_wq_update_pwq()
5450 wq->name); in unbound_wq_update_pwq()
5455 mutex_lock(&wq->mutex); in unbound_wq_update_pwq()
5460 mutex_lock(&wq->mutex); in unbound_wq_update_pwq()
5461 pwq = unbound_pwq(wq, -1); in unbound_wq_update_pwq()
5462 raw_spin_lock_irq(&pwq->pool->lock); in unbound_wq_update_pwq()
5464 raw_spin_unlock_irq(&pwq->pool->lock); in unbound_wq_update_pwq()
5467 mutex_unlock(&wq->mutex); in unbound_wq_update_pwq()
5473 bool highpri = wq->flags & WQ_HIGHPRI; in alloc_and_link_pwqs()
5478 wq->cpu_pwq = alloc_percpu(struct pool_workqueue *); in alloc_and_link_pwqs()
5479 if (!wq->cpu_pwq) in alloc_and_link_pwqs()
5482 if (!(wq->flags & WQ_UNBOUND)) { in alloc_and_link_pwqs()
5485 if (wq->flags & WQ_BH) in alloc_and_link_pwqs()
5495 pwq_p = per_cpu_ptr(wq->cpu_pwq, cpu); in alloc_and_link_pwqs()
5498 pool->node); in alloc_and_link_pwqs()
5504 mutex_lock(&wq->mutex); in alloc_and_link_pwqs()
5506 mutex_unlock(&wq->mutex); in alloc_and_link_pwqs()
5511 if (wq->flags & __WQ_ORDERED) { in alloc_and_link_pwqs()
5516 dfl_pwq = rcu_access_pointer(wq->dfl_pwq); in alloc_and_link_pwqs()
5517 WARN(!ret && (wq->pwqs.next != &dfl_pwq->pwqs_node || in alloc_and_link_pwqs()
5518 wq->pwqs.prev != &dfl_pwq->pwqs_node), in alloc_and_link_pwqs()
5519 "ordering guarantee broken for workqueue %s\n", wq->name); in alloc_and_link_pwqs()
5527 if (wq->cpu_pwq) { in alloc_and_link_pwqs()
5529 struct pool_workqueue *pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); in alloc_and_link_pwqs()
5534 free_percpu(wq->cpu_pwq); in alloc_and_link_pwqs()
5535 wq->cpu_pwq = NULL; in alloc_and_link_pwqs()
5537 return -ENOMEM; in alloc_and_link_pwqs()
5562 if (!(wq->flags & WQ_MEM_RECLAIM)) in init_rescuer()
5568 wq->name); in init_rescuer()
5569 return -ENOMEM; in init_rescuer()
5572 rescuer->rescue_wq = wq; in init_rescuer()
5575 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf); in init_rescuer()
5576 if (IS_ERR(rescuer->task)) { in init_rescuer()
5577 ret = PTR_ERR(rescuer->task); in init_rescuer()
5579 wq->name, ERR_PTR(ret)); in init_rescuer()
5584 wq->rescuer = rescuer; in init_rescuer()
5585 if (wq->flags & WQ_UNBOUND) in init_rescuer()
5586 kthread_bind_mask(rescuer->task, unbound_effective_cpumask(wq)); in init_rescuer()
5588 kthread_bind_mask(rescuer->task, cpu_possible_mask); in init_rescuer()
5589 wake_up_process(rescuer->task); in init_rescuer()
5595 * wq_adjust_max_active - update a wq's max_active to the current setting
5598 * If @wq isn't freezing, set @wq->max_active to the saved_max_active and
5600 * @wq->max_active to zero.
5607 lockdep_assert_held(&wq->mutex); in wq_adjust_max_active()
5609 if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) { in wq_adjust_max_active()
5613 new_max = wq->saved_max_active; in wq_adjust_max_active()
5614 new_min = wq->saved_min_active; in wq_adjust_max_active()
5617 if (wq->max_active == new_max && wq->min_active == new_min) in wq_adjust_max_active()
5621 * Update @wq->max/min_active and then kick inactive work items if more in wq_adjust_max_active()
5626 WRITE_ONCE(wq->max_active, new_max); in wq_adjust_max_active()
5627 WRITE_ONCE(wq->min_active, new_min); in wq_adjust_max_active()
5629 if (wq->flags & WQ_UNBOUND) in wq_adjust_max_active()
5630 wq_update_node_max_active(wq, -1); in wq_adjust_max_active()
5636 * Round-robin through pwq's activating the first inactive work item in wq_adjust_max_active()
5647 raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags); in wq_adjust_max_active()
5650 kick_pool(pwq->pool); in wq_adjust_max_active()
5652 raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags); in wq_adjust_max_active()
5688 wq->unbound_attrs = alloc_workqueue_attrs(); in __alloc_workqueue()
5689 if (!wq->unbound_attrs) in __alloc_workqueue()
5693 name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args); in __alloc_workqueue()
5697 wq->name); in __alloc_workqueue()
5707 max_active = wq_clamp_max_active(max_active, flags, wq->name); in __alloc_workqueue()
5711 wq->flags = flags; in __alloc_workqueue()
5712 wq->max_active = max_active; in __alloc_workqueue()
5713 wq->min_active = min(max_active, WQ_DFL_MIN_ACTIVE); in __alloc_workqueue()
5714 wq->saved_max_active = wq->max_active; in __alloc_workqueue()
5715 wq->saved_min_active = wq->min_active; in __alloc_workqueue()
5716 mutex_init(&wq->mutex); in __alloc_workqueue()
5717 atomic_set(&wq->nr_pwqs_to_flush, 0); in __alloc_workqueue()
5718 INIT_LIST_HEAD(&wq->pwqs); in __alloc_workqueue()
5719 INIT_LIST_HEAD(&wq->flusher_queue); in __alloc_workqueue()
5720 INIT_LIST_HEAD(&wq->flusher_overflow); in __alloc_workqueue()
5721 INIT_LIST_HEAD(&wq->maydays); in __alloc_workqueue()
5723 INIT_LIST_HEAD(&wq->list); in __alloc_workqueue()
5726 if (alloc_node_nr_active(wq->node_nr_active) < 0) in __alloc_workqueue()
5739 mutex_lock(&wq->mutex); in __alloc_workqueue()
5741 mutex_unlock(&wq->mutex); in __alloc_workqueue()
5743 list_add_tail_rcu(&wq->list, &workqueues); in __alloc_workqueue()
5750 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq)) in __alloc_workqueue()
5758 * Failed alloc_and_link_pwqs() may leave pending pwq->release_work, in __alloc_workqueue()
5762 if (wq->flags & WQ_UNBOUND) { in __alloc_workqueue()
5764 free_node_nr_active(wq->node_nr_active); in __alloc_workqueue()
5767 free_workqueue_attrs(wq->unbound_attrs); in __alloc_workqueue()
5812 wq->lockdep_map = lockdep_map; in alloc_workqueue_lockdep_map()
5824 if (pwq->nr_in_flight[i]) in pwq_busy()
5827 if ((pwq != rcu_access_pointer(pwq->wq->dfl_pwq)) && (pwq->refcnt > 1)) in pwq_busy()
5836 * destroy_workqueue - safely terminate a workqueue
5853 mutex_lock(&wq->mutex); in destroy_workqueue()
5854 wq->flags |= __WQ_DESTROYING; in destroy_workqueue()
5855 mutex_unlock(&wq->mutex); in destroy_workqueue()
5861 if (wq->rescuer) { in destroy_workqueue()
5862 struct worker *rescuer = wq->rescuer; in destroy_workqueue()
5866 wq->rescuer = NULL; in destroy_workqueue()
5870 kthread_stop(rescuer->task); in destroy_workqueue()
5875 * Sanity checks - grab all the locks so that we wait for all in destroy_workqueue()
5876 * in-flight operations which may do put_pwq(). in destroy_workqueue()
5879 mutex_lock(&wq->mutex); in destroy_workqueue()
5881 raw_spin_lock_irq(&pwq->pool->lock); in destroy_workqueue()
5884 __func__, wq->name); in destroy_workqueue()
5886 raw_spin_unlock_irq(&pwq->pool->lock); in destroy_workqueue()
5887 mutex_unlock(&wq->mutex); in destroy_workqueue()
5892 raw_spin_unlock_irq(&pwq->pool->lock); in destroy_workqueue()
5894 mutex_unlock(&wq->mutex); in destroy_workqueue()
5900 list_del_rcu(&wq->list); in destroy_workqueue()
5905 * to put the base refs. @wq will be auto-destroyed from the last in destroy_workqueue()
5915 put_pwq_unlocked(unbound_pwq(wq, -1)); in destroy_workqueue()
5916 RCU_INIT_POINTER(*unbound_pwq_slot(wq, -1), NULL); in destroy_workqueue()
5923 * workqueue_set_max_active - adjust max_active of a workqueue
5936 if (WARN_ON(wq->flags & WQ_BH)) in workqueue_set_max_active()
5939 if (WARN_ON(wq->flags & __WQ_ORDERED)) in workqueue_set_max_active()
5942 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); in workqueue_set_max_active()
5944 mutex_lock(&wq->mutex); in workqueue_set_max_active()
5946 wq->saved_max_active = max_active; in workqueue_set_max_active()
5947 if (wq->flags & WQ_UNBOUND) in workqueue_set_max_active()
5948 wq->saved_min_active = min(wq->saved_min_active, max_active); in workqueue_set_max_active()
5952 mutex_unlock(&wq->mutex); in workqueue_set_max_active()
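/*
 * Hedged sketch (names assumed): bump a workqueue's max_active at runtime,
 * e.g. after discovering how many devices it will service.  Not valid for
 * BH or ordered workqueues, as the checks above enforce.
 */
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;

static int example_setup(int nr_devices)
{
	my_wq = alloc_workqueue("example_wq", WQ_UNBOUND, 4);
	if (!my_wq)
		return -ENOMEM;

	/* allow one in-flight work item per device instead of the initial 4 */
	workqueue_set_max_active(my_wq, nr_devices);
	return 0;
}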
5957 * workqueue_set_min_active - adjust min_active of an unbound workqueue
5972 /* min_active is only meaningful for non-ordered unbound workqueues */ in workqueue_set_min_active()
5973 if (WARN_ON((wq->flags & (WQ_BH | WQ_UNBOUND | __WQ_ORDERED)) != in workqueue_set_min_active()
5977 mutex_lock(&wq->mutex); in workqueue_set_min_active()
5978 wq->saved_min_active = clamp(min_active, 0, wq->saved_max_active); in workqueue_set_min_active()
5980 mutex_unlock(&wq->mutex); in workqueue_set_min_active()
5984 * current_work - retrieve %current task's work struct
5995 return worker ? worker->current_work : NULL; in current_work()
6000 * current_is_workqueue_rescuer - is %current workqueue rescuer?
6011 return worker && worker->rescue_wq; in current_is_workqueue_rescuer()
6015 * workqueue_congested - test whether a workqueue is congested
6025 * With the exception of ordered workqueues, all workqueues have per-cpu
6044 pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); in workqueue_congested()
6045 ret = !list_empty(&pwq->inactive_works); in workqueue_congested()
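/*
 * Hedged sketch (my_wq/my_opt_work assumed): workqueue_congested() is
 * advisory only, so it fits best-effort throttling of optional work.
 */
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;
static struct work_struct my_opt_work;

static void example_queue_if_idle(void)
{
	/* WORK_CPU_UNBOUND means "the local CPU" for the congestion test */
	if (!workqueue_congested(WORK_CPU_UNBOUND, my_wq))
		queue_work(my_wq, &my_opt_work);
	/* else: skip or batch the optional work */
}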
6055 * work_busy - test whether a work is currently pending or running
6077 raw_spin_lock_irqsave(&pool->lock, irq_flags); in work_busy()
6080 raw_spin_unlock_irqrestore(&pool->lock, irq_flags); in work_busy()
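/*
 * Hedged sketch (my_work assumed): work_busy() is only a racy snapshot, so
 * it suits debugfs/stat style reporting rather than synchronization.
 */
#include <linux/printk.h>
#include <linux/workqueue.h>

static struct work_struct my_work;

static void example_report(void)
{
	unsigned int busy = work_busy(&my_work);

	pr_info("my_work: pending=%d running=%d\n",
		!!(busy & WORK_BUSY_PENDING), !!(busy & WORK_BUSY_RUNNING));
}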
6089 * set_worker_desc - set description for the current work item
6090 * @fmt: printf-style format string
6105 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args); in set_worker_desc()
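/*
 * Hedged sketch (struct my_req assumed): the description set here is what
 * print_worker_info() below dumps on hangs, so include the identifier a
 * human would need to correlate the stuck work with its object.
 */
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct my_req {
	struct work_struct work;
	u32 id;
};

static void my_req_fn(struct work_struct *work)
{
	struct my_req *req = container_of(work, struct my_req, work);

	set_worker_desc("my_req/%u", req->id);
	/* ... service the request ... */
}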
6112 * print_worker_info - print out worker information and description
6133 if (!(task->flags & PF_WQ_WORKER)) in print_worker_info()
6146 copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn)); in print_worker_info()
6147 copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq)); in print_worker_info()
6148 copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq)); in print_worker_info()
6149 copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1); in print_worker_info()
6150 copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1); in print_worker_info()
6162 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask); in pr_cont_pool_info()
6163 if (pool->node != NUMA_NO_NODE) in pr_cont_pool_info()
6164 pr_cont(" node=%d", pool->node); in pr_cont_pool_info()
6165 pr_cont(" flags=0x%x", pool->flags); in pr_cont_pool_info()
6166 if (pool->flags & POOL_BH) in pr_cont_pool_info()
6168 pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); in pr_cont_pool_info()
6170 pr_cont(" nice=%d", pool->attrs->nice); in pr_cont_pool_info()
6175 struct worker_pool *pool = worker->pool; in pr_cont_worker_id()
6177 if (pool->flags & POOL_BH) in pr_cont_worker_id()
6179 pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); in pr_cont_worker_id()
6181 pr_cont("%d%s", task_pid_nr(worker->task), in pr_cont_worker_id()
6182 worker->rescue_wq ? "(RESCUER)" : ""); in pr_cont_worker_id()
6193 if (!pcwsp->ctr) in pr_cont_work_flush()
6195 if (func == pcwsp->func) { in pr_cont_work_flush()
6196 pcwsp->ctr++; in pr_cont_work_flush()
6199 if (pcwsp->ctr == 1) in pr_cont_work_flush()
6200 pr_cont("%s %ps", pcwsp->comma ? "," : "", pcwsp->func); in pr_cont_work_flush()
6202 pr_cont("%s %ld*%ps", pcwsp->comma ? "," : "", pcwsp->ctr, pcwsp->func); in pr_cont_work_flush()
6203 pcwsp->ctr = 0; in pr_cont_work_flush()
6205 if ((long)func == -1L) in pr_cont_work_flush()
6207 pcwsp->comma = comma; in pr_cont_work_flush()
6208 pcwsp->func = func; in pr_cont_work_flush()
6209 pcwsp->ctr = 1; in pr_cont_work_flush()
6214 if (work->func == wq_barrier_func) { in pr_cont_work()
6219 pr_cont_work_flush(comma, (work_func_t)-1, pcwsp); in pr_cont_work()
6221 task_pid_nr(barr->task)); in pr_cont_work()
6224 pr_cont_work_flush(comma, (work_func_t)-1, pcwsp); in pr_cont_work()
6225 pr_cont_work_flush(comma, work->func, pcwsp); in pr_cont_work()
6232 struct worker_pool *pool = pwq->pool; in show_pwq()
6238 pr_info(" pwq %d:", pool->id); in show_pwq()
6242 pwq->nr_active, pwq->refcnt, in show_pwq()
6243 !list_empty(&pwq->mayday_node) ? " MAYDAY" : ""); in show_pwq()
6245 hash_for_each(pool->busy_hash, bkt, worker, hentry) { in show_pwq()
6246 if (worker->current_pwq == pwq) { in show_pwq()
6254 pr_info(" in-flight:"); in show_pwq()
6255 hash_for_each(pool->busy_hash, bkt, worker, hentry) { in show_pwq()
6256 if (worker->current_pwq != pwq) in show_pwq()
6261 pr_cont(":%ps", worker->current_func); in show_pwq()
6262 list_for_each_entry(work, &worker->scheduled, entry) in show_pwq()
6264 pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); in show_pwq()
6270 list_for_each_entry(work, &pool->worklist, entry) { in show_pwq()
6280 list_for_each_entry(work, &pool->worklist, entry) { in show_pwq()
6287 pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); in show_pwq()
6291 if (!list_empty(&pwq->inactive_works)) { in show_pwq()
6295 list_for_each_entry(work, &pwq->inactive_works, entry) { in show_pwq()
6299 pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); in show_pwq()
6305 * show_one_workqueue - dump state of specified workqueue
6323 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); in show_one_workqueue()
6326 raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags); in show_one_workqueue()
6337 raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags); in show_one_workqueue()
6340 * sysrq-t -> show_all_workqueues(). Avoid triggering in show_one_workqueue()
6349 * show_one_worker_pool - dump state of specified worker pool
6359 raw_spin_lock_irqsave(&pool->lock, irq_flags); in show_one_worker_pool()
6360 if (pool->nr_workers == pool->nr_idle) in show_one_worker_pool()
6364 if (!list_empty(&pool->worklist)) in show_one_worker_pool()
6365 hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000; in show_one_worker_pool()
6373 pr_info("pool %d:", pool->id); in show_one_worker_pool()
6375 pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers); in show_one_worker_pool()
6376 if (pool->manager) in show_one_worker_pool()
6378 task_pid_nr(pool->manager->task)); in show_one_worker_pool()
6379 list_for_each_entry(worker, &pool->idle_list, entry) { in show_one_worker_pool()
6387 raw_spin_unlock_irqrestore(&pool->lock, irq_flags); in show_one_worker_pool()
6390 * sysrq-t -> show_all_workqueues(). Avoid triggering in show_one_worker_pool()
6398 * show_all_workqueues - dump workqueue state
6422 * show_freezable_workqueues - dump freezable workqueue state
6436 if (!(wq->flags & WQ_FREEZABLE)) in show_freezable_workqueues()
6450 if (task->flags & PF_WQ_WORKER) { in wq_worker_comm()
6452 struct worker_pool *pool = worker->pool; in wq_worker_comm()
6458 raw_spin_lock_irq(&pool->lock); in wq_worker_comm()
6460 * ->desc tracks information (wq name or in wq_worker_comm()
6462 * current, prepend '+', otherwise '-'. in wq_worker_comm()
6464 if (worker->desc[0] != '\0') { in wq_worker_comm()
6465 if (worker->current_work) in wq_worker_comm()
6466 scnprintf(buf + off, size - off, "+%s", in wq_worker_comm()
6467 worker->desc); in wq_worker_comm()
6469 scnprintf(buf + off, size - off, "-%s", in wq_worker_comm()
6470 worker->desc); in wq_worker_comm()
6472 raw_spin_unlock_irq(&pool->lock); in wq_worker_comm()
6475 strscpy(buf, task->comm, size); in wq_worker_comm()
6505 raw_spin_lock_irq(&pool->lock); in unbind_workers()
6516 worker->flags |= WORKER_UNBOUND; in unbind_workers()
6518 pool->flags |= POOL_DISASSOCIATED; in unbind_workers()
6528 pool->nr_running = 0; in unbind_workers()
6537 raw_spin_unlock_irq(&pool->lock); in unbind_workers()
6547 * rebind_workers - rebind all workers of a pool to the associated CPU
6550 * @pool->cpu is coming online. Rebind all workers to the CPU.
6560 * be on the run-queue of the associated CPU before any local in rebind_workers()
6561 * wake-ups for concurrency management happen, restore CPU affinity in rebind_workers()
6566 kthread_set_per_cpu(worker->task, pool->cpu); in rebind_workers()
6567 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, in rebind_workers()
6571 raw_spin_lock_irq(&pool->lock); in rebind_workers()
6573 pool->flags &= ~POOL_DISASSOCIATED; in rebind_workers()
6576 unsigned int worker_flags = worker->flags; in rebind_workers()
6583 * it initiates the next execution cycle thus restoring in rebind_workers()
6587 * WRITE_ONCE() is necessary because @worker->flags may be in rebind_workers()
6596 WRITE_ONCE(worker->flags, worker_flags); in rebind_workers()
6599 raw_spin_unlock_irq(&pool->lock); in rebind_workers()
6603 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
6620 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) in restore_unbound_workers_cpumask()
6623 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask); in restore_unbound_workers_cpumask()
6627 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0); in restore_unbound_workers_cpumask()
6635 if (pool->nr_workers) in workqueue_prepare_cpu()
6638 return -ENOMEM; in workqueue_prepare_cpu()
6655 if (pool->flags & POOL_BH) in workqueue_online_cpu()
6659 if (pool->cpu == cpu) in workqueue_online_cpu()
6661 else if (pool->cpu < 0) in workqueue_online_cpu()
6668 struct workqueue_attrs *attrs = wq->unbound_attrs; in workqueue_online_cpu()
6674 for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) in workqueue_online_cpu()
6677 mutex_lock(&wq->mutex); in workqueue_online_cpu()
6678 wq_update_node_max_active(wq, -1); in workqueue_online_cpu()
6679 mutex_unlock(&wq->mutex); in workqueue_online_cpu()
6691 /* unbinding per-cpu workers should happen on the local CPU */ in workqueue_offline_cpu()
6693 return -1; in workqueue_offline_cpu()
6703 struct workqueue_attrs *attrs = wq->unbound_attrs; in workqueue_offline_cpu()
6709 for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) in workqueue_offline_cpu()
6712 mutex_lock(&wq->mutex); in workqueue_offline_cpu()
6714 mutex_unlock(&wq->mutex); in workqueue_offline_cpu()
6733 wfc->ret = wfc->fn(wfc->arg); in work_for_cpu_fn()
6737 * work_on_cpu_key - run a function in thread context on a particular cpu
6762 * work_on_cpu_safe_key - run a function in thread context on a particular cpu
6776 long ret = -ENODEV; in work_on_cpu_safe_key()
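/*
 * Hedged sketch (example_fn assumed): work_on_cpu() wraps work_on_cpu_key()
 * and runs the function synchronously in a kworker bound to @cpu, returning
 * its long result.  Only meaningful on SMP builds.
 */
#include <linux/workqueue.h>

static long example_fn(void *arg)
{
	/* executes in process context on the CPU passed to work_on_cpu() */
	return (long)arg + 1;
}

static long example_run_on(int cpu)
{
	return work_on_cpu(cpu, example_fn, (void *)41L);
}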
6790 * freeze_workqueues_begin - begin freezing workqueues
6794 * pool->worklist.
6797 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
6809 mutex_lock(&wq->mutex); in freeze_workqueues_begin()
6811 mutex_unlock(&wq->mutex); in freeze_workqueues_begin()
6818 * freeze_workqueues_busy - are freezable workqueues still busy?
6841 if (!(wq->flags & WQ_FREEZABLE)) in freeze_workqueues_busy()
6849 WARN_ON_ONCE(pwq->nr_active < 0); in freeze_workqueues_busy()
6850 if (pwq->nr_active) { in freeze_workqueues_busy()
6864 * thaw_workqueues - thaw workqueues
6870 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
6885 mutex_lock(&wq->mutex); in thaw_workqueues()
6887 mutex_unlock(&wq->mutex); in thaw_workqueues()
6905 if (!(wq->flags & WQ_UNBOUND) || (wq->flags & __WQ_DESTROYING)) in workqueue_apply_unbound_cpumask()
6908 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask); in workqueue_apply_unbound_cpumask()
6914 list_add_tail(&ctx->list, &ctxs); in workqueue_apply_unbound_cpumask()
6932 * workqueue_unbound_exclude_cpumask - Exclude given CPUs from unbound cpumask
6944 return -ENOMEM; in workqueue_unbound_exclude_cpumask()
6976 return -EINVAL; in parse_affn_scope()
6988 return -EINVAL; in wq_affn_dfl_set()
7024 * per_cpu RO bool : whether the workqueue is per-cpu or unbound
7025 * max_active RW int : maximum number of in-flight work items
7043 return wq_dev->wq; in dev_to_wq()
7051 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); in per_cpu_show()
7060 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); in max_active_show()
7071 return -EINVAL; in max_active_store()
7091 mutex_lock(&wq->mutex); in wq_nice_show()
7092 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice); in wq_nice_show()
7093 mutex_unlock(&wq->mutex); in wq_nice_show()
7109 copy_workqueue_attrs(attrs, wq->unbound_attrs); in wq_sysfs_prep_attrs()
7118 int ret = -ENOMEM; in wq_nice_store()
7126 if (sscanf(buf, "%d", &attrs->nice) == 1 && in wq_nice_store()
7127 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) in wq_nice_store()
7130 ret = -EINVAL; in wq_nice_store()
7144 mutex_lock(&wq->mutex); in wq_cpumask_show()
7146 cpumask_pr_args(wq->unbound_attrs->cpumask)); in wq_cpumask_show()
7147 mutex_unlock(&wq->mutex); in wq_cpumask_show()
7157 int ret = -ENOMEM; in wq_cpumask_store()
7165 ret = cpumask_parse(buf, attrs->cpumask); in wq_cpumask_store()
7181 mutex_lock(&wq->mutex); in wq_affn_scope_show()
7182 if (wq->unbound_attrs->affn_scope == WQ_AFFN_DFL) in wq_affn_scope_show()
7188 wq_affn_names[wq->unbound_attrs->affn_scope]); in wq_affn_scope_show()
7189 mutex_unlock(&wq->mutex); in wq_affn_scope_show()
7200 int affn, ret = -ENOMEM; in wq_affn_scope_store()
7209 attrs->affn_scope = affn; in wq_affn_scope_store()
7223 wq->unbound_attrs->affn_strict); in wq_affinity_strict_show()
7232 int v, ret = -ENOMEM; in wq_affinity_strict_store()
7235 return -EINVAL; in wq_affinity_strict_store()
7240 attrs->affn_strict = (bool)v; in wq_affinity_strict_store()
7262 * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
7265 * The low-level workqueues cpumask is a global cpumask that limits
7269 * Return: 0 - Success
7270 * -EINVAL - Invalid @cpumask
7271 * -ENOMEM - Failed to allocate memory for attrs or pwqs.
7275 int ret = -EINVAL; in workqueue_set_unbound_cpumask()
7334 return -ENOMEM; in cpumask_store()
7367 * workqueue_sysfs_register - make a workqueue visible in sysfs
7379 * Return: 0 on success, -errno on failure.
7390 if (WARN_ON(wq->flags & __WQ_ORDERED)) in workqueue_sysfs_register()
7391 return -EINVAL; in workqueue_sysfs_register()
7393 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); in workqueue_sysfs_register()
7395 return -ENOMEM; in workqueue_sysfs_register()
7397 wq_dev->wq = wq; in workqueue_sysfs_register()
7398 wq_dev->dev.bus = &wq_subsys; in workqueue_sysfs_register()
7399 wq_dev->dev.release = wq_device_release; in workqueue_sysfs_register()
7400 dev_set_name(&wq_dev->dev, "%s", wq->name); in workqueue_sysfs_register()
7406 dev_set_uevent_suppress(&wq_dev->dev, true); in workqueue_sysfs_register()
7408 ret = device_register(&wq_dev->dev); in workqueue_sysfs_register()
7410 put_device(&wq_dev->dev); in workqueue_sysfs_register()
7411 wq->wq_dev = NULL; in workqueue_sysfs_register()
7415 if (wq->flags & WQ_UNBOUND) { in workqueue_sysfs_register()
7418 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { in workqueue_sysfs_register()
7419 ret = device_create_file(&wq_dev->dev, attr); in workqueue_sysfs_register()
7421 device_unregister(&wq_dev->dev); in workqueue_sysfs_register()
7422 wq->wq_dev = NULL; in workqueue_sysfs_register()
7428 dev_set_uevent_suppress(&wq_dev->dev, false); in workqueue_sysfs_register()
7429 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); in workqueue_sysfs_register()
7434 * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
7441 struct wq_device *wq_dev = wq->wq_dev; in workqueue_sysfs_unregister()
7443 if (!wq->wq_dev) in workqueue_sysfs_unregister()
7446 wq->wq_dev = NULL; in workqueue_sysfs_unregister()
7447 device_unregister(&wq_dev->dev); in workqueue_sysfs_unregister()
7456 * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
7464 * forward progress is defined as the first item on ->worklist changing.
7483 * The only candidates are CPU-bound workers in the running state.
7493 raw_spin_lock_irqsave(&pool->lock, irq_flags); in show_cpu_pool_hog()
7495 hash_for_each(pool->busy_hash, bkt, worker, hentry) { in show_cpu_pool_hog()
7496 if (task_is_running(worker->task)) { in show_cpu_pool_hog()
7504 pr_info("pool %d:\n", pool->id); in show_cpu_pool_hog()
7505 sched_show_task(worker->task); in show_cpu_pool_hog()
7511 raw_spin_unlock_irqrestore(&pool->lock, irq_flags); in show_cpu_pool_hog()
7519 pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n"); in show_cpu_pools_hogs()
7524 if (pool->cpu_stall) in show_cpu_pools_hogs()
7568 pool->cpu_stall = false; in wq_watchdog_timer_fn()
7569 if (list_empty(&pool->worklist)) in wq_watchdog_timer_fn()
7579 if (pool->cpu >= 0) in wq_watchdog_timer_fn()
7580 touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu)); in wq_watchdog_timer_fn()
7583 pool_ts = READ_ONCE(pool->watchdog_ts); in wq_watchdog_timer_fn()
7593 if (pool->cpu >= 0 && !(pool->flags & POOL_BH)) { in wq_watchdog_timer_fn()
7594 pool->cpu_stall = true; in wq_watchdog_timer_fn()
7597 pr_emerg("BUG: workqueue lockup - pool"); in wq_watchdog_timer_fn()
7600 jiffies_to_msecs(now - pool_ts) / 1000); in wq_watchdog_timer_fn()
7711 pool->cpu = cpu; in init_cpu_worker_pool()
7712 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); in init_cpu_worker_pool()
7713 cpumask_copy(pool->attrs->__pod_cpumask, cpumask_of(cpu)); in init_cpu_worker_pool()
7714 pool->attrs->nice = nice; in init_cpu_worker_pool()
7715 pool->attrs->affn_strict = true; in init_cpu_worker_pool()
7716 pool->node = cpu_to_node(cpu); in init_cpu_worker_pool()
7725 * workqueue_init_early - early init for workqueue subsystem
7727 * This is the first step of three-staged workqueue subsystem initialization and
7728 * invoked as soon as the bare basics - memory allocation, cpumasks and idr are
7771 pt->pod_cpus = kcalloc(1, sizeof(pt->pod_cpus[0]), GFP_KERNEL); in workqueue_init_early()
7772 pt->pod_node = kcalloc(1, sizeof(pt->pod_node[0]), GFP_KERNEL); in workqueue_init_early()
7773 pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); in workqueue_init_early()
7774 BUG_ON(!pt->pod_cpus || !pt->pod_node || !pt->cpu_pod); in workqueue_init_early()
7776 BUG_ON(!zalloc_cpumask_var_node(&pt->pod_cpus[0], GFP_KERNEL, NUMA_NO_NODE)); in workqueue_init_early()
7778 pt->nr_pods = 1; in workqueue_init_early()
7779 cpumask_copy(pt->pod_cpus[0], cpu_possible_mask); in workqueue_init_early()
7780 pt->pod_node[0] = NUMA_NO_NODE; in workqueue_init_early()
7781 pt->cpu_pod[0] = 0; in workqueue_init_early()
7790 pool->flags |= POOL_BH; in workqueue_init_early()
7805 attrs->nice = std_nice[i]; in workqueue_init_early()
7813 attrs->nice = std_nice[i]; in workqueue_init_early()
7814 attrs->ordered = true; in workqueue_init_early()
7855 * most consider human-perceivable. However, the kernel also runs on a in wq_cpu_intensive_thresh_init()
7868 /* see init/calibrate.c for lpj -> BogoMIPS calculation */ in wq_cpu_intensive_thresh_init()
7880 * workqueue_init - bring workqueue subsystem fully online
7882 * This is the second step of three-staged workqueue subsystem initialization
7899 * Per-cpu pools created earlier could be missing node hint. Fix them in workqueue_init()
7904 pool->node = cpu_to_node(cpu); in workqueue_init()
7906 pool->node = cpu_to_node(cpu); in workqueue_init()
7912 wq->name); in workqueue_init()
7929 pool->flags &= ~POOL_DISASSOCIATED; in workqueue_init()
7942 * Initialize @pt by first initializing @pt->cpu_pod[] with pod IDs according to
7951 pt->nr_pods = 0; in init_pod_type()
7953 /* init @pt->cpu_pod[] according to @cpus_share_pod() */ in init_pod_type()
7954 pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); in init_pod_type()
7955 BUG_ON(!pt->cpu_pod); in init_pod_type()
7960 pt->cpu_pod[cur] = pt->nr_pods++; in init_pod_type()
7964 pt->cpu_pod[cur] = pt->cpu_pod[pre]; in init_pod_type()
7970 /* init the rest to match @pt->cpu_pod[] */ in init_pod_type()
7971 pt->pod_cpus = kcalloc(pt->nr_pods, sizeof(pt->pod_cpus[0]), GFP_KERNEL); in init_pod_type()
7972 pt->pod_node = kcalloc(pt->nr_pods, sizeof(pt->pod_node[0]), GFP_KERNEL); in init_pod_type()
7973 BUG_ON(!pt->pod_cpus || !pt->pod_node); in init_pod_type()
7975 for (pod = 0; pod < pt->nr_pods; pod++) in init_pod_type()
7976 BUG_ON(!zalloc_cpumask_var(&pt->pod_cpus[pod], GFP_KERNEL)); in init_pod_type()
7979 cpumask_set_cpu(cpu, pt->pod_cpus[pt->cpu_pod[cpu]]); in init_pod_type()
7980 pt->pod_node[pt->cpu_pod[cpu]] = cpu_to_node(cpu); in init_pod_type()
8004 * workqueue_init_topology - initialize CPU pods for unbound workqueues
8006 * This is the third step of three-staged workqueue subsystem initialization and
8027 * and CPU combinations to apply per-pod sharing. in workqueue_init_topology()
8032 if (wq->flags & WQ_UNBOUND) { in workqueue_init_topology()
8033 mutex_lock(&wq->mutex); in workqueue_init_topology()
8034 wq_update_node_max_active(wq, -1); in workqueue_init_topology()
8035 mutex_unlock(&wq->mutex); in workqueue_init_topology()
8044 pr_warn("WARNING: Flushing system-wide workqueues will be prohibited in the near future.\n"); in __warn_flushing_systemwide_wq()