Lines Matching +full:assigned +full:- +full:resolution +full:- +full:bits

1 /* SPDX-License-Identifier: GPL-2.0 */
114 * Asymmetric CPU capacity bits
125 #define cpu_capacity_span(asym_data) to_cpumask((asym_data)->cpus)
128 * Helpers for converting nanosecond timing to jiffy resolution
133 * Increase resolution of nice-level calculations for 64-bit architectures.
134 * The extra resolution improves shares distribution and load balancing of
135  * low-weight task groups (e.g. nice +19 on an autogroup), deeper task-group
136 * hierarchies, especially on larger systems. This is not a user-visible change
137 * and does not change the user-interface for setting shares/weights.
139 * We increase resolution only if we have enough bits to allow this increased
140 * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
144 * increase coverage and consistency always enable it on 64-bit platforms.
165 * independent resolution, but they should be well calibrated. We use
169 * scale_load(sched_prio_to_weight[NICE_TO_PRIO(0)-MAX_RT_PRIO]) == NICE_0_LOAD
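The calibration rule above can be sanity-checked in isolation. The sketch below is a minimal userspace stand-in, assuming the usual defaults (SCHED_FIXEDPOINT_SHIFT of 10, a nice-0 weight of 1024, and the doubled 64-bit load resolution); the real kernel scale_load_down() additionally clamps small non-zero weights, which is omitted here.

/* Minimal sketch of the weight/load calibration; constants are assumptions. */
#include <assert.h>
#include <stdio.h>

#define SCHED_FIXEDPOINT_SHIFT  10
#define NICE_0_LOAD_SHIFT       (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) /* 64-bit case */
#define NICE_0_LOAD             (1L << NICE_0_LOAD_SHIFT)
#define scale_load(w)           ((w) << SCHED_FIXEDPOINT_SHIFT)
#define scale_load_down(w)      ((w) >> SCHED_FIXEDPOINT_SHIFT)

int main(void)
{
        long nice_0_weight = 1024;      /* sched_prio_to_weight[] entry for nice 0 */

        /* The rule quoted above: scaling the user-visible weight up gives NICE_0_LOAD. */
        assert(scale_load(nice_0_weight) == NICE_0_LOAD);
        printf("NICE_0_LOAD=%ld, scaled back down=%ld\n",
               NICE_0_LOAD, scale_load_down(NICE_0_LOAD));
        return 0;
}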
176 * 10 -> just above 1us
177 * 9 -> just above 0.5us
223 return idle_policy(p->policy); in task_has_idle_policy()
228 return rt_policy(p->policy); in task_has_rt_policy()
233 return dl_policy(p->policy); in task_has_dl_policy()
240 s64 diff = sample - *avg; in update_avg()
247 * is UB; cap at size-1.
250 (val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
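The macro body above (shr_bound() in the kernel) caps the shift count because shifting a value by its type width or more is undefined behaviour in C. A standalone restatement of the same idea, with a plain MIN() standing in for min_t():

/* Userspace sketch of a width-capped right shift; GNU typeof assumed. */
#include <stdint.h>
#include <stdio.h>

#define BITS_PER_TYPE(t)        (sizeof(t) * 8)
#define MIN(a, b)               ((a) < (b) ? (a) : (b))
#define shr_bound(val, shift)   ((val) >> MIN((shift), BITS_PER_TYPE(typeof(val)) - 1))

int main(void)
{
        uint64_t v = ~0ULL;

        /* A shift count of 200 would be UB with a bare >>; here it is capped at 63. */
        printf("%llu\n", (unsigned long long)shr_bound(v, 200));
        return 0;
}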
255 * maps pretty well onto the shares value used by scheduler and the round-trip
289 return unlikely(dl_se->flags & SCHED_FLAG_SUGOV); in dl_entity_is_special()
302 dl_time_before(a->deadline, b->deadline); in dl_entity_preempt()
306 * This is the priority-queue data structure of the RT scheduling class:
328 * To keep the bandwidth of -deadline tasks under control
330 * - store the maximum -deadline bandwidth of each cpu;
331 * - cache the fraction of bandwidth that is currently allocated in
335 * one used for RT-throttling (rt_bandwidth), with the main difference
342 * - bw (< 100%) is the deadline bandwidth of each CPU;
343 * - total_bw is the currently allocated bandwidth in each root domain;
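The bookkeeping described above (a per-CPU limit plus the bandwidth already allocated in the root domain) is what makes -deadline admission control possible. Below is a deliberately simplified illustration with invented names; the real check is __dl_overflow(), which additionally scales the limit by CPU capacity.

/* Illustrative admission test only; struct and function names are hypothetical. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT        20
#define BW_UNIT         (1ULL << BW_SHIFT)      /* 100% of one CPU */

struct rd_bw_stub {
        uint64_t bw;            /* maximum -deadline bandwidth of each CPU (< BW_UNIT) */
        uint64_t total_bw;      /* bandwidth currently allocated in this root domain */
        unsigned int cpus;      /* CPUs spanned by the root domain */
};

/* Admit a new reservation only if the aggregate limit is not exceeded. */
static bool rd_bw_admit(const struct rd_bw_stub *rd, uint64_t new_bw)
{
        return rd->total_bw + new_bw <= (uint64_t)rd->cpus * rd->bw;
}

int main(void)
{
        struct rd_bw_stub rd = { .bw = BW_UNIT / 2, .total_bw = 0, .cpus = 4 };

        printf("%d\n", rd_bw_admit(&rd, BW_UNIT));      /* 100% of one CPU: fits within 4 * 50% */
        printf("%d\n", rd_bw_admit(&rd, 3 * BW_UNIT));  /* 300%: exceeds 4 * 50% */
        return 0;
}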
366 * dl_se::rq -- runqueue we belong to.
368 * dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
371 * dl_server_update() -- called from update_curr_common(), propagates runtime
374 * dl_server_start() -- start the server when it has tasks; it will stop
378 * dl_server_stop() -- (force) stop the server; use when updating
381 * dl_server_init() -- initializes the server.
384 * zero-laxity point -- that is, unlike regular EDF tasks which run ASAP, a
388 * against the server -- through dl_server_update() above -- such that when it
401 * task wakes up imminently (starting the server again), can be used --
402 * subject to CBS wakeup rules -- without having to wait for the next period.
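Taken together, the notes above imply a small lifecycle for a dl_server instance. A hedged outline of the expected ordering (kernel context; argument lists are elided because they are not all shown here):

/*
 * dl_server lifecycle outline, following the comments above:
 *
 *      dl_server_init(...);        // once, wire the server to its rq and pick callback
 *      dl_server_start(dl_se);     // when the served class has runnable tasks
 *      dl_server_update(dl_se, d); // from update_curr_common(), charge consumed runtime
 *      dl_server_stop(dl_se);      // (force) stop, e.g. when updating parameters
 *
 * Per the zero-laxity note, the server defers its own execution, and runtime
 * consumed by the served tasks in the meantime is charged against it.
 */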
424 return dl_se->dl_server_active; in dl_server_active()
487 * it in its own cache-line separated from the fields above which
516 /* The two decimal precision [%] value requested from user-space */
534 * (The default weight is 1024 - so there's no practical
634  * applicable for 32-bit architectures.
674 /* CFS-related fields in a runqueue */
724 * Where f(tg) is the recursive weight fraction assigned to
737 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
774 /* scx_rq->flags, protected by the rq lock */
802 u64 clock; /* current per-rq clock -- see scx_bpf_now() */
824 /* Real-Time classes' related field in a runqueue: */
847 struct rq *rq; /* this is always top-level rq, cache? */
856 return rt_rq->rt_queued && rt_rq->rt_nr_running; in rt_rq_is_runnable()
881 * an rb-tree, ordered by tasks' deadlines, with caching
894 * Utilization of the tasks "assigned" to this runqueue (including
900 * runqueue (inactive utilization = this_bw - running_bw).
921 #define entity_is_task(se) (!se->my_q)
926 se->runnable_weight = se->my_q->h_nr_runnable; in se_update_runnable()
931 if (se->sched_delayed) in se_runnable()
935 return !!se->on_rq; in se_runnable()
937 return se->runnable_weight; in se_runnable()
948 if (se->sched_delayed) in se_runnable()
951 return !!se->on_rq; in se_runnable()
957 * XXX we want to get rid of these helpers and use the full load resolution.
961 return scale_load_down(se->load.weight); in se_weight()
977 * We add the notion of a root-domain which will be used to define per-domain
980 * exclusive cpuset is created, we also create and attach a new root-domain
993 * - More than one runnable task
994 * - Running task is misfit
998 /* Indicate one or more CPUs over-utilized (tipping point) */
1003 * than one runnable -deadline task (as it is below for RT tasks).
1040 * NULL-terminated list of performance domains intersecting with the
1054 return READ_ONCE(rd->overloaded); in get_rd_overloaded()
1060 WRITE_ONCE(rd->overloaded, status); in set_rd_overloaded()
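The helpers above use READ_ONCE()/WRITE_ONCE() because the root domain's overloaded status is typically written from paths holding a runqueue lock but read locklessly from other CPUs. A userspace stand-in for that access pattern, using C11 relaxed atomics in place of the kernel macros:

/* Sketch only: C11 atomics standing in for READ_ONCE()/WRITE_ONCE(). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct root_domain_stub {
        atomic_int overloaded;          /* stand-in for rd->overloaded */
};

/* Writer side: in the kernel this runs from locked scheduler paths. */
static void set_rd_overloaded_stub(struct root_domain_stub *rd, int status)
{
        atomic_store_explicit(&rd->overloaded, status, memory_order_relaxed);
}

/* Reader side: may run on any CPU without holding a lock. */
static bool get_rd_overloaded_stub(struct root_domain_stub *rd)
{
        return atomic_load_explicit(&rd->overloaded, memory_order_relaxed);
}

int main(void)
{
        struct root_domain_stub rd = { 0 };

        set_rd_overloaded_stub(&rd, 1);
        printf("%d\n", get_rd_overloaded_stub(&rd));
        return 0;
}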
1069 * struct uclamp_bucket - Utilization clamp bucket
1078 unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
1082 * struct uclamp_rq - rq's utilization clamp
1094 * - for util_min: we want to run the CPU at least at the max of the minimum
1096 * - for util_max: we want to allow the CPU to run up to the max of the
1101 * the metrics required to compute all the per-rq utilization clamp values.
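In other words, both clamps are max-aggregated across the runnable tasks and the result brackets the CPU's utilization. A small standalone illustration of that aggregation (values and names are made up; the real per-bucket bookkeeping is more involved):

/* Sketch of rq-level clamp aggregation and application; illustrative values. */
#include <stdio.h>

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
        unsigned long rq_uclamp_min = 0, rq_uclamp_max = 0;
        unsigned long task_min[] = { 128, 256 };        /* runnable tasks' util_min */
        unsigned long task_max[] = { 512, 768 };        /* runnable tasks' util_max */
        unsigned long util = 900;                       /* raw CPU utilization */

        for (int i = 0; i < 2; i++) {
                if (task_min[i] > rq_uclamp_min)
                        rq_uclamp_min = task_min[i];
                if (task_max[i] > rq_uclamp_max)
                        rq_uclamp_max = task_max[i];
        }

        /* 900 is capped to 768 here; an idle-ish 100 would be raised to 256. */
        printf("%lu\n", clamp_ul(util, rq_uclamp_min, rq_uclamp_max));
        return 0;
}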
1112 * This is the main, per-CPU runqueue data structure.
1303 /* shared state -- careful with sched_core_cpu_deactivate() */
1327 return cfs_rq->rq; in rq_of()
1340 return rq->cpu; in cpu_of()
1347 return p->migration_disabled; in is_migration_disabled()
1355 #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
1361 rcu_assign_pointer(rq->donor, t); in rq_set_donor()
1377 return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled; in sched_core_enabled()
1387 * stable unless you actually hold a relevant rq->__lock.
1392 return &rq->core->__lock; in rq_lockp()
1394 return &rq->__lock; in rq_lockp()
1399 if (rq->core_enabled) in __rq_lockp()
1400 return &rq->core->__lock; in __rq_lockp()
1402 return &rq->__lock; in __rq_lockp()
1422 return rq->core->core_cookie == p->core_cookie; in sched_cpu_cookie_match()
1445 return idle_core || rq->core->core_cookie == p->core_cookie; in sched_core_cookie_match()
1458 for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) { in sched_group_cookie_match()
1467 return !RB_EMPTY_NODE(&p->core_node); in sched_core_enqueued()
1490 return &rq->__lock; in rq_lockp()
1495 return &rq->__lock; in __rq_lockp()
1605 return p->se.cfs_rq; in task_cfs_rq()
1611 return se->cfs_rq; in cfs_rq_of()
1617 return grp->my_q; in group_cfs_rq()
1626 return &task_rq(p)->cfs; in task_cfs_rq()
1634 return &rq->cfs; in cfs_rq_of()
1648 * rq::clock_update_flags bits
1650 * %RQCF_REQ_SKIP - will request skipping of clock update on the next
1654 * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
1657 * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
1664  *	if (rq::clock_update_flags >= RQCF_UPDATED)
1680 WARN_ON_ONCE(rq->clock_update_flags < RQCF_ACT_SKIP); in assert_clock_updated()
1688 return rq->clock; in rq_clock()
1696 return rq->clock_task; in rq_clock_task()
1702 rq->clock_update_flags |= RQCF_REQ_SKIP; in rq_clock_skip_update()
1712 rq->clock_update_flags &= ~RQCF_REQ_SKIP; in rq_clock_cancel_skipupdate()
1722 * to clear RQCF_ACT_SKIP of rq->clock_update_flags.
1727 WARN_ON_ONCE(rq->clock_update_flags & RQCF_ACT_SKIP); in rq_clock_start_loop_update()
1728 rq->clock_update_flags |= RQCF_ACT_SKIP; in rq_clock_start_loop_update()
1734 rq->clock_update_flags &= ~RQCF_ACT_SKIP; in rq_clock_stop_loop_update()
1763 WRITE_ONCE(rq->scx.clock, clock); in scx_rq_clock_update()
1764 smp_store_release(&rq->scx.flags, rq->scx.flags | SCX_RQ_CLK_VALID); in scx_rq_clock_update()
1771 WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID); in scx_rq_clock_invalidate()
1788 * copy of the (on-stack) 'struct rq_flags rf'.
1790 * Also see Documentation/locking/lockdep-design.rst.
1794 rf->cookie = lockdep_pin_lock(__rq_lockp(rq)); in rq_pin_lock()
1796 rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); in rq_pin_lock()
1797 rf->clock_update_flags = 0; in rq_pin_lock()
1798 WARN_ON_ONCE(rq->balance_callback && rq->balance_callback != &balance_push_callback); in rq_pin_lock()
1803 if (rq->clock_update_flags > RQCF_ACT_SKIP) in rq_unpin_lock()
1804 rf->clock_update_flags = RQCF_UPDATED; in rq_unpin_lock()
1807 lockdep_unpin_lock(__rq_lockp(rq), rf->cookie); in rq_unpin_lock()
1812 lockdep_repin_lock(__rq_lockp(rq), rf->cookie); in rq_repin_lock()
1817 rq->clock_update_flags |= rf->clock_update_flags; in rq_repin_lock()
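The pin/unpin/repin trio above is what catches code that drops rq->lock while some caller still assumes it is held. The usual shape of a temporary lock drop, as a hedged kernel-context outline (not standalone code):

/* Outline only: temporarily release rq->lock around work that must not hold it. */
static void example_drop_and_retake(struct rq *rq, struct rq_flags *rf)
{
        rq_unpin_lock(rq, rf);          /* declare the intent to release */
        raw_spin_rq_unlock(rq);

        /* ... sleep or take other locks here, without rq->lock ... */

        raw_spin_rq_lock(rq);
        rq_repin_lock(rq, rf);          /* lock is held again, restore the pin */
}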
1822 __acquires(rq->lock);
1826 __acquires(p->pi_lock)
1827 __acquires(rq->lock);
1830 __releases(rq->lock) in __task_rq_unlock()
1838 __releases(rq->lock) in task_rq_unlock()
1839 __releases(p->pi_lock) in task_rq_unlock()
1843 raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); in task_rq_unlock()
1847 _T->rq = task_rq_lock(_T->lock, &_T->rf),
1848 task_rq_unlock(_T->rq, _T->lock, &_T->rf),
1852 __acquires(rq->lock) in rq_lock_irqsave()
1854 raw_spin_rq_lock_irqsave(rq, rf->flags); in rq_lock_irqsave()
1859 __acquires(rq->lock) in rq_lock_irq()
1866 __acquires(rq->lock) in rq_lock()
1873 __releases(rq->lock) in rq_unlock_irqrestore()
1876 raw_spin_rq_unlock_irqrestore(rq, rf->flags); in rq_unlock_irqrestore()
1880 __releases(rq->lock) in rq_unlock_irq()
1887 __releases(rq->lock) in rq_unlock()
1894 rq_lock(_T->lock, &_T->rf),
1895 rq_unlock(_T->lock, &_T->rf),
1899 rq_lock_irq(_T->lock, &_T->rf),
1900 rq_unlock_irq(_T->lock, &_T->rf),
1904 rq_lock_irqsave(_T->lock, &_T->rf),
1905 rq_unlock_irqrestore(_T->lock, &_T->rf),
1909 __acquires(rq->lock) in this_rq_lock_irq()
1988 if (unlikely(head->next || rq->balance_callback == &balance_push_callback)) in queue_balance_callback()
1991 head->func = func; in queue_balance_callback()
1992 head->next = rq->balance_callback; in queue_balance_callback()
1993 rq->balance_callback = head; in queue_balance_callback()
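A hedged sketch of how a scheduling class uses the helper above to defer balancing work until it is safe to drop rq->lock; the names here are invented, but the shape mirrors the push/pull callbacks of the RT and deadline classes (kernel context assumed):

/* Illustrative per-CPU callback head and its deferred-work function. */
static void example_push_tasks(struct rq *rq)
{
        /* push/pull work runs later, once rq->lock can be dropped safely */
}

static DEFINE_PER_CPU(struct balance_callback, example_push_head);

static void example_queue_push(struct rq *rq)
{
        queue_balance_callback(rq, &per_cpu(example_push_head, rq->cpu),
                               example_push_tasks);
}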
2000 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
2004 * preempt-disabled sections.
2007 for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
2008 __sd; __sd = __sd->parent)
2018 * highest_flag_domain - Return highest sched_domain containing flag.
2032 if (sd->flags & flag) { in highest_flag_domain()
2053 if (sd->flags & flag) in lowest_flag_domain()
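Typical use of the iterator and flag helpers above, as a kernel-context outline: walk a CPU's domain hierarchy bottom-up under RCU and stop at the first level carrying a given SD_* flag; highest_flag_domain()/lowest_flag_domain() are the one-shot variants of the same search.

/* Outline only: does @cpu have a domain level spanning NUMA nodes? */
static bool example_cpu_has_numa_level(int cpu)
{
        struct sched_domain *sd;
        bool found = false;

        rcu_read_lock();
        for_each_domain(cpu, sd) {
                if (sd->flags & SD_NUMA) {
                        found = true;   /* lowest level that spans multiple nodes */
                        break;
                }
        }
        rcu_read_unlock();
        return found;
}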
2084 unsigned long min_capacity; /* Min per-CPU capacity in group */
2085 unsigned long max_capacity; /* Max per-CPU capacity in group */
2116 return to_cpumask(sg->cpumask); in sched_group_span()
2124 return to_cpumask(sg->sgc->cpumask); in group_balance_mask()
2136 if (!p->user_cpus_ptr) in task_user_cpus()
2138 return p->user_cpus_ptr; in task_user_cpus()
2150 * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
2158 return p->sched_task_group; in task_group()
2169 set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]); in set_task_rq()
2170 p->se.cfs_rq = tg->cfs_rq[cpu]; in set_task_rq()
2171 p->se.parent = tg->se[cpu]; in set_task_rq()
2172 p->se.depth = tg->se[cpu] ? tg->se[cpu]->depth + 1 : 0; in set_task_rq()
2177 * p->rt.rt_rq is NULL initially and it is easier to assign in set_task_rq()
2183 p->rt.rt_rq = tg->rt_rq[cpu]; in set_task_rq()
2184 p->rt.parent = tg->rt_se[cpu]; in set_task_rq()
2204 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be in __set_task_cpu()
2206 * per-task data have been completed by this moment. in __set_task_cpu()
2209 WRITE_ONCE(task_thread_info(p)->cpu, cpu); in __set_task_cpu()
2210 p->wake_cpu = cpu; in __set_task_cpu()
2229 * To support run-time toggling of sched features, all the translation units
2275 return rq->curr == p; in task_current()
2282 * rq->curr == rq->donor == p.
2286 return rq->donor == p; in task_current_donor()
2294 return !!p->blocked_on; in task_is_blocked()
2299 return p->on_cpu; in task_on_cpu()
2304 return READ_ONCE(p->on_rq) == TASK_ON_RQ_QUEUED; in task_on_rq_queued()
2309 return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; in task_on_rq_migrating()
2320 #define WF_RQ_SELECTED 0x80 /* ->select_task_rq() was called */
2344 * DEQUEUE_SLEEP - task is no longer runnable
2345 * ENQUEUE_WAKEUP - task just became runnable
2347 * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
2351 * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
2354 * NOCLOCK - skip the update_rq_clock() (avoids double updates)
2356 * MIGRATION - p->on_rq == TASK_ON_RQ_MIGRATING (used for DEADLINE)
2358 * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
2359 * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
2360 * ENQUEUE_MIGRATED - the task was migrated during wakeup
2361 * ENQUEUE_RQ_SELECTED - ->select_task_rq() was called
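The SAVE/RESTORE and NOCLOCK flags above combine into a common pattern: take a queued task off its runqueue, change one of its scheduling properties, and put it back where it was with a single clock update. A hedged outline of that pattern (kernel context; the running-task handling and error paths of the real setscheduler code are elided):

/* Outline of the dequeue/modify/enqueue pattern enabled by SAVE/RESTORE. */
static void example_change_property(struct rq *rq, struct task_struct *p)
{
        bool queued = task_on_rq_queued(p);

        if (queued)
                dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);

        /* ... modify p's weight/priority/etc. here ... */

        if (queued)
                enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
}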
2387 #define RETRY_TASK ((void *)-1UL)
2444 * The switched_from() call is allowed to drop rq->lock, therefore we
2446 * rq->lock. They are however serialized by p->pi_lock.
2472 WARN_ON_ONCE(rq->donor != prev); in put_prev_task()
2473 prev->sched_class->put_prev_task(rq, prev, NULL); in put_prev_task()
2478 next->sched_class->set_next_task(rq, next, false); in set_next_task()
2486 prev->dl_server = NULL; in __put_prev_set_next_dl_server()
2487 next->dl_server = rq->dl_server; in __put_prev_set_next_dl_server()
2488 rq->dl_server = NULL; in __put_prev_set_next_dl_server()
2495 WARN_ON_ONCE(rq->donor != prev); in put_prev_set_next_task()
2502 prev->sched_class->put_prev_task(rq, prev, next); in put_prev_set_next_task()
2503 next->sched_class->set_next_task(rq, next, true); in put_prev_set_next_task()
2510 * include/asm-generic/vmlinux.lds.h
2521 /* Defined in include/asm-generic/vmlinux.lds.h */
2563 return rq->stop && task_on_rq_queued(rq->stop); in sched_stop_runnable()
2568 return rq->dl.dl_nr_running > 0; in sched_dl_runnable()
2573 return rq->rt.rt_queued > 0; in sched_rt_runnable()
2578 return rq->cfs.nr_queued > 0; in sched_fair_runnable()
2599 if (!cpumask_test_cpu(cpu, p->cpus_ptr)) in task_allowed_on_cpu()
2603 if (!(p->flags & PF_KTHREAD) && !task_cpu_possible(cpu, p)) in task_allowed_on_cpu()
2621 struct task_struct *p = rq->donor; in get_push_task()
2625 if (rq->push_busy) in get_push_task()
2628 if (p->nr_cpus_allowed == 1) in get_push_task()
2631 if (p->migration_disabled) in get_push_task()
2634 rq->push_busy = true; in get_push_task()
2645 rq->idle_state = idle_state; in idle_set_state()
2652 return rq->idle_state; in idle_get_state()
2694 #define MAX_BW_BITS (64 - BW_SHIFT)
2695 #define MAX_BW ((1ULL << MAX_BW_BITS) - 1)
2730 unsigned prev_nr = rq->nr_running; in add_nr_running()
2732 rq->nr_running = prev_nr + count; in add_nr_running()
2737 if (prev_nr < 2 && rq->nr_running >= 2) in add_nr_running()
2738 set_rd_overloaded(rq->rd, 1); in add_nr_running()
2745 rq->nr_running -= count; in sub_nr_running()
2747 call_trace_sched_update_nr_running(rq, -count); in sub_nr_running()
2756 if (p->sched_contributes_to_load) in __block_task()
2757 rq->nr_uninterruptible++; in __block_task()
2759 if (p->in_iowait) { in __block_task()
2760 atomic_inc(&rq->nr_iowait); in __block_task()
2764 ASSERT_EXCLUSIVE_WRITER(p->on_rq); in __block_task()
2768 * this task, rendering our rq->__lock ineffective. in __block_task()
2771 * LOCK rq->__lock LOCK p->pi_lock in __block_task()
2777 * RELEASE p->on_rq = 0 if (p->on_rq && ...) in __block_task()
2780 * ACQUIRE (after ctrl-dep) in __block_task()
2786 * LOCK rq->__lock in __block_task()
2788 * STORE p->on_rq = 1 in __block_task()
2789 * UNLOCK rq->__lock in __block_task()
2791 * Callers must ensure to not reference @p after this -- we no longer in __block_task()
2794 smp_store_release(&p->on_rq, 0); in __block_task()
2828 * - enabled by features
2829 * - hrtimer is actually high res
2835 return hrtimer_is_hres_active(&rq->hrtick_timer); in hrtick_enabled()
2879 * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
2885  *     f_curr / f_max * SCHED_CAPACITY_SCALE
2899 * rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning.
2903 rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); in double_rq_clock_clear_update()
2904 rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); in double_rq_clock_clear_update()
2917 	 * In order to not have {0,2},{1,3} turn into an AB-BA, in rq_order_less()
2918 * order by core-id first and cpu-id second. in rq_order_less()
2922 * double_rq_lock(0,3); will take core-0, core-1 lock in rq_order_less()
2923 * double_rq_lock(1,2); will take core-1, core-0 lock in rq_order_less()
2925 * when only cpu-id is considered. in rq_order_less()
2927 if (rq1->core->cpu < rq2->core->cpu) in rq_order_less()
2929 if (rq1->core->cpu > rq2->core->cpu) in rq_order_less()
2933 * __sched_core_flip() relies on SMT having cpu-id lock order. in rq_order_less()
2936 return rq1->cpu < rq2->cpu; in rq_order_less()
2944 * fair double_lock_balance: Safely acquires both rq->locks in a fair
2952 __releases(this_rq->lock) in _double_lock_balance()
2953 __acquires(busiest->lock) in _double_lock_balance()
2954 __acquires(this_rq->lock) in _double_lock_balance()
2966 * already in proper order on entry. This favors lower CPU-ids and will
2971 __releases(this_rq->lock) in _double_lock_balance()
2972 __acquires(busiest->lock) in _double_lock_balance()
2973 __acquires(this_rq->lock) in _double_lock_balance()
2996 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
3006 __releases(busiest->lock) in double_unlock_balance()
3010 lock_set_subclass(&__rq_lockp(this_rq)->dep_map, 0, _RET_IP_); in double_unlock_balance()
3047 double_raw_lock(_T->lock, _T->lock2),
3048 double_raw_unlock(_T->lock, _T->lock2))
3051 * double_rq_unlock - safely unlock two runqueues
3057 __releases(rq1->lock) in double_rq_unlock()
3058 __releases(rq2->lock) in double_rq_unlock()
3063 __release(rq2->lock); in double_rq_unlock()
3073 double_rq_lock(_T->lock, _T->lock2),
3074 double_rq_unlock(_T->lock, _T->lock2))
3123 #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
3193 seq = __u64_stats_fetch_begin(&irqtime->sync); in irq_time_read()
3194 total = irqtime->total; in irq_time_read()
3195 } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); in irq_time_read()
3214 * cpufreq_update_util - Take a note about CPU utilization changes.
3221 * It can only be called from RCU-sched read-side critical sections.
3232 * but that really is a band-aid. Going forward it should be replaced with
3242 data->func(data, rq_clock(rq), flags); in cpufreq_update_util()
3271 * (BW_SHIFT - SCHED_CAPACITY_SHIFT) and false otherwise.
3277 return cap >= p->dl.dl_density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT); in dl_task_fits_capacity()
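A worked instance of the check above, assuming the usual fixed-point constants (BW_SHIFT = 20 for deadline bandwidth, SCHED_CAPACITY_SHIFT = 10 for capacity): dl_density is runtime/deadline in 20-bit fixed point, so shifting it right by the difference expresses the requirement on the 1024-based capacity scale.

/* Standalone arithmetic check; constants assumed, task parameters illustrative. */
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT                20
#define SCHED_CAPACITY_SHIFT    10

int main(void)
{
        uint64_t runtime  =  3000000;   /* 3 ms of runtime, in ns */
        uint64_t deadline = 10000000;   /* 10 ms deadline, in ns */
        uint64_t density  = (runtime << BW_SHIFT) / deadline;
        uint64_t needed   = density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT);

        /* Prints ~307: CPUs with original capacity >= 307 (out of 1024) fit this task. */
        printf("needed capacity: %llu\n", (unsigned long long)needed);
        return 0;
}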
3282 return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; in cpu_bw_dl()
3287 return READ_ONCE(rq->avg_dl.util_avg); in cpu_util_dl()
3296 return READ_ONCE(rq->avg_rt.util_avg); in cpu_util_rt()
3308 * Returns true if userspace opted-in to use uclamp and aggregation at rq level
3331 return READ_ONCE(rq->uclamp[clamp_id].value); in uclamp_rq_get()
3337 WRITE_ONCE(rq->uclamp[clamp_id].value, value); in uclamp_rq_set()
3342 return rq->uclamp_flags & UCLAMP_FLAG_IDLE; in uclamp_rq_is_idle()
3355 max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value); in uclamp_rq_is_capped()
3378 return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1); in uclamp_bucket_id()
3384 uc_se->value = value; in uclamp_se_set()
3385 uc_se->bucket_id = uclamp_bucket_id(value); in uclamp_se_set()
3386 uc_se->user_defined = user_defined; in uclamp_se_set()
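A worked example of the bucketing math above, assuming the default of 5 clamp buckets and SCHED_CAPACITY_SCALE = 1024 (so UCLAMP_BUCKET_DELTA rounds to 205); the min_t() cap keeps the maximum clamp value of 1024 inside the last bucket.

/* Standalone sketch of uclamp bucket mapping; bucket count is an assumption. */
#include <stdio.h>

#define SCHED_CAPACITY_SCALE    1024
#define UCLAMP_BUCKETS          5
#define UCLAMP_BUCKET_DELTA     ((SCHED_CAPACITY_SCALE + UCLAMP_BUCKETS / 2) / UCLAMP_BUCKETS)

static unsigned int bucket_id(unsigned int clamp_value)
{
        unsigned int id = clamp_value / UCLAMP_BUCKET_DELTA;

        return id < UCLAMP_BUCKETS - 1 ? id : UCLAMP_BUCKETS - 1;
}

int main(void)
{
        /* Prints "0 1 2 4": 0 -> bucket 0, 300 -> 1, 512 -> 2, 1024 -> last bucket. */
        printf("%u %u %u %u\n", bucket_id(0), bucket_id(300),
               bucket_id(512), bucket_id(1024));
        return 0;
}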
3434 return READ_ONCE(rq->avg_irq.util_avg); in cpu_util_irq()
3440 util *= (max - irq); in scale_irq_capacity()
3466 #define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
3487 * - prior user-space memory accesses and store to rq->membarrier_state,
3488 * - store to rq->membarrier_state and following user-space memory accesses.
3489 * In the same way it provides those guarantees around store to rq->curr.
3500 membarrier_state = atomic_read(&next_mm->membarrier_state); in membarrier_switch_mm()
3501 if (READ_ONCE(rq->membarrier_state) == membarrier_state) in membarrier_switch_mm()
3504 WRITE_ONCE(rq->membarrier_state, membarrier_state); in membarrier_switch_mm()
3519 if (!(p->flags & PF_KTHREAD)) in is_per_cpu_kthread()
3522 if (p->nr_cpus_allowed != 1) in is_per_cpu_kthread()
3561 * The per-mm/cpu cid can have the MM_CID_LAZY_PUT flag set or transition to
3570 struct mm_struct *mm = t->mm; in mm_cid_put_lazy()
3571 struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; in mm_cid_put_lazy()
3575 cid = __this_cpu_read(pcpu_cid->cid); in mm_cid_put_lazy()
3577 !try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET)) in mm_cid_put_lazy()
3584 struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; in mm_cid_pcpu_unset()
3588 cid = __this_cpu_read(pcpu_cid->cid); in mm_cid_pcpu_unset()
3593 * Attempt transition from valid or lazy-put to unset. in mm_cid_pcpu_unset()
3595 res = cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, cid, MM_CID_UNSET); in mm_cid_pcpu_unset()
3617 struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; in __mm_cid_try_get()
3626 max_nr_cid = atomic_read(&mm->max_nr_cid); in __mm_cid_try_get()
3627 while ((allowed_max_nr_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed), in __mm_cid_try_get()
3628 atomic_read(&mm->mm_users))), in __mm_cid_try_get()
3630 /* atomic_try_cmpxchg loads previous mm->max_nr_cid into max_nr_cid. */ in __mm_cid_try_get()
3631 if (atomic_try_cmpxchg(&mm->max_nr_cid, &max_nr_cid, allowed_max_nr_cid)) { in __mm_cid_try_get()
3636 /* Try to re-use recent cid. This improves cache locality. */ in __mm_cid_try_get()
3637 cid = __this_cpu_read(pcpu_cid->recent_cid); in __mm_cid_try_get()
3648 while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) { in __mm_cid_try_get()
3649 /* atomic_try_cmpxchg loads previous mm->max_nr_cid into cid. */ in __mm_cid_try_get()
3650 if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1)) in __mm_cid_try_get()
3658 * filled. This only happens during concurrent remote-clear in __mm_cid_try_get()
3663 if (cid < READ_ONCE(mm->nr_cpus_allowed)) in __mm_cid_try_get()
3668 return -1; in __mm_cid_try_get()
3675 * with the per-cpu cid value, allowing to estimate how recently it was used.
3679 struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(rq)); in mm_cid_snapshot_time()
3682 WRITE_ONCE(pcpu_cid->time, rq->clock); in mm_cid_snapshot_time()
3691 * All allocations (even those using the cid_lock) are lock-free. If in __mm_cid_get()
3742 struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; in mm_cid_get()
3748 cid = __this_cpu_read(pcpu_cid->cid); in mm_cid_get()
3754 if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET)) in mm_cid_get()
3758 __this_cpu_write(pcpu_cid->cid, cid); in mm_cid_get()
3759 __this_cpu_write(pcpu_cid->recent_cid, cid); in mm_cid_get()
3769 * Provide a memory barrier between rq->curr store and load of in switch_mm_cid()
3770 * {prev,next}->mm->pcpu_cid[cpu] on rq->curr->mm transition. in switch_mm_cid()
3774 if (!next->mm) { // to kernel in switch_mm_cid()
3776 * user -> kernel transition does not guarantee a barrier, but in switch_mm_cid()
3780 if (prev->mm) // from user in switch_mm_cid()
3783 * kernel -> kernel transition does not change rq->curr->mm in switch_mm_cid()
3788 * kernel -> user transition does not provide a barrier in switch_mm_cid()
3789 * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu]. in switch_mm_cid()
3792 if (!prev->mm) { // from kernel in switch_mm_cid()
3796 * user->user transition relies on an implicit in switch_mm_cid()
3798 * current->mm changes. If the architecture in switch_mm_cid()
3800 * barrier, it is emitted here. If current->mm in switch_mm_cid()
3806 if (prev->mm_cid_active) { in switch_mm_cid()
3807 mm_cid_snapshot_time(rq, prev->mm); in switch_mm_cid()
3809 prev->mm_cid = -1; in switch_mm_cid()
3811 if (next->mm_cid_active) in switch_mm_cid()
3812 next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next, next->mm); in switch_mm_cid()
3832 set_task_cpu(task, dst_rq->cpu); in move_queued_task_locked()
3840 cpumask_test_cpu(cpu, &p->cpus_mask)) in task_is_pushable()
3851 prio = min(prio, pi_task->prio); in __rt_effective_prio()