xref: /linux/kernel/cgroup/cpuset-internal.h (revision 5dd74441cbf42c22e874450eb6a6bbb19390a216)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 
3 #ifndef __CPUSET_INTERNAL_H
4 #define __CPUSET_INTERNAL_H
5 
6 #include <linux/cgroup.h>
7 #include <linux/cpu.h>
8 #include <linux/cpumask.h>
9 #include <linux/cpuset.h>
10 #include <linux/spinlock.h>
11 #include <linux/union_find.h>
12 #include <linux/sched/isolation.h>
13 
/* See "Frequency meter" comments, below. */

/*
 * Digitally-filtered event-rate meter; all fields are guarded by ->lock.
 */
struct fmeter {
	int cnt;		/* unprocessed events count */
	int val;		/* most recent output value */
	time64_t time;		/* clock (secs) when val computed */
	spinlock_t lock;	/* guards read or write of above */
};
22 
/*
 * Invalid partition error code
 *
 * Records why a partition root became (or could not become) valid.
 * NOTE(review): the user-visible strings for these codes live outside this
 * header (presumably in cpuset.c) — keep the two in sync.
 */
enum prs_errcode {
	PERR_NONE = 0,		/* no error */
	PERR_INVCPUS,		/* invalid exclusive CPU list */
	PERR_INVPARENT,		/* invalid parent */
	PERR_NOTPART,		/* parent is not a partition root */
	PERR_NOTEXCL,		/* CPU list not exclusive */
	PERR_NOCPUS,		/* no usable CPUs */
	PERR_HOTPLUG,		/* CPU hotplug conflict */
	PERR_CPUSEMPTY,		/* CPU list is empty */
	PERR_HKEEPING,		/* housekeeping (isolation) conflict */
	PERR_ACCESS,		/* permission denied */
	PERR_REMOTE,		/* remote partition conflict */
};
39 
/* bits in struct cpuset flags field */
/* Each enumerator is a bit index into cpuset->flags, used with test_bit(). */
typedef enum {
	CS_CPU_EXCLUSIVE,
	CS_MEM_EXCLUSIVE,
	CS_MEM_HARDWALL,
	CS_MEMORY_MIGRATE,
	CS_SCHED_LOAD_BALANCE,
	CS_SPREAD_PAGE,
	CS_SPREAD_SLAB,
} cpuset_flagbits_t;
50 
/* The various types of files and directories in a cpuset file system */

/*
 * One value per cpuset control file; presumably used as the cftype private
 * value that read/write handlers switch on — confirm against cpuset.c.
 */
typedef enum {
	FILE_MEMORY_MIGRATE,
	FILE_CPULIST,
	FILE_MEMLIST,
	FILE_EFFECTIVE_CPULIST,
	FILE_EFFECTIVE_MEMLIST,
	FILE_SUBPARTS_CPULIST,
	FILE_EXCLUSIVE_CPULIST,
	FILE_EFFECTIVE_XCPULIST,
	FILE_ISOLATED_CPULIST,
	FILE_CPU_EXCLUSIVE,
	FILE_MEM_EXCLUSIVE,
	FILE_MEM_HARDWALL,
	FILE_SCHED_LOAD_BALANCE,
	FILE_PARTITION_ROOT,
	FILE_SCHED_RELAX_DOMAIN_LEVEL,
	FILE_MEMORY_PRESSURE_ENABLED,
	FILE_MEMORY_PRESSURE,
	FILE_SPREAD_PAGE,
	FILE_SPREAD_SLAB,
} cpuset_filetype_t;
74 
struct cpuset {
	struct cgroup_subsys_state css;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * On default hierarchy:
	 *
	 * The user-configured masks can only be changed by writing to
	 * cpuset.cpus and cpuset.mems, and won't be limited by the
	 * parent masks.
	 *
	 * The effective masks are the real masks that apply to the tasks
	 * in the cpuset. They may be changed if the configured masks are
	 * changed or hotplug happens.
	 *
	 * effective_mask == configured_mask & parent's effective_mask,
	 * and if it ends up empty, it will inherit the parent's mask.
	 *
	 *
	 * On legacy hierarchy:
	 *
	 * The user-configured masks are always the same as the effective masks.
	 */

	/* user-configured CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t cpus_allowed;
	nodemask_t mems_allowed;

	/* effective CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t effective_cpus;
	nodemask_t effective_mems;

	/*
	 * Exclusive CPUs dedicated to current cgroup (default hierarchy only)
	 *
	 * The effective_cpus of a valid partition root comes solely from its
	 * effective_xcpus and some of the effective_xcpus may be distributed
	 * to sub-partitions below & hence excluded from its effective_cpus.
	 * For a valid partition root, its effective_cpus have no relationship
	 * with cpus_allowed unless its exclusive_cpus isn't set.
	 *
	 * This value will only be set if either exclusive_cpus is set or
	 * when this cpuset becomes a local partition root.
	 */
	cpumask_var_t effective_xcpus;

	/*
	 * Exclusive CPUs as requested by the user (default hierarchy only)
	 *
	 * Its value is independent of cpus_allowed and designates the set of
	 * CPUs that can be granted to the current cpuset or its children when
	 * it becomes a valid partition root. The effective set of exclusive
	 * CPUs granted (effective_xcpus) depends on whether those exclusive
	 * CPUs are passed down by its ancestors and not yet taken up by
	 * another sibling partition root along the way.
	 *
	 * If its value isn't set, it defaults to cpus_allowed.
	 */
	cpumask_var_t exclusive_cpus;

	/*
	 * This is the old Memory Nodes that tasks took on.
	 *
	 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
	 * - A new cpuset's old_mems_allowed is initialized when some
	 *   task is moved into it.
	 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
	 *   cpuset.mems_allowed and have tasks' nodemask updated, and
	 *   then old_mems_allowed is updated to mems_allowed.
	 */
	nodemask_t old_mems_allowed;

	/*
	 * Tasks are being attached to this cpuset.  Used to prevent
	 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
	 */
	int attach_in_progress;

	/* partition root state */
	int partition_root_state;

	/*
	 * Whether cpuset is a remote partition.
	 * It used to be a list anchoring all remote partitions — we can switch back
	 * to a list if we need to iterate over the remote partitions.
	 */
	bool remote_partition;

	/*
	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
	 * know when to rebuild associated root domain bandwidth information.
	 */
	int nr_deadline_tasks;
	int nr_migrate_dl_tasks;
	/* DL bandwidth that needs destination reservation for this attach. */
	u64 sum_migrate_dl_bw;
	/*
	 * CPU used for temporary DL bandwidth allocation during attach;
	 * -1 if no DL bandwidth was allocated in the current attach.
	 */
	int dl_bw_cpu;

	/* Invalid partition error code, not lock protected */
	enum prs_errcode prs_err;

	/* Handle for cpuset.cpus.partition */
	struct cgroup_file partition_file;

#ifdef CONFIG_CPUSETS_V1
	struct fmeter fmeter;		/* memory_pressure filter */

	/* for custom sched domain */
	int relax_domain_level;

	/* Used to merge intersecting subsets for generate_sched_domains */
	struct uf_node node;
#endif
};
194 
195 extern struct cpuset top_cpuset;
196 
197 static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
198 {
199 	return css ? container_of(css, struct cpuset, css) : NULL;
200 }
201 
202 /* Retrieve the cpuset for a task */
203 static inline struct cpuset *task_cs(struct task_struct *task)
204 {
205 	return css_cs(task_css(task, cpuset_cgrp_id));
206 }
207 
208 static inline struct cpuset *parent_cs(struct cpuset *cs)
209 {
210 	return css_cs(cs->css.parent);
211 }
212 
213 /* convenient tests for these bits */
214 static inline bool is_cpuset_online(struct cpuset *cs)
215 {
216 	return css_is_online(&cs->css) && !css_is_dying(&cs->css);
217 }
218 
219 static inline int is_cpu_exclusive(const struct cpuset *cs)
220 {
221 	return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
222 }
223 
224 static inline int is_mem_exclusive(const struct cpuset *cs)
225 {
226 	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
227 }
228 
229 static inline int is_mem_hardwall(const struct cpuset *cs)
230 {
231 	return test_bit(CS_MEM_HARDWALL, &cs->flags);
232 }
233 
234 static inline int is_sched_load_balance(const struct cpuset *cs)
235 {
236 	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
237 }
238 
239 static inline int is_memory_migrate(const struct cpuset *cs)
240 {
241 	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
242 }
243 
244 static inline int is_spread_page(const struct cpuset *cs)
245 {
246 	return test_bit(CS_SPREAD_PAGE, &cs->flags);
247 }
248 
249 static inline int is_spread_slab(const struct cpuset *cs)
250 {
251 	return test_bit(CS_SPREAD_SLAB, &cs->flags);
252 }
253 
254 /*
255  * Helper routine for generate_sched_domains().
256  * Do cpusets a, b have overlapping effective cpus_allowed masks?
257  */
258 static inline int cpusets_overlap(struct cpuset *a, struct cpuset *b)
259 {
260 	return cpumask_intersects(a->effective_cpus, b->effective_cpus);
261 }
262 
263 static inline int nr_cpusets(void)
264 {
265 	/* jump label reference count + the top-level cpuset */
266 	return static_key_count(&cpusets_enabled_key.key) + 1;
267 }
268 
269 static inline bool cpuset_is_populated(struct cpuset *cs)
270 {
271 	lockdep_assert_cpuset_lock_held();
272 
273 	/* Cpusets in the process of attaching should be considered as populated */
274 	return cgroup_is_populated(cs->css.cgroup) ||
275 		cs->attach_in_progress;
276 }
277 
/**
 * cpuset_for_each_child - traverse online children of a cpuset
 * @child_cs: loop cursor pointing to the current child
 * @pos_css: used for iteration
 * @parent_cs: target cpuset to walk children of
 *
 * Walk @child_cs through the online children of @parent_cs.  Must be used
 * with RCU read locked.
 *
 * Note: the expansion ends in a bare `if` that filters offline cpusets, so
 * an `else` placed after the loop body would bind to that `if`.
 */
#define cpuset_for_each_child(child_cs, pos_css, parent_cs)		\
	css_for_each_child((pos_css), &(parent_cs)->css)		\
		if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
290 
/**
 * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
 * @des_cs: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @root_cs: target cpuset to walk ancestor of
 *
 * Walk @des_cs through the online descendants of @root_cs.  Must be used
 * with RCU read locked.  The caller may modify @pos_css by calling
 * css_rightmost_descendant() to skip subtree.  @root_cs is included in the
 * iteration and the first node to be visited.
 *
 * Note: the expansion ends in a bare `if` that filters offline cpusets, so
 * an `else` placed after the loop body would bind to that `if`.
 */
#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)	\
	css_for_each_descendant_pre((pos_css), &(root_cs)->css)		\
		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
305 
/* Core cpuset helpers shared with the v1 code (defined outside this header). */
void rebuild_sched_domains_locked(void);
void cpuset_callback_lock_irq(void);
void cpuset_callback_unlock_irq(void);
void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus);
void cpuset_update_tasks_nodemask(struct cpuset *cs);
int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on);
ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off);
int cpuset_common_seq_show(struct seq_file *sf, void *v);
void cpuset_full_lock(void);
void cpuset_full_unlock(void);
317 
/*
 * cpuset-v1.c
 *
 * Legacy (cgroup v1) interfaces, available only when CONFIG_CPUSETS_V1 is
 * enabled; the #else branch below provides no-op stubs.
 */
#ifdef CONFIG_CPUSETS_V1
extern struct cftype cpuset1_files[];
void cpuset1_update_task_spread_flags(struct cpuset *cs,
					struct task_struct *tsk);
void cpuset1_update_tasks_flags(struct cpuset *cs);
void cpuset1_hotplug_update_tasks(struct cpuset *cs,
			    struct cpumask *new_cpus, nodemask_t *new_mems,
			    bool cpus_updated, bool mems_updated);
int cpuset1_validate_change(struct cpuset *cur, struct cpuset *trial);
bool cpuset1_cpus_excl_conflict(struct cpuset *cs1, struct cpuset *cs2);
void cpuset1_init(struct cpuset *cs);
void cpuset1_online_css(struct cgroup_subsys_state *css);
int cpuset1_generate_sched_domains(cpumask_var_t **domains,
			struct sched_domain_attr **attributes);

336 #else
337 static inline void cpuset1_update_task_spread_flags(struct cpuset *cs,
338 					struct task_struct *tsk) {}
339 static inline void cpuset1_update_tasks_flags(struct cpuset *cs) {}
340 static inline void cpuset1_hotplug_update_tasks(struct cpuset *cs,
341 			    struct cpumask *new_cpus, nodemask_t *new_mems,
342 			    bool cpus_updated, bool mems_updated) {}
343 static inline int cpuset1_validate_change(struct cpuset *cur,
344 				struct cpuset *trial) { return 0; }
345 static inline bool cpuset1_cpus_excl_conflict(struct cpuset *cs1,
346 					struct cpuset *cs2) { return false; }
347 static inline void cpuset1_init(struct cpuset *cs) {}
348 static inline void cpuset1_online_css(struct cgroup_subsys_state *css) {}
349 static inline int cpuset1_generate_sched_domains(cpumask_var_t **domains,
350 			struct sched_domain_attr **attributes) { return 0; };
351 
352 #endif /* CONFIG_CPUSETS_V1 */
353 
354 #endif /* __CPUSET_INTERNAL_H */
355