xref: /linux/kernel/cgroup/cpuset-internal.h (revision 8449d3252c2603a51ffc7c36cb5bd94874378b7d)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 
3 #ifndef __CPUSET_INTERNAL_H
4 #define __CPUSET_INTERNAL_H
5 
6 #include <linux/cgroup.h>
7 #include <linux/cpu.h>
8 #include <linux/cpumask.h>
9 #include <linux/cpuset.h>
10 #include <linux/spinlock.h>
11 #include <linux/union_find.h>
12 
/* See the "Frequency meter" comments in cpuset-v1.c, next to fmeter_init(). */
14 
/*
 * Frequency meter state.  @lock protects every other field of the
 * structure, for both reads and writes.
 */
struct fmeter {
	int cnt;		/* unprocessed events count */
	int val;		/* most recent output value */
	time64_t time;		/* clock (secs) when val computed */
	spinlock_t lock;	/* guards read or write of above */
};
21 
/*
 * Invalid partition error code.
 *
 * The codes are consecutive and start at PERR_NONE (0).
 */
enum prs_errcode {
	PERR_NONE	= 0,
	PERR_INVCPUS	= 1,
	PERR_INVPARENT	= 2,
	PERR_NOTPART	= 3,
	PERR_NOTEXCL	= 4,
	PERR_NOCPUS	= 5,
	PERR_HOTPLUG	= 6,
	PERR_CPUSEMPTY	= 7,
	PERR_HKEEPING	= 8,
	PERR_ACCESS	= 9,
	PERR_REMOTE	= 10,
};
38 
/*
 * Bit numbers used in the flags field of struct cpuset.  Each value is the
 * index of a bit, not a mask.
 */
typedef enum {
	CS_CPU_EXCLUSIVE	= 0,
	CS_MEM_EXCLUSIVE	= 1,
	CS_MEM_HARDWALL		= 2,
	CS_MEMORY_MIGRATE	= 3,
	CS_SCHED_LOAD_BALANCE	= 4,
	CS_SPREAD_PAGE		= 5,
	CS_SPREAD_SLAB		= 6,
} cpuset_flagbits_t;
49 
/*
 * The various types of files and directories in a cpuset file system.
 * Identifiers are numbered consecutively from 0 in the order listed.
 */
typedef enum {
	FILE_MEMORY_MIGRATE = 0,
	FILE_CPULIST,
	FILE_MEMLIST,
	FILE_EFFECTIVE_CPULIST,
	FILE_EFFECTIVE_MEMLIST,
	FILE_SUBPARTS_CPULIST,
	FILE_EXCLUSIVE_CPULIST,
	FILE_EFFECTIVE_XCPULIST,
	FILE_ISOLATED_CPULIST,
	FILE_CPU_EXCLUSIVE,
	FILE_MEM_EXCLUSIVE,
	FILE_MEM_HARDWALL,
	FILE_SCHED_LOAD_BALANCE,
	FILE_PARTITION_ROOT,
	FILE_SCHED_RELAX_DOMAIN_LEVEL,
	FILE_MEMORY_PRESSURE_ENABLED,
	FILE_MEMORY_PRESSURE,
	FILE_SPREAD_PAGE,
	FILE_SPREAD_SLAB,
} cpuset_filetype_t;
73 
/*
 * Per-cgroup cpuset state.
 *
 * The cgroup_subsys_state is embedded as the member "css"; css_cs() relies
 * on that name to get back to the enclosing cpuset with container_of().
 */
struct cpuset {
	struct cgroup_subsys_state css;

	/* Bit numbers are cpuset_flagbits_t values. */
	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * On default hierarchy:
	 *
	 * The user-configured masks can only be changed by writing to
	 * cpuset.cpus and cpuset.mems, and won't be limited by the
	 * parent masks.
	 *
	 * The effective masks are the real masks that apply to the tasks
	 * in the cpuset. They may be changed if the configured masks are
	 * changed or hotplug happens.
	 *
	 * effective_mask == configured_mask & parent's effective_mask,
	 * and if it ends up empty, it will inherit the parent's mask.
	 *
	 *
	 * On legacy hierarchy:
	 *
	 * The user-configured masks are always the same as the effective
	 * masks.
	 */

	/* user-configured CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t cpus_allowed;
	nodemask_t mems_allowed;

	/* effective CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t effective_cpus;
	nodemask_t effective_mems;

	/*
	 * Exclusive CPUs dedicated to current cgroup (default hierarchy only)
	 *
	 * The effective_cpus of a valid partition root comes solely from its
	 * effective_xcpus and some of the effective_xcpus may be distributed
	 * to sub-partitions below & hence excluded from its effective_cpus.
	 * For a valid partition root, its effective_cpus have no relationship
	 * with cpus_allowed unless its exclusive_cpus isn't set.
	 *
	 * This value will only be set if either exclusive_cpus is set or
	 * when this cpuset becomes a local partition root.
	 */
	cpumask_var_t effective_xcpus;

	/*
	 * Exclusive CPUs as requested by the user (default hierarchy only)
	 *
	 * Its value is independent of cpus_allowed and designates the set of
	 * CPUs that can be granted to the current cpuset or its children when
	 * it becomes a valid partition root. The effective set of exclusive
	 * CPUs granted (effective_xcpus) depends on whether those exclusive
	 * CPUs are passed down by its ancestors and not yet taken up by
	 * another sibling partition root along the way.
	 *
	 * If its value isn't set, it defaults to cpus_allowed.
	 */
	cpumask_var_t exclusive_cpus;

	/*
	 * These are the old Memory Nodes that tasks took on.
	 *
	 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
	 * - A new cpuset's old_mems_allowed is initialized when some
	 *   task is moved into it.
	 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
	 *   cpuset.mems_allowed and have tasks' nodemask updated, and
	 *   then old_mems_allowed is updated to mems_allowed.
	 */
	nodemask_t old_mems_allowed;

	struct fmeter fmeter;		/* memory_pressure filter */

	/*
	 * Tasks are being attached to this cpuset.  Used to prevent
	 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
	 */
	int attach_in_progress;

	/* for custom sched domain */
	int relax_domain_level;

	/* partition root state */
	int partition_root_state;

	/*
	 * Whether cpuset is a remote partition.
	 * It used to be a list anchoring all remote partitions - we can
	 * switch back to a list if we need to iterate over the remote
	 * partitions.
	 */
	bool remote_partition;

	/*
	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
	 * know when to rebuild associated root domain bandwidth information.
	 */
	int nr_deadline_tasks;
	/*
	 * NOTE(review): the next two fields are not described in this header;
	 * by name they look like accounting for deadline tasks that are being
	 * migrated and their summed bandwidth - confirm against their users.
	 */
	int nr_migrate_dl_tasks;
	u64 sum_migrate_dl_bw;

	/* Invalid partition error code, not lock protected */
	enum prs_errcode prs_err;

	/* Handle for cpuset.cpus.partition */
	struct cgroup_file partition_file;

	/* Used to merge intersecting subsets for generate_sched_domains */
	struct uf_node node;
};
185 
css_cs(struct cgroup_subsys_state * css)186 static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
187 {
188 	return css ? container_of(css, struct cpuset, css) : NULL;
189 }
190 
191 /* Retrieve the cpuset for a task */
task_cs(struct task_struct * task)192 static inline struct cpuset *task_cs(struct task_struct *task)
193 {
194 	return css_cs(task_css(task, cpuset_cgrp_id));
195 }
196 
parent_cs(struct cpuset * cs)197 static inline struct cpuset *parent_cs(struct cpuset *cs)
198 {
199 	return css_cs(cs->css.parent);
200 }
201 
202 /* convenient tests for these bits */
is_cpuset_online(struct cpuset * cs)203 static inline bool is_cpuset_online(struct cpuset *cs)
204 {
205 	return css_is_online(&cs->css) && !css_is_dying(&cs->css);
206 }
207 
is_cpu_exclusive(const struct cpuset * cs)208 static inline int is_cpu_exclusive(const struct cpuset *cs)
209 {
210 	return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
211 }
212 
is_mem_exclusive(const struct cpuset * cs)213 static inline int is_mem_exclusive(const struct cpuset *cs)
214 {
215 	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
216 }
217 
is_mem_hardwall(const struct cpuset * cs)218 static inline int is_mem_hardwall(const struct cpuset *cs)
219 {
220 	return test_bit(CS_MEM_HARDWALL, &cs->flags);
221 }
222 
is_sched_load_balance(const struct cpuset * cs)223 static inline int is_sched_load_balance(const struct cpuset *cs)
224 {
225 	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
226 }
227 
is_memory_migrate(const struct cpuset * cs)228 static inline int is_memory_migrate(const struct cpuset *cs)
229 {
230 	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
231 }
232 
is_spread_page(const struct cpuset * cs)233 static inline int is_spread_page(const struct cpuset *cs)
234 {
235 	return test_bit(CS_SPREAD_PAGE, &cs->flags);
236 }
237 
is_spread_slab(const struct cpuset * cs)238 static inline int is_spread_slab(const struct cpuset *cs)
239 {
240 	return test_bit(CS_SPREAD_SLAB, &cs->flags);
241 }
242 
/**
 * cpuset_for_each_child - traverse online children of a cpuset
 * @child_cs: loop cursor pointing to the current child
 * @pos_css: used for iteration
 * @parent_cs: target cpuset to walk children of
 *
 * Walk @child_cs through the online children of @parent_cs.  Must be used
 * with RCU read locked.
 *
 * The online filter is written as "if (!cond) {} else" so that the loop
 * body becomes the macro's own else branch.  An "else" that a caller writes
 * after an unbraced use therefore cannot pair with the hidden "if".
 */
#define cpuset_for_each_child(child_cs, pos_css, parent_cs)		\
	css_for_each_child((pos_css), &(parent_cs)->css)		\
		if (!is_cpuset_online(((child_cs) = css_cs((pos_css))))) \
			{} else
255 
/**
 * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
 * @des_cs: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @root_cs: target cpuset to walk descendants of
 *
 * Walk @des_cs through the online descendants of @root_cs.  Must be used
 * with RCU read locked.  The caller may modify @pos_css by calling
 * css_rightmost_descendant() to skip subtree.  @root_cs is included in the
 * iteration and the first node to be visited.
 *
 * The online filter is written as "if (!cond) {} else" so that the loop
 * body becomes the macro's own else branch.  An "else" that a caller writes
 * after an unbraced use therefore cannot pair with the hidden "if".
 */
#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)	\
	css_for_each_descendant_pre((pos_css), &(root_cs)->css)		\
		if (!is_cpuset_online(((des_cs) = css_cs((pos_css)))))	\
			{} else
270 
/*
 * Helpers shared between the cpuset source files.  Only the prototypes live
 * in this header; the definitions are elsewhere (presumably cpuset.c -
 * TODO confirm).
 */
void rebuild_sched_domains_locked(void);
void cpuset_callback_lock_irq(void);
void cpuset_callback_unlock_irq(void);
void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus);
void cpuset_update_tasks_nodemask(struct cpuset *cs);
int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on);
ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off);
int cpuset_common_seq_show(struct seq_file *sf, void *v);
void cpuset_full_lock(void);
void cpuset_full_unlock(void);
282 
/*
 * cpuset-v1.c
 *
 * With CONFIG_CPUSETS_V1 set, the names below are ordinary external
 * declarations.  Without it, the functions become empty inline stubs
 * (cpuset1_validate_change() always returns 0), so their callers need no
 * #ifdef of their own.  cpuset1_files[] is declared only in the enabled
 * case and has no stub.
 */
#ifdef CONFIG_CPUSETS_V1
extern struct cftype cpuset1_files[];
void fmeter_init(struct fmeter *fmp);
void cpuset1_update_task_spread_flags(struct cpuset *cs,
					struct task_struct *tsk);
void cpuset1_update_tasks_flags(struct cpuset *cs);
void cpuset1_hotplug_update_tasks(struct cpuset *cs,
			    struct cpumask *new_cpus, nodemask_t *new_mems,
			    bool cpus_updated, bool mems_updated);
int cpuset1_validate_change(struct cpuset *cur, struct cpuset *trial);
#else
/* No-op replacements used when the v1 interface is compiled out. */
static inline void fmeter_init(struct fmeter *fmp) {}
static inline void cpuset1_update_task_spread_flags(struct cpuset *cs,
					struct task_struct *tsk) {}
static inline void cpuset1_update_tasks_flags(struct cpuset *cs) {}
static inline void cpuset1_hotplug_update_tasks(struct cpuset *cs,
			    struct cpumask *new_cpus, nodemask_t *new_mems,
			    bool cpus_updated, bool mems_updated) {}
/* Returning 0 makes every change pass the v1-specific validation. */
static inline int cpuset1_validate_change(struct cpuset *cur,
				struct cpuset *trial) { return 0; }
#endif /* CONFIG_CPUSETS_V1 */
307 
308 #endif /* __CPUSET_INTERNAL_H */
309