Lines Matching +full:early +full:- +full:to +full:- +full:mid
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include "cgroup-internal.h"
40 * pidlist destructions need to be flushed on cgroup destruction. Use a
45 /* protects cgroup_subsys->release_agent_path */
55 /* Check also dfl_cftypes for file-less controllers, i.e. perf_event */ in cgroup1_subsys_absent()
56 return ss->legacy_cftypes == NULL && ss->dfl_cftypes; in cgroup1_subsys_absent()
60 * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
61 * @from: attach to all cgroups of a given task
62 * @tsk: the task to be attached
92 * cgroup_transfer_tasks - move tasks from one cgroup to another
93 * @to: cgroup to which the tasks will be moved
98 * is guaranteed to be either visible in the source cgroup after the
104 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) in cgroup_transfer_tasks() argument
112 if (cgroup_on_dfl(to)) in cgroup_transfer_tasks()
113 return -EINVAL; in cgroup_transfer_tasks()
115 ret = cgroup_migrate_vet_dst(to); in cgroup_transfer_tasks()
125 list_for_each_entry(link, &from->cset_links, cset_link) in cgroup_transfer_tasks()
126 cgroup_migrate_add_src(link->cset, to, &mgctx); in cgroup_transfer_tasks()
134 * Migrate tasks one-by-one until @from is empty. This fails iff in cgroup_transfer_tasks()
135 * ->can_attach() fails. in cgroup_transfer_tasks()
138 css_task_iter_start(&from->self, 0, &it); in cgroup_transfer_tasks()
142 } while (task && (task->flags & PF_EXITING)); in cgroup_transfer_tasks()
151 TRACE_CGROUP_PATH(transfer_tasks, to, task, false); in cgroup_transfer_tasks()
166 * *lots* of attached tasks. So it may need several calls to read(),
182 * to the cgroup.
186 * used to find which pidlist is wanted. doesn't change as long as
196 /* pointer to the cgroup we belong to, for list removal purposes */
203 * Used to destroy all pidlists lingering waiting for destroy timer. None
210 mutex_lock(&cgrp->pidlist_mutex); in cgroup1_pidlist_destroy_all()
211 list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links) in cgroup1_pidlist_destroy_all()
212 mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0); in cgroup1_pidlist_destroy_all()
213 mutex_unlock(&cgrp->pidlist_mutex); in cgroup1_pidlist_destroy_all()
216 BUG_ON(!list_empty(&cgrp->pidlists)); in cgroup1_pidlist_destroy_all()
226 mutex_lock(&l->owner->pidlist_mutex); in cgroup_pidlist_destroy_work_fn()
233 list_del(&l->links); in cgroup_pidlist_destroy_work_fn()
234 kvfree(l->list); in cgroup_pidlist_destroy_work_fn()
235 put_pid_ns(l->key.ns); in cgroup_pidlist_destroy_work_fn()
239 mutex_unlock(&l->owner->pidlist_mutex); in cgroup_pidlist_destroy_work_fn()
244 * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
253 * edge cases first; no work needs to be done for either in pidlist_uniq()
260 while (list[src] == list[src-1]) { in pidlist_uniq()
265 /* dest always points to where the next unique element goes */ in pidlist_uniq()
274 * The two pid files - task and cgroup.procs - guaranteed that the result
277 * making it impossible to use, for example, single rbtree of member tasks
279 * per open file is dangerous, so cgroup had to implement shared pool of
284 return *(pid_t *)a - *(pid_t *)b; in cmppid()
294 lockdep_assert_held(&cgrp->pidlist_mutex); in cgroup_pidlist_find()
296 list_for_each_entry(l, &cgrp->pidlists, links) in cgroup_pidlist_find()
297 if (l->key.type == type && l->key.ns == ns) in cgroup_pidlist_find()
313 lockdep_assert_held(&cgrp->pidlist_mutex); in cgroup_pidlist_find_create()
324 INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn); in cgroup_pidlist_find_create()
325 l->key.type = type; in cgroup_pidlist_find_create()
327 l->key.ns = get_pid_ns(task_active_pid_ns(current)); in cgroup_pidlist_find_create()
328 l->owner = cgrp; in cgroup_pidlist_find_create()
329 list_add(&l->links, &cgrp->pidlists); in cgroup_pidlist_find_create()
346 lockdep_assert_held(&cgrp->pidlist_mutex); in pidlist_array_load()
350 * enough space - tough. This race is indistinguishable to the in pidlist_array_load()
357 return -ENOMEM; in pidlist_array_load()
359 css_task_iter_start(&cgrp->self, 0, &it); in pidlist_array_load()
368 if (pid > 0) /* make sure to only use valid results */ in pidlist_array_load()
380 return -ENOMEM; in pidlist_array_load()
384 kvfree(l->list); in pidlist_array_load()
385 l->list = array; in pidlist_array_load()
386 l->length = length; in pidlist_array_load()
393 * next pid to display; the seq_file iterator is a pointer to the pid
394 * in the cgroup->l->list array.
400 * Initially we receive a position value that corresponds to in cgroup_pidlist_start()
402 * after a seek to the start). Use a binary-search to find the in cgroup_pidlist_start()
403 * next pid to display, if any in cgroup_pidlist_start()
405 struct kernfs_open_file *of = s->private; in cgroup_pidlist_start()
406 struct cgroup_file_ctx *ctx = of->priv; in cgroup_pidlist_start()
407 struct cgroup *cgrp = seq_css(s)->cgroup; in cgroup_pidlist_start()
409 enum cgroup_filetype type = seq_cft(s)->private; in cgroup_pidlist_start()
413 mutex_lock(&cgrp->pidlist_mutex); in cgroup_pidlist_start()
416 * !NULL @ctx->procs1.pidlist indicates that this isn't the first in cgroup_pidlist_start()
418 * that. Look for it. Note that @ctx->procs1.pidlist can't be used in cgroup_pidlist_start()
421 if (ctx->procs1.pidlist) in cgroup_pidlist_start()
422 ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); in cgroup_pidlist_start()
428 if (!ctx->procs1.pidlist) { in cgroup_pidlist_start()
429 ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); in cgroup_pidlist_start()
433 l = ctx->procs1.pidlist; in cgroup_pidlist_start()
436 int end = l->length; in cgroup_pidlist_start()
439 int mid = (index + end) / 2; in cgroup_pidlist_start() local
440 if (l->list[mid] == pid) { in cgroup_pidlist_start()
441 index = mid; in cgroup_pidlist_start()
443 } else if (l->list[mid] < pid) in cgroup_pidlist_start()
444 index = mid + 1; in cgroup_pidlist_start()
446 end = mid; in cgroup_pidlist_start()
450 if (index >= l->length) in cgroup_pidlist_start()
452 /* Update the abstract position to be the actual pid that we found */ in cgroup_pidlist_start()
453 iter = l->list + index; in cgroup_pidlist_start()
460 struct kernfs_open_file *of = s->private; in cgroup_pidlist_stop()
461 struct cgroup_file_ctx *ctx = of->priv; in cgroup_pidlist_stop()
462 struct cgroup_pidlist *l = ctx->procs1.pidlist; in cgroup_pidlist_stop()
465 mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, in cgroup_pidlist_stop()
467 mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex); in cgroup_pidlist_stop()
472 struct kernfs_open_file *of = s->private; in cgroup_pidlist_next()
473 struct cgroup_file_ctx *ctx = of->priv; in cgroup_pidlist_next()
474 struct cgroup_pidlist *l = ctx->procs1.pidlist; in cgroup_pidlist_next()
476 pid_t *end = l->list + l->length; in cgroup_pidlist_next()
478 * Advance to the next pid in the array. If this goes off the in cgroup_pidlist_next()
508 cgrp = cgroup_kn_lock_live(of->kn, false); in __cgroup1_procs_write()
510 return -ENODEV; in __cgroup1_procs_write()
519 * to check permissions on one of them. Check permissions using the in __cgroup1_procs_write()
520 * credentials from file open to protect against inherited fd attacks. in __cgroup1_procs_write()
522 cred = of->file->f_cred; in __cgroup1_procs_write()
524 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && in __cgroup1_procs_write()
525 !uid_eq(cred->euid, tcred->uid) && in __cgroup1_procs_write()
526 !uid_eq(cred->euid, tcred->suid)) in __cgroup1_procs_write()
527 ret = -EACCES; in __cgroup1_procs_write()
537 cgroup_kn_unlock(of->kn); in __cgroup1_procs_write()
560 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); in cgroup_release_agent_write()
564 * require capabilities to set release agent. in cgroup_release_agent_write()
566 ctx = of->priv; in cgroup_release_agent_write()
567 if ((ctx->ns->user_ns != &init_user_ns) || in cgroup_release_agent_write()
568 !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN)) in cgroup_release_agent_write()
569 return -EPERM; in cgroup_release_agent_write()
571 cgrp = cgroup_kn_lock_live(of->kn, false); in cgroup_release_agent_write()
573 return -ENODEV; in cgroup_release_agent_write()
575 strscpy(cgrp->root->release_agent_path, strstrip(buf), in cgroup_release_agent_write()
576 sizeof(cgrp->root->release_agent_path)); in cgroup_release_agent_write()
578 cgroup_kn_unlock(of->kn); in cgroup_release_agent_write()
584 struct cgroup *cgrp = seq_css(seq)->cgroup; in cgroup_release_agent_show()
587 seq_puts(seq, cgrp->root->release_agent_path); in cgroup_release_agent_show()
602 return notify_on_release(css->cgroup); in cgroup_read_notify_on_release()
609 set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); in cgroup_write_notify_on_release()
611 clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); in cgroup_write_notify_on_release()
618 return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); in cgroup_clone_children_read()
625 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); in cgroup_clone_children_write()
627 clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); in cgroup_clone_children_write()
671 .max_write_len = PATH_MAX - 1,
685 * Grab the subsystems state racily. No need to add avenue to in proc_cgroupstats_show()
690 cgrp_v1_visible |= ss->root != &cgrp_dfl_root; in proc_cgroupstats_show()
696 ss->legacy_name, ss->root->hierarchy_id, in proc_cgroupstats_show()
697 atomic_read(&ss->root->nr_cgrps), in proc_cgroupstats_show()
709 * cgroupstats_build - build and fill cgroupstats
710 * @stats: cgroupstats to fill information into
711 * @dentry: A dentry entry belonging to the cgroup for which stats have
714 * Build and fill cgroupstats so that taskstats can export it to user
726 /* it should be kernfs_node belonging to cgroupfs and is a directory */ in cgroupstats_build()
727 if (dentry->d_sb->s_type != &cgroup_fs_type || !kn || in cgroupstats_build()
729 return -EINVAL; in cgroupstats_build()
733 * @kn->priv's validity. For this and css_tryget_online_from_dir(), in cgroupstats_build()
734 * @kn->priv is RCU safe. Let's do the RCU dancing. in cgroupstats_build()
737 cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); in cgroupstats_build()
740 return -ENOENT; in cgroupstats_build()
744 css_task_iter_start(&cgrp->self, 0, &it); in cgroupstats_build()
746 switch (READ_ONCE(tsk->__state)) { in cgroupstats_build()
748 stats->nr_running++; in cgroupstats_build()
751 stats->nr_sleeping++; in cgroupstats_build()
754 stats->nr_uninterruptible++; in cgroupstats_build()
757 stats->nr_stopped++; in cgroupstats_build()
760 if (tsk->in_iowait) in cgroupstats_build()
761 stats->nr_io_wait++; in cgroupstats_build()
774 !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp)) in cgroup1_check_for_release()
775 schedule_work(&cgrp->release_agent_work); in cgroup1_check_for_release()
781 * relative to the root of cgroup file system) as the argument.
783 * Most likely, this user command will try to rmdir this cgroup.
786 * attached to this cgroup before it is removed, or that some other
790 * to continue to serve a useful existence. Next time it's released,
793 * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
797 * release agent task. We don't bother to wait because the caller of
809 /* snoop agent path and exit early if empty */ in cgroup1_release_agent()
810 if (!cgrp->root->release_agent_path[0]) in cgroup1_release_agent()
820 strscpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX); in cgroup1_release_agent()
845 * cgroup_rename - Only allow simple rename of directories in place.
850 struct cgroup *cgrp = kn->priv; in cgroup1_rename()
853 /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */ in cgroup1_rename()
855 return -EINVAL; in cgroup1_rename()
858 return -ENOTDIR; in cgroup1_rename()
859 if (rcu_access_pointer(kn->__parent) != new_parent) in cgroup1_rename()
860 return -EIO; in cgroup1_rename()
890 if (root->subsys_mask & (1 << ssid)) in cgroup1_show_options()
891 seq_show_option(seq, ss->legacy_name, NULL); in cgroup1_show_options()
892 if (root->flags & CGRP_ROOT_NOPREFIX) in cgroup1_show_options()
894 if (root->flags & CGRP_ROOT_XATTR) in cgroup1_show_options()
896 if (root->flags & CGRP_ROOT_CPUSET_V2_MODE) in cgroup1_show_options()
898 if (root->flags & CGRP_ROOT_FAVOR_DYNMODS) in cgroup1_show_options()
902 if (strlen(root->release_agent_path)) in cgroup1_show_options()
904 root->release_agent_path); in cgroup1_show_options()
907 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags)) in cgroup1_show_options()
909 if (strlen(root->name)) in cgroup1_show_options()
910 seq_show_option(seq, "name", root->name); in cgroup1_show_options()
949 if (opt == -ENOPARAM) { in cgroup1_parse_param()
953 if (ret != -ENOPARAM) in cgroup1_parse_param()
956 if (strcmp(param->key, ss->legacy_name) || in cgroup1_parse_param()
961 param->key); in cgroup1_parse_param()
962 ctx->subsys_mask |= (1 << i); in cgroup1_parse_param()
965 return invalfc(fc, "Unknown subsys name '%s'", param->key); in cgroup1_parse_param()
973 ctx->none = true; in cgroup1_parse_param()
976 ctx->all_ss = true; in cgroup1_parse_param()
979 ctx->flags |= CGRP_ROOT_NOPREFIX; in cgroup1_parse_param()
982 ctx->cpuset_clone_children = true; in cgroup1_parse_param()
985 ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; in cgroup1_parse_param()
988 ctx->flags |= CGRP_ROOT_XATTR; in cgroup1_parse_param()
991 ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS; in cgroup1_parse_param()
994 ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS; in cgroup1_parse_param()
998 if (ctx->release_agent) in cgroup1_parse_param()
1002 * require capabilities to set release agent. in cgroup1_parse_param()
1004 if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) in cgroup1_parse_param()
1006 ctx->release_agent = param->string; in cgroup1_parse_param()
1007 param->string = NULL; in cgroup1_parse_param()
1012 return -ENOENT; in cgroup1_parse_param()
1014 if (!param->size) in cgroup1_parse_param()
1016 if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1) in cgroup1_parse_param()
1018 /* Must match [\w.-]+ */ in cgroup1_parse_param()
1019 for (i = 0; i < param->size; i++) { in cgroup1_parse_param()
1020 char c = param->string[i]; in cgroup1_parse_param()
1023 if ((c == '.') || (c == '-') || (c == '_')) in cgroup1_parse_param()
1028 if (ctx->name) in cgroup1_parse_param()
1030 ctx->name = param->string; in cgroup1_parse_param()
1031 param->string = NULL; in cgroup1_parse_param()
1053 ctx->subsys_mask &= enabled; in check_cgroupfs_options()
1057 * let's default to 'all'. in check_cgroupfs_options()
1059 if (!ctx->subsys_mask && !ctx->none && !ctx->name) in check_cgroupfs_options()
1060 ctx->all_ss = true; in check_cgroupfs_options()
1062 if (ctx->all_ss) { in check_cgroupfs_options()
1064 if (ctx->subsys_mask) in check_cgroupfs_options()
1067 ctx->subsys_mask = enabled; in check_cgroupfs_options()
1071 * We either have to specify by name or by subsystems. (So all in check_cgroupfs_options()
1074 if (!ctx->subsys_mask && !ctx->name) in check_cgroupfs_options()
1082 if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask)) in check_cgroupfs_options()
1086 if (ctx->subsys_mask && ctx->none) in check_cgroupfs_options()
1095 struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb); in cgroup1_reconfigure()
1107 if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent) in cgroup1_reconfigure()
1109 task_tgid_nr(current), current->comm); in cgroup1_reconfigure()
1111 added_mask = ctx->subsys_mask & ~root->subsys_mask; in cgroup1_reconfigure()
1112 removed_mask = root->subsys_mask & ~ctx->subsys_mask; in cgroup1_reconfigure()
1114 /* Don't allow flags or name to change at remount */ in cgroup1_reconfigure()
1115 if ((ctx->flags ^ root->flags) || in cgroup1_reconfigure()
1116 (ctx->name && strcmp(ctx->name, root->name))) { in cgroup1_reconfigure()
1118 ctx->flags, ctx->name ?: "", root->flags, root->name); in cgroup1_reconfigure()
1119 ret = -EINVAL; in cgroup1_reconfigure()
1124 if (!list_empty(&root->cgrp.self.children)) { in cgroup1_reconfigure()
1125 ret = -EBUSY; in cgroup1_reconfigure()
1135 if (ctx->release_agent) { in cgroup1_reconfigure()
1137 strscpy(root->release_agent_path, ctx->release_agent); in cgroup1_reconfigure()
1157 * The guts of cgroup1 mount - find or create cgroup_root to use.
1158 * Called with cgroup_mutex held; returns 0 on success, -E... on
1159 * error and positive - in case when the candidate is busy dying.
1160 * On success it stashes a reference to cgroup_root into given
1179 * dying subsystems. We just need to ensure that the ones in cgroup1_root_to_use()
1184 if (!(ctx->subsys_mask & (1 << i)) || in cgroup1_root_to_use()
1185 ss->root == &cgrp_dfl_root) in cgroup1_root_to_use()
1188 if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) in cgroup1_root_to_use()
1190 cgroup_put(&ss->root->cgrp); in cgroup1_root_to_use()
1204 if (ctx->name) { in cgroup1_root_to_use()
1205 if (strcmp(ctx->name, root->name)) in cgroup1_root_to_use()
1214 if ((ctx->subsys_mask || ctx->none) && in cgroup1_root_to_use()
1215 (ctx->subsys_mask != root->subsys_mask)) { in cgroup1_root_to_use()
1218 return -EBUSY; in cgroup1_root_to_use()
1221 if (root->flags ^ ctx->flags) in cgroup1_root_to_use()
1224 ctx->root = root; in cgroup1_root_to_use()
1233 if (!ctx->subsys_mask && !ctx->none) in cgroup1_root_to_use()
1237 if (ctx->ns != &init_cgroup_ns) in cgroup1_root_to_use()
1238 return -EPERM; in cgroup1_root_to_use()
1242 return -ENOMEM; in cgroup1_root_to_use()
1244 ctx->root = root; in cgroup1_root_to_use()
1247 ret = cgroup_setup_root(root, ctx->subsys_mask); in cgroup1_root_to_use()
1249 cgroup_favor_dynmods(root, ctx->flags & CGRP_ROOT_FAVOR_DYNMODS); in cgroup1_root_to_use()
1261 /* Check if the caller has permission to mount. */ in cgroup1_get_tree()
1262 if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN)) in cgroup1_get_tree()
1263 return -EPERM; in cgroup1_get_tree()
1268 if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt)) in cgroup1_get_tree()
1276 if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { in cgroup1_get_tree()
1289 * task_get_cgroup1 - Acquires the associated cgroup of a task within a
1296 * We limit it to cgroup1 only.
1300 struct cgroup *cgrp = ERR_PTR(-ENOENT); in task_get_cgroup1()
1309 if (root->hierarchy_id != hierarchy_id) in task_get_cgroup1()
1314 cgrp = ERR_PTR(-ENOENT); in task_get_cgroup1()
1325 * Used to destroy pidlists and separate to serve as flush domain. in cgroup1_wq_init()
1326 * Cap @max_active to 1 too. in cgroup1_wq_init()
1356 if (strcmp(token, ss->name) && in cgroup_no_v1()
1357 strcmp(token, ss->legacy_name)) in cgroup_no_v1()