memcontrol.c (16ec790938d4f356c82fab27b9a9adf4d6fe19a6) | memcontrol.c (79bd9814e5ec9a288d6599f53aeac0b548fdfe52) |
---|---|
1/* memcontrol.c - Memory Controller 2 * 3 * Copyright IBM Corporation, 2007 4 * Author Balbir Singh <balbir@linux.vnet.ibm.com> 5 * 6 * Copyright 2007 OpenVZ SWsoft Inc 7 * Author: Pavel Emelianov <xemul@openvz.org> 8 * --- 31 unchanged lines hidden (view full) --- 40#include <linux/export.h> 41#include <linux/mutex.h> 42#include <linux/rbtree.h> 43#include <linux/slab.h> 44#include <linux/swap.h> 45#include <linux/swapops.h> 46#include <linux/spinlock.h> 47#include <linux/eventfd.h> | 1/* memcontrol.c - Memory Controller 2 * 3 * Copyright IBM Corporation, 2007 4 * Author Balbir Singh <balbir@linux.vnet.ibm.com> 5 * 6 * Copyright 2007 OpenVZ SWsoft Inc 7 * Author: Pavel Emelianov <xemul@openvz.org> 8 * --- 31 unchanged lines hidden (view full) --- 40#include <linux/export.h> 41#include <linux/mutex.h> 42#include <linux/rbtree.h> 43#include <linux/slab.h> 44#include <linux/swap.h> 45#include <linux/swapops.h> 46#include <linux/spinlock.h> 47#include <linux/eventfd.h> |
48#include <linux/poll.h> |
|
48#include <linux/sort.h> 49#include <linux/fs.h> 50#include <linux/seq_file.h> 51#include <linux/vmalloc.h> 52#include <linux/vmpressure.h> 53#include <linux/mm_inline.h> 54#include <linux/page_cgroup.h> 55#include <linux/cpu.h> 56#include <linux/oom.h> 57#include <linux/lockdep.h> | 49#include <linux/sort.h> 50#include <linux/fs.h> 51#include <linux/seq_file.h> 52#include <linux/vmalloc.h> 53#include <linux/vmpressure.h> 54#include <linux/mm_inline.h> 55#include <linux/page_cgroup.h> 56#include <linux/cpu.h> 57#include <linux/oom.h> 58#include <linux/lockdep.h> |
59#include <linux/file.h> |
|
58#include "internal.h" 59#include <net/sock.h> 60#include <net/ip.h> 61#include <net/tcp_memcontrol.h> 62 63#include <asm/uaccess.h> 64 65#include <trace/events/vmscan.h> --- 155 unchanged lines hidden (view full) --- 221}; 222 223/* for OOM */ 224struct mem_cgroup_eventfd_list { 225 struct list_head list; 226 struct eventfd_ctx *eventfd; 227}; 228 | 60#include "internal.h" 61#include <net/sock.h> 62#include <net/ip.h> 63#include <net/tcp_memcontrol.h> 64 65#include <asm/uaccess.h> 66 67#include <trace/events/vmscan.h> --- 155 unchanged lines hidden (view full) --- 223}; 224 225/* for OOM */ 226struct mem_cgroup_eventfd_list { 227 struct list_head list; 228 struct eventfd_ctx *eventfd; 229}; 230 |
231/* 232 * cgroup_event represents events which userspace want to receive. 233 */ 234struct cgroup_event { 235 /* 236 * css which the event belongs to. 237 */ 238 struct cgroup_subsys_state *css; 239 /* 240 * Control file which the event associated. 241 */ 242 struct cftype *cft; 243 /* 244 * eventfd to signal userspace about the event. 245 */ 246 struct eventfd_ctx *eventfd; 247 /* 248 * Each of these stored in a list by the cgroup. 249 */ 250 struct list_head list; 251 /* 252 * All fields below needed to unregister event when 253 * userspace closes eventfd. 254 */ 255 poll_table pt; 256 wait_queue_head_t *wqh; 257 wait_queue_t wait; 258 struct work_struct remove; 259}; 260 |
|
229static void mem_cgroup_threshold(struct mem_cgroup *memcg); 230static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); 231 232/* 233 * The memory controller data structure. The memory controller controls both 234 * page cache and RSS per cgroup. We would eventually like to provide 235 * statistics based on the statistics developed by Rik Van Riel for clock-pro, 236 * to help the administrator determine what knobs to tune. --- 5705 unchanged lines hidden (view full) --- 5942{ 5943} 5944 5945static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) 5946{ 5947} 5948#endif 5949 | 261static void mem_cgroup_threshold(struct mem_cgroup *memcg); 262static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); 263 264/* 265 * The memory controller data structure. The memory controller controls both 266 * page cache and RSS per cgroup. We would eventually like to provide 267 * statistics based on the statistics developed by Rik Van Riel for clock-pro, 268 * to help the administrator determine what knobs to tune. --- 5705 unchanged lines hidden (view full) --- 5974{ 5975} 5976 5977static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) 5978{ 5979} 5980#endif 5981 |
5982/* 5983 * Unregister event and free resources. 5984 * 5985 * Gets called from workqueue. 5986 */ 5987static void cgroup_event_remove(struct work_struct *work) 5988{ 5989 struct cgroup_event *event = container_of(work, struct cgroup_event, 5990 remove); 5991 struct cgroup_subsys_state *css = event->css; 5992 5993 remove_wait_queue(event->wqh, &event->wait); 5994 5995 event->cft->unregister_event(css, event->cft, event->eventfd); 5996 5997 /* Notify userspace the event is going away. */ 5998 eventfd_signal(event->eventfd, 1); 5999 6000 eventfd_ctx_put(event->eventfd); 6001 kfree(event); 6002 css_put(css); 6003} 6004 6005/* 6006 * Gets called on POLLHUP on eventfd when user closes it. 6007 * 6008 * Called with wqh->lock held and interrupts disabled. 6009 */ 6010static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, 6011 int sync, void *key) 6012{ 6013 struct cgroup_event *event = container_of(wait, 6014 struct cgroup_event, wait); 6015 struct cgroup *cgrp = event->css->cgroup; 6016 unsigned long flags = (unsigned long)key; 6017 6018 if (flags & POLLHUP) { 6019 /* 6020 * If the event has been detached at cgroup removal, we 6021 * can simply return knowing the other side will cleanup 6022 * for us. 6023 * 6024 * We can't race against event freeing since the other 6025 * side will require wqh->lock via remove_wait_queue(), 6026 * which we hold. 6027 */ 6028 spin_lock(&cgrp->event_list_lock); 6029 if (!list_empty(&event->list)) { 6030 list_del_init(&event->list); 6031 /* 6032 * We are in atomic context, but cgroup_event_remove() 6033 * may sleep, so we have to call it in workqueue. 6034 */ 6035 schedule_work(&event->remove); 6036 } 6037 spin_unlock(&cgrp->event_list_lock); 6038 } 6039 6040 return 0; 6041} 6042 6043static void cgroup_event_ptable_queue_proc(struct file *file, 6044 wait_queue_head_t *wqh, poll_table *pt) 6045{ 6046 struct cgroup_event *event = container_of(pt, 6047 struct cgroup_event, pt); 6048 6049 event->wqh = wqh; 6050 add_wait_queue(wqh, &event->wait); 6051} 6052 6053/* 6054 * Parse input and register new cgroup event handler. 6055 * 6056 * Input must be in format '<event_fd> <control_fd> <args>'. 6057 * Interpretation of args is defined by control file implementation. 6058 */ 6059static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, 6060 struct cftype *cft, const char *buffer) 6061{ 6062 struct cgroup *cgrp = dummy_css->cgroup; 6063 struct cgroup_event *event; 6064 struct cgroup_subsys_state *cfile_css; 6065 unsigned int efd, cfd; 6066 struct fd efile; 6067 struct fd cfile; 6068 char *endp; 6069 int ret; 6070 6071 efd = simple_strtoul(buffer, &endp, 10); 6072 if (*endp != ' ') 6073 return -EINVAL; 6074 buffer = endp + 1; 6075 6076 cfd = simple_strtoul(buffer, &endp, 10); 6077 if ((*endp != ' ') && (*endp != '\0')) 6078 return -EINVAL; 6079 buffer = endp + 1; 6080 6081 event = kzalloc(sizeof(*event), GFP_KERNEL); 6082 if (!event) 6083 return -ENOMEM; 6084 6085 INIT_LIST_HEAD(&event->list); 6086 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc); 6087 init_waitqueue_func_entry(&event->wait, cgroup_event_wake); 6088 INIT_WORK(&event->remove, cgroup_event_remove); 6089 6090 efile = fdget(efd); 6091 if (!efile.file) { 6092 ret = -EBADF; 6093 goto out_kfree; 6094 } 6095 6096 event->eventfd = eventfd_ctx_fileget(efile.file); 6097 if (IS_ERR(event->eventfd)) { 6098 ret = PTR_ERR(event->eventfd); 6099 goto out_put_efile; 6100 } 6101 6102 cfile = fdget(cfd); 6103 if (!cfile.file) { 6104 ret = -EBADF; 6105 goto out_put_eventfd; 6106 } 6107 6108 /* the process need read permission on control file */ 6109 /* AV: shouldn't we check that it's been opened for read instead? */ 6110 ret = inode_permission(file_inode(cfile.file), MAY_READ); 6111 if (ret < 0) 6112 goto out_put_cfile; 6113 6114 event->cft = __file_cft(cfile.file); 6115 if (IS_ERR(event->cft)) { 6116 ret = PTR_ERR(event->cft); 6117 goto out_put_cfile; 6118 } 6119 6120 if (!event->cft->ss) { 6121 ret = -EBADF; 6122 goto out_put_cfile; 6123 } 6124 6125 /* 6126 * Determine the css of @cfile, verify it belongs to the same 6127 * cgroup as cgroup.event_control, and associate @event with it. 6128 * Remaining events are automatically removed on cgroup destruction 6129 * but the removal is asynchronous, so take an extra ref. 6130 */ 6131 rcu_read_lock(); 6132 6133 ret = -EINVAL; 6134 event->css = cgroup_css(cgrp, event->cft->ss); 6135 cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss); 6136 if (event->css && event->css == cfile_css && css_tryget(event->css)) 6137 ret = 0; 6138 6139 rcu_read_unlock(); 6140 if (ret) 6141 goto out_put_cfile; 6142 6143 if (!event->cft->register_event || !event->cft->unregister_event) { 6144 ret = -EINVAL; 6145 goto out_put_css; 6146 } 6147 6148 ret = event->cft->register_event(event->css, event->cft, 6149 event->eventfd, buffer); 6150 if (ret) 6151 goto out_put_css; 6152 6153 efile.file->f_op->poll(efile.file, &event->pt); 6154 6155 spin_lock(&cgrp->event_list_lock); 6156 list_add(&event->list, &cgrp->event_list); 6157 spin_unlock(&cgrp->event_list_lock); 6158 6159 fdput(cfile); 6160 fdput(efile); 6161 6162 return 0; 6163 6164out_put_css: 6165 css_put(event->css); 6166out_put_cfile: 6167 fdput(cfile); 6168out_put_eventfd: 6169 eventfd_ctx_put(event->eventfd); 6170out_put_efile: 6171 fdput(efile); 6172out_kfree: 6173 kfree(event); 6174 6175 return ret; 6176} 6177 |
|
5950static struct cftype mem_cgroup_files[] = { 5951 { 5952 .name = "usage_in_bytes", 5953 .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), 5954 .read = mem_cgroup_read, 5955 .register_event = mem_cgroup_usage_register_event, 5956 .unregister_event = mem_cgroup_usage_unregister_event, 5957 }, --- 31 unchanged lines hidden (view full) --- 5989 }, 5990 { 5991 .name = "use_hierarchy", 5992 .flags = CFTYPE_INSANE, 5993 .write_u64 = mem_cgroup_hierarchy_write, 5994 .read_u64 = mem_cgroup_hierarchy_read, 5995 }, 5996 { | 6178static struct cftype mem_cgroup_files[] = { 6179 { 6180 .name = "usage_in_bytes", 6181 .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), 6182 .read = mem_cgroup_read, 6183 .register_event = mem_cgroup_usage_register_event, 6184 .unregister_event = mem_cgroup_usage_unregister_event, 6185 }, --- 31 unchanged lines hidden (view full) --- 6217 }, 6218 { 6219 .name = "use_hierarchy", 6220 .flags = CFTYPE_INSANE, 6221 .write_u64 = mem_cgroup_hierarchy_write, 6222 .read_u64 = mem_cgroup_hierarchy_read, 6223 }, 6224 { |
6225 .name = "cgroup.event_control", 6226 .write_string = cgroup_write_event_control, 6227 .flags = CFTYPE_NO_PREFIX, 6228 .mode = S_IWUGO, 6229 }, 6230 { |
|
5997 .name = "swappiness", 5998 .read_u64 = mem_cgroup_swappiness_read, 5999 .write_u64 = mem_cgroup_swappiness_write, 6000 }, 6001 { 6002 .name = "move_charge_at_immigrate", 6003 .read_u64 = mem_cgroup_move_charge_read, 6004 .write_u64 = mem_cgroup_move_charge_write, --- 316 unchanged lines hidden (view full) --- 6321 */ 6322 if (!root_mem_cgroup->use_hierarchy) 6323 mem_cgroup_iter_invalidate(root_mem_cgroup); 6324} 6325 6326static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) 6327{ 6328 struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 6231 .name = "swappiness", 6232 .read_u64 = mem_cgroup_swappiness_read, 6233 .write_u64 = mem_cgroup_swappiness_write, 6234 }, 6235 { 6236 .name = "move_charge_at_immigrate", 6237 .read_u64 = mem_cgroup_move_charge_read, 6238 .write_u64 = mem_cgroup_move_charge_write, --- 316 unchanged lines hidden (view full) --- 6555 */ 6556 if (!root_mem_cgroup->use_hierarchy) 6557 mem_cgroup_iter_invalidate(root_mem_cgroup); 6558} 6559 6560static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) 6561{ 6562 struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
6563 struct cgroup *cgrp = css->cgroup; 6564 struct cgroup_event *event, *tmp; |
|
6329 | 6565 |
6566 /* 6567 * Unregister events and notify userspace. 6568 * Notify userspace about cgroup removing only after rmdir of cgroup 6569 * directory to avoid race between userspace and kernelspace. 6570 */ 6571 spin_lock(&cgrp->event_list_lock); 6572 list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { 6573 list_del_init(&event->list); 6574 schedule_work(&event->remove); 6575 } 6576 spin_unlock(&cgrp->event_list_lock); 6577 |
|
6330 kmem_cgroup_css_offline(memcg); 6331 6332 mem_cgroup_invalidate_reclaim_iterators(memcg); 6333 mem_cgroup_reparent_charges(memcg); 6334 mem_cgroup_destroy_all_caches(memcg); 6335 vmpressure_cleanup(&memcg->vmpressure); 6336} 6337 --- 675 unchanged lines hidden --- | 6578 kmem_cgroup_css_offline(memcg); 6579 6580 mem_cgroup_invalidate_reclaim_iterators(memcg); 6581 mem_cgroup_reparent_charges(memcg); 6582 mem_cgroup_destroy_all_caches(memcg); 6583 vmpressure_cleanup(&memcg->vmpressure); 6584} 6585 --- 675 unchanged lines hidden --- |