/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *                    Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *                    Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/seq_file.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include "blk-cgroup.h"

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
			unsigned long time, unsigned long sectors)
{
	blkg->time += time;
	blkg->sectors += sectors;
}
EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);

void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
			struct blkio_group *blkg, void *key, dev_t dev)
{
	unsigned long flags;

	spin_lock_irqsave(&blkcg->lock, flags);
	rcu_assign_pointer(blkg->key, key);
	blkg->blkcg_id = css_id(&blkcg->css);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	spin_unlock_irqrestore(&blkcg->lock, flags);
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* Need to take css reference ? */
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
#endif
	blkg->dev = dev;
}
EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);

static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	hlist_del_init_rcu(&blkg->blkcg_node);
	blkg->blkcg_id = 0;
}

/*
 * Returns 0 if the blkio_group was still on the cgroup list. Otherwise
 * returns 1, indicating that the blkio_group was already unhashed by the
 * time we got to it.
 */
int blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	struct blkio_cgroup *blkcg;
	unsigned long flags;
	struct cgroup_subsys_state *css;
	int ret = 1;

	rcu_read_lock();
	css = css_lookup(&blkio_subsys, blkg->blkcg_id);
	if (!css)
		goto out;

	blkcg = container_of(css, struct blkio_cgroup, css);
	spin_lock_irqsave(&blkcg->lock, flags);
	if (!hlist_unhashed(&blkg->blkcg_node)) {
		__blkiocg_del_blkio_group(blkg);
		ret = 0;
	}
	spin_unlock_irqrestore(&blkcg->lock, flags);
out:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);

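/*
 * Illustrative sketch (not part of the interface above): how an IO
 * controlling policy is expected to use these group helpers.  The names
 * my_group, my_policy_data and my_policy_* are made up for illustration;
 * the real callers live in the individual IO schedulers.  A policy embeds
 * a struct blkio_group in its own per-cgroup group object, uses its
 * per-queue data pointer as the lookup key, and accounts service time and
 * sectors through blkiocg_update_blkio_group_stats():
 *
 *	struct my_group {
 *		struct blkio_group blkg;	// common part, hashed on blkcg
 *		// ... policy private fields ...
 *	};
 *
 *	static void my_policy_link_group(struct my_policy_data *pd,
 *					 struct my_group *grp,
 *					 struct blkio_cgroup *blkcg, dev_t dev)
 *	{
 *		// @pd is also what the policy later passes as @key to
 *		// blkiocg_lookup_group() under rcu_read_lock()
 *		blkiocg_add_blkio_group(blkcg, &grp->blkg, pd, dev);
 *	}
 *
 *	static void my_policy_charge(struct my_group *grp,
 *				     unsigned long used_time,
 *				     unsigned long sectors)
 *	{
 *		blkiocg_update_blkio_group_stats(&grp->blkg, used_time, sectors);
 *	}
 */
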
/* called under rcu_read_lock(). */
struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	void *__key;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
		__key = blkg->key;
		if (__key == key)
			return blkg;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(blkiocg_lookup_group);

#define SHOW_FUNCTION(__VAR)						\
static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup,		\
				struct cftype *cftype)			\
{									\
	struct blkio_cgroup *blkcg;					\
									\
	blkcg = cgroup_to_blkio_cgroup(cgroup);				\
	return (u64)blkcg->__VAR;					\
}

SHOW_FUNCTION(weight);
#undef SHOW_FUNCTION

static int
blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg;
	struct blkio_group *blkg;
	struct hlist_node *n;
	struct blkio_policy_type *blkiop;

	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
		return -EINVAL;

	blkcg = cgroup_to_blkio_cgroup(cgroup);
	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);
	blkcg->weight = (unsigned int)val;
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		list_for_each_entry(blkiop, &blkio_list, list)
			blkiop->ops.blkio_update_group_weight_fn(blkg,
							blkcg->weight);
	}
	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}

#define SHOW_FUNCTION_PER_GROUP(__VAR)					\
static int blkiocg_##__VAR##_read(struct cgroup *cgroup,		\
			struct cftype *cftype, struct seq_file *m)	\
{									\
	struct blkio_cgroup *blkcg;					\
	struct blkio_group *blkg;					\
	struct hlist_node *n;						\
									\
	if (!cgroup_lock_live_group(cgroup))				\
		return -ENODEV;						\
									\
	blkcg = cgroup_to_blkio_cgroup(cgroup);				\
	rcu_read_lock();						\
	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
		if (blkg->dev)						\
			seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev),	\
				MINOR(blkg->dev), blkg->__VAR);		\
	}								\
	rcu_read_unlock();						\
	cgroup_unlock();						\
	return 0;							\
}

SHOW_FUNCTION_PER_GROUP(time);
SHOW_FUNCTION_PER_GROUP(sectors);
#ifdef CONFIG_DEBUG_BLK_CGROUP
SHOW_FUNCTION_PER_GROUP(dequeue);
#endif
#undef SHOW_FUNCTION_PER_GROUP

#ifdef CONFIG_DEBUG_BLK_CGROUP
void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
			unsigned long dequeue)
{
	blkg->dequeue += dequeue;
}
EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
#endif

struct cftype blkio_files[] = {
	{
		.name = "weight",
		.read_u64 = blkiocg_weight_read,
		.write_u64 = blkiocg_weight_write,
	},
	{
		.name = "time",
		.read_seq_string = blkiocg_time_read,
	},
	{
		.name = "sectors",
		.read_seq_string = blkiocg_sectors_read,
	},
#ifdef CONFIG_DEBUG_BLK_CGROUP
	{
		.name = "dequeue",
		.read_seq_string = blkiocg_dequeue_read,
	},
#endif
};

static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	return cgroup_add_files(cgroup, subsys, blkio_files,
				ARRAY_SIZE(blkio_files));
}

static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	unsigned long flags;
	struct blkio_group *blkg;
	void *key;
	struct blkio_policy_type *blkiop;

	rcu_read_lock();
remove_entry:
	spin_lock_irqsave(&blkcg->lock, flags);

	if (hlist_empty(&blkcg->blkg_list)) {
		spin_unlock_irqrestore(&blkcg->lock, flags);
		goto done;
	}

	blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
				blkcg_node);
	key = rcu_dereference(blkg->key);
	__blkiocg_del_blkio_group(blkg);

	spin_unlock_irqrestore(&blkcg->lock, flags);

	/*
	 * This blkio_group is being unlinked as the associated cgroup is
	 * going away. Let all the IO controlling policies know about this
	 * event.
	 *
	 * Currently this is a static call to one io controlling policy. Once
	 * we have more policies in place, we need some dynamic registration
	 * of callback functions.
	 */
	spin_lock(&blkio_list_lock);
	list_for_each_entry(blkiop, &blkio_list, list)
		blkiop->ops.blkio_unlink_group_fn(key, blkg);
	spin_unlock(&blkio_list_lock);
	goto remove_entry;
done:
	free_css_id(&blkio_subsys, &blkcg->css);
	rcu_read_unlock();
	kfree(blkcg);
}

static struct cgroup_subsys_state *
blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg, *parent_blkcg;

	if (!cgroup->parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	/* Currently we do not support a hierarchy deeper than two levels (0,1) */
	parent_blkcg = cgroup_to_blkio_cgroup(cgroup->parent);
	if (css_depth(&parent_blkcg->css) > 0)
		return ERR_PTR(-EINVAL);

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
done:
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	return &blkcg->css;
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures. For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup_subsys *subsys,
				struct cgroup *cgroup, struct task_struct *tsk,
				bool threadgroup)
{
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	task_lock(tsk);
	ioc = tsk->io_context;
	if (ioc && atomic_read(&ioc->nr_tasks) > 1)
		ret = -EINVAL;
	task_unlock(tsk);

	return ret;
}

static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
				struct cgroup *prev, struct task_struct *tsk,
				bool threadgroup)
{
	struct io_context *ioc;

	task_lock(tsk);
	ioc = tsk->io_context;
	if (ioc)
		ioc->cgroup_changed = 1;
	task_unlock(tsk);
}

struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.attach = blkiocg_attach,
	.destroy = blkiocg_destroy,
	.populate = blkiocg_populate,
	.subsys_id = blkio_subsys_id,
	.use_id = 1,
};

void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	spin_lock(&blkio_list_lock);
	list_add_tail(&blkiop->list, &blkio_list);
	spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_register);

void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	spin_lock(&blkio_list_lock);
	list_del_init(&blkiop->list);
	spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);
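
/*
 * Illustrative sketch (not part of this file): how an IO controlling policy
 * would plug into blkio_policy_register()/blkio_policy_unregister() above.
 * The ops member names are taken from the calls made in this file
 * (blkio_update_group_weight_fn, blkio_unlink_group_fn); the exact layout of
 * struct blkio_policy_type lives in blk-cgroup.h, and the my_* names below
 * are made up for illustration.
 *
 *	static void my_unlink_group(void *key, struct blkio_group *blkg)
 *	{
 *		// drop the policy's reference to the group hanging off @key;
 *		// called from blkiocg_destroy() when the cgroup goes away
 *	}
 *
 *	static void my_update_group_weight(struct blkio_group *blkg,
 *					   unsigned int weight)
 *	{
 *		// propagate the new cgroup weight to the policy's group
 *	}
 *
 *	static struct blkio_policy_type my_policy = {
 *		.ops = {
 *			.blkio_unlink_group_fn		= my_unlink_group,
 *			.blkio_update_group_weight_fn	= my_update_group_weight,
 *		},
 *	};
 *
 *	static int __init my_policy_init(void)
 *	{
 *		blkio_policy_register(&my_policy);
 *		return 0;
 *	}
 *
 *	static void __exit my_policy_exit(void)
 *	{
 *		blkio_policy_unregister(&my_policy);
 *	}
 *
 *	module_init(my_policy_init);
 *	module_exit(my_policy_exit);
 */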