/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)

#define hugetlb_cgroup_from_counter(counter, idx)			\
	container_of(counter, struct hugetlb_cgroup, hugepage[idx])

static struct hugetlb_cgroup *root_h_cgroup __read_mostly;

static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
				     bool rsvd)
{
	if (rsvd)
		return &h_cg->rsvd_hugepage[idx];
	return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}
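/*
 * Resolve the hugetlb cgroup the given task belongs to.  The caller in this
 * file, __hugetlb_cgroup_charge_cgroup(), holds rcu_read_lock() across this
 * lookup and the css_tryget() that follows it.
 */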
static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
	return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
	return hugetlb_cgroup_from_css(h_cg->css.parent);
}

static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
	int idx;

	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
		if (page_counter_read(
			    hugetlb_cgroup_counter_from_cgroup(h_cg, idx)) ||
		    page_counter_read(hugetlb_cgroup_counter_from_cgroup_rsvd(
			    h_cg, idx))) {
			return true;
		}
	}
	return false;
}

static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
				struct hugetlb_cgroup *parent_h_cgroup)
{
	int idx;

	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
		struct page_counter *fault_parent = NULL;
		struct page_counter *rsvd_parent = NULL;
		unsigned long limit;
		int ret;

		if (parent_h_cgroup) {
			fault_parent = hugetlb_cgroup_counter_from_cgroup(
				parent_h_cgroup, idx);
			rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
				parent_h_cgroup, idx);
		}
		page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
								     idx),
				  fault_parent);
		page_counter_init(
			hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
			rsvd_parent);

		limit = round_down(PAGE_COUNTER_MAX,
				   1 << huge_page_order(&hstates[idx]));

		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
			limit);
		VM_BUG_ON(ret);
		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
			limit);
		VM_BUG_ON(ret);
	}
}

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
	struct hugetlb_cgroup *h_cgroup;

	h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
	if (!h_cgroup)
		return ERR_PTR(-ENOMEM);

	if (!parent_h_cgroup)
		root_h_cgroup = h_cgroup;

	hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
	return &h_cgroup->css;
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cgroup;

	h_cgroup = hugetlb_cgroup_from_css(css);
	kfree(h_cgroup);
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved from
 * active list or uncharged from the cgroup, So no need to get
 * page reference and test for page active here. This function
 * cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct page *page)
{
	unsigned int nr_pages;
	struct page_counter *counter;
	struct hugetlb_cgroup *page_hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

	page_hcg = hugetlb_cgroup_from_page(page);
	/*
	 * We can have pages in active list without any cgroup
	 * ie, hugepage with less than 3 pages. We can safely
	 * ignore those pages.
	 */
	if (!page_hcg || page_hcg != h_cg)
		goto out;

	nr_pages = compound_nr(page);
	if (!parent) {
		parent = root_h_cgroup;
		/* root has no limit */
		page_counter_charge(&parent->hugepage[idx], nr_pages);
	}
	counter = &h_cg->hugepage[idx];
	/* Take the pages off the local counter */
	page_counter_cancel(counter, nr_pages);

	set_hugetlb_cgroup(page, parent);
out:
	return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
	struct hstate *h;
	struct page *page;
	int idx = 0;

	do {
		for_each_hstate(h) {
			spin_lock(&hugetlb_lock);
			list_for_each_entry(page, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(idx, h_cg, page);

			spin_unlock(&hugetlb_lock);
			idx++;
		}
		cond_resched();
	} while (hugetlb_cgroup_have_usage(h_cg));
}
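/*
 * Record a hugetlb memory event (currently only HUGETLB_MAX, raised when a
 * charge fails) for @hugetlb: bump events.local for this cgroup, then bump
 * the hierarchical events counter for this cgroup and every ancestor below
 * the root, notifying the corresponding events and events.local files.
 */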
static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
				 enum hugetlb_memory_event event)
{
	atomic_long_inc(&hugetlb->events_local[idx][event]);
	cgroup_file_notify(&hugetlb->events_local_file[idx]);

	do {
		atomic_long_inc(&hugetlb->events[idx][event]);
		cgroup_file_notify(&hugetlb->events_file[idx]);
	} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
		 !hugetlb_cgroup_is_root(hugetlb));
}

static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
					  struct hugetlb_cgroup **ptr,
					  bool rsvd)
{
	int ret = 0;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = NULL;

	if (hugetlb_cgroup_disabled())
		goto done;
	/*
	 * We don't charge any cgroup if the compound page has less
	 * than 3 pages.
	 */
	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	if (!page_counter_try_charge(
		    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		    nr_pages, &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
		css_put(&h_cg->css);
		goto done;
	}
	/* Reservations take a reference to the css because they do not get
	 * reparented.
	 */
	if (!rsvd)
		css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}
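/*
 * Charging is a two-step protocol (see the callers in mm/hugetlb.c): the
 * page counter is charged first via hugetlb_cgroup_charge_cgroup{,_rsvd}();
 * once a page has been obtained, the charge is committed to it with
 * hugetlb_cgroup_commit_charge{,_rsvd}() under hugetlb_lock, and if no page
 * could be committed the charge is undone with
 * hugetlb_cgroup_uncharge_cgroup{,_rsvd}().
 */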
int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
				      struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}

/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg,
					   struct page *page, bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	__set_hugetlb_cgroup(page, h_cg, rsvd);
	return;
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct page *page)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
				       struct hugetlb_cgroup *h_cg,
				       struct page *page)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
}

/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
					   struct page *page, bool rsvd)
{
	struct hugetlb_cgroup *h_cg;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = __hugetlb_cgroup_from_page(page, rsvd);
	if (unlikely(!h_cg))
		return;
	__set_hugetlb_cgroup(page, NULL, rsvd);

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);

	return;
}

void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
				  struct page *page)
{
	__hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
}

void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
				       struct page *page)
{
	__hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
}

static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					     struct hugetlb_cgroup *h_cg,
					     bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		return;

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
					 struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
				     unsigned long end)
{
	if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
	    !resv->css)
		return;

	page_counter_uncharge(resv->reservation_counter,
			      (end - start) * resv->pages_per_hpage);
	css_put(resv->css);
}

void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
					 struct file_region *rg,
					 unsigned long nr_pages)
{
	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
		return;

	if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 &&
	    !resv->reservation_counter) {
		page_counter_uncharge(rg->reservation_counter,
				      nr_pages * resv->pages_per_hpage);
		css_put(rg->css);
	}
}

enum {
	RES_USAGE,
	RES_RSVD_USAGE,
	RES_LIMIT,
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,
	RES_RSVD_FAILCNT,
};
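/*
 * The RES_* value above is packed into the low 16 bits of cft->private by
 * MEMFILE_PRIVATE(), with the hstate index in the high 16 bits; the read and
 * write handlers below recover the two halves with MEMFILE_IDX() and
 * MEMFILE_ATTR().
 */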
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
				   struct cftype *cft)
{
	struct page_counter *counter;
	struct page_counter *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

	counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_USAGE:
		return (u64)page_counter_read(counter) * PAGE_SIZE;
	case RES_RSVD_USAGE:
		return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
	case RES_LIMIT:
		return (u64)counter->max * PAGE_SIZE;
	case RES_RSVD_LIMIT:
		return (u64)rsvd_counter->max * PAGE_SIZE;
	case RES_MAX_USAGE:
		return (u64)counter->watermark * PAGE_SIZE;
	case RES_RSVD_MAX_USAGE:
		return (u64)rsvd_counter->watermark * PAGE_SIZE;
	case RES_FAILCNT:
		return counter->failcnt;
	case RES_RSVD_FAILCNT:
		return rsvd_counter->failcnt;
	default:
		BUG();
	}
}

static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
	int idx;
	u64 val;
	struct cftype *cft = seq_cft(seq);
	unsigned long limit;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);
	counter = &h_cg->hugepage[idx];

	limit = round_down(PAGE_COUNTER_MAX,
			   1 << huge_page_order(&hstates[idx]));

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_RSVD_USAGE:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_USAGE:
		val = (u64)page_counter_read(counter);
		seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	case RES_RSVD_LIMIT:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_LIMIT:
		val = (u64)counter->max;
		if (val == limit)
			seq_puts(seq, "max\n");
		else
			seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	default:
		BUG();
	}

	return 0;
}

static DEFINE_MUTEX(hugetlb_limit_mutex);

static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off,
				    const char *max)
{
	int ret, idx;
	unsigned long nr_pages;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
	bool rsvd = false;

	if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
		return -EINVAL;

	buf = strstrip(buf);
	ret = page_counter_memparse(buf, max, &nr_pages);
	if (ret)
		return ret;

	idx = MEMFILE_IDX(of_cft(of)->private);
	nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx]));

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_RSVD_LIMIT:
		rsvd = true;
		fallthrough;
	case RES_LIMIT:
		mutex_lock(&hugetlb_limit_mutex);
		ret = page_counter_set_max(
			__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
			nr_pages);
		mutex_unlock(&hugetlb_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
					   char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	int ret = 0;
	struct page_counter *counter, *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

	counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(counter);
		break;
	case RES_RSVD_MAX_USAGE:
		page_counter_reset_watermark(rsvd_counter);
		break;
	case RES_FAILCNT:
		counter->failcnt = 0;
		break;
	case RES_RSVD_FAILCNT:
		rsvd_counter->failcnt = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}
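/*
 * Format a huge page size as a human-readable string, e.g. 2097152 becomes
 * "2MB" and 1073741824 becomes "1GB"; the result is used to build the
 * per-size cgroup file names below.
 */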
static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	if (hsize >= (1UL << 30))
		snprintf(buf, size, "%luGB", hsize >> 30);
	else if (hsize >= (1UL << 20))
		snprintf(buf, size, "%luMB", hsize >> 20);
	else
		snprintf(buf, size, "%luKB", hsize >> 10);
	return buf;
}

static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	int idx;
	long max;
	struct cftype *cft = seq_cft(seq);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);

	if (local)
		max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
	else
		max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, true);
}

static void __init __hugetlb_cgroup_file_dfl_init(int idx)
{
	char buf[32];
	struct cftype *cft;
	struct hstate *h = &hstates[idx];

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	/* Add the limit file */
	cft = &h->cgroup_files_dfl[0];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->write = hugetlb_cgroup_write_dfl;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the reservation limit file */
	cft = &h->cgroup_files_dfl[1];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->write = hugetlb_cgroup_write_dfl;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the current usage file */
	cft = &h->cgroup_files_dfl[2];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the current reservation usage file */
	cft = &h->cgroup_files_dfl[3];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the events file */
	cft = &h->cgroup_files_dfl[4];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_events_show;
	cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the events.local file */
	cft = &h->cgroup_files_dfl[5];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_events_local_show;
	cft->file_offset = offsetof(struct hugetlb_cgroup,
				    events_local_file[idx]);
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* NULL terminate the last cft */
	cft = &h->cgroup_files_dfl[6];
	memset(cft, 0, sizeof(*cft));

	WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
				       h->cgroup_files_dfl));
}
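/*
 * For a 2MB hstate, for example, the cgroup v2 files set up above appear as
 * hugetlb.2MB.max, hugetlb.2MB.rsvd.max, hugetlb.2MB.current,
 * hugetlb.2MB.rsvd.current, hugetlb.2MB.events and hugetlb.2MB.events.local
 * once the cgroup core prepends the "hugetlb." subsystem prefix.  The legacy
 * (cgroup v1) variants below follow the older *_in_bytes / failcnt naming.
 */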
static void __init __hugetlb_cgroup_file_legacy_init(int idx)
{
	char buf[32];
	struct cftype *cft;
	struct hstate *h = &hstates[idx];

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	/* Add the limit file */
	cft = &h->cgroup_files_legacy[0];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
	cft->read_u64 = hugetlb_cgroup_read_u64;
	cft->write = hugetlb_cgroup_write_legacy;

	/* Add the reservation limit file */
	cft = &h->cgroup_files_legacy[1];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
	cft->read_u64 = hugetlb_cgroup_read_u64;
	cft->write = hugetlb_cgroup_write_legacy;

	/* Add the usage file */
	cft = &h->cgroup_files_legacy[2];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the reservation usage file */
	cft = &h->cgroup_files_legacy[3];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the MAX usage file */
	cft = &h->cgroup_files_legacy[4];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the MAX reservation usage file */
	cft = &h->cgroup_files_legacy[5];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the failcnt file */
	cft = &h->cgroup_files_legacy[6];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the reservation failcnt file */
	cft = &h->cgroup_files_legacy[7];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* NULL terminate the last cft */
	cft = &h->cgroup_files_legacy[8];
	memset(cft, 0, sizeof(*cft));

	WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
					  h->cgroup_files_legacy));
}

static void __init __hugetlb_cgroup_file_init(int idx)
{
	__hugetlb_cgroup_file_dfl_init(idx);
	__hugetlb_cgroup_file_legacy_init(idx);
}

void __init hugetlb_cgroup_file_init(void)
{
	struct hstate *h;

	for_each_hstate(h) {
		/*
		 * Add cgroup control files only if the huge page consists
		 * of more than two normal pages. This is because we use
		 * page[2].private for storing cgroup details.
		 */
		if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
			__hugetlb_cgroup_file_init(hstate_index(h));
	}
}

/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages
 */
void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
{
	struct hugetlb_cgroup *h_cg;
	struct hugetlb_cgroup *h_cg_rsvd;
	struct hstate *h = page_hstate(oldhpage);

	if (hugetlb_cgroup_disabled())
		return;

	VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
	spin_lock(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_page(oldhpage);
	h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
	set_hugetlb_cgroup(oldhpage, NULL);
	set_hugetlb_cgroup_rsvd(oldhpage, NULL);

	/* move the h_cg details to new cgroup */
	set_hugetlb_cgroup(newhpage, h_cg);
	set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
	list_move(&newhpage->lru, &h->hugepage_activelist);
	spin_unlock(&hugetlb_lock);
	return;
}
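/*
 * hugetlb_files is deliberately left empty (just the NULL terminator): the
 * real per-hstate control files cannot be declared statically because the
 * supported huge page sizes are only known at boot, so they are registered
 * from hugetlb_cgroup_file_init() above.
 */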
static struct cftype hugetlb_files[] = {
	{} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
	.css_alloc	= hugetlb_cgroup_css_alloc,
	.css_offline	= hugetlb_cgroup_css_offline,
	.css_free	= hugetlb_cgroup_css_free,
	.dfl_cftypes	= hugetlb_files,
	.legacy_cftypes	= hugetlb_files,
};