1 /* 2 * 3 * Copyright IBM Corporation, 2012 4 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 5 * 6 * Cgroup v2 7 * Copyright (C) 2019 Red Hat, Inc. 8 * Author: Giuseppe Scrivano <gscrivan@redhat.com> 9 * 10 * This program is free software; you can redistribute it and/or modify it 11 * under the terms of version 2.1 of the GNU Lesser General Public License 12 * as published by the Free Software Foundation. 13 * 14 * This program is distributed in the hope that it would be useful, but 15 * WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 17 * 18 */ 19 20 #include <linux/cgroup.h> 21 #include <linux/page_counter.h> 22 #include <linux/slab.h> 23 #include <linux/hugetlb.h> 24 #include <linux/hugetlb_cgroup.h> 25 26 #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) 27 #define MEMFILE_IDX(val) (((val) >> 16) & 0xffff) 28 #define MEMFILE_ATTR(val) ((val) & 0xffff) 29 30 #define hugetlb_cgroup_from_counter(counter, idx) \ 31 container_of(counter, struct hugetlb_cgroup, hugepage[idx]) 32 33 static struct hugetlb_cgroup *root_h_cgroup __read_mostly; 34 35 static inline struct page_counter * 36 __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx, 37 bool rsvd) 38 { 39 if (rsvd) 40 return &h_cg->rsvd_hugepage[idx]; 41 return &h_cg->hugepage[idx]; 42 } 43 44 static inline struct page_counter * 45 hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx) 46 { 47 return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false); 48 } 49 50 static inline struct page_counter * 51 hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx) 52 { 53 return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true); 54 } 55 56 static inline 57 struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s) 58 { 59 return s ? container_of(s, struct hugetlb_cgroup, css) : NULL; 60 } 61 62 static inline 63 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task) 64 { 65 return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id)); 66 } 67 68 static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) 69 { 70 return (h_cg == root_h_cgroup); 71 } 72 73 static inline struct hugetlb_cgroup * 74 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg) 75 { 76 return hugetlb_cgroup_from_css(h_cg->css.parent); 77 } 78 79 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) 80 { 81 int idx; 82 83 for (idx = 0; idx < hugetlb_max_hstate; idx++) { 84 if (page_counter_read( 85 hugetlb_cgroup_counter_from_cgroup(h_cg, idx))) 86 return true; 87 } 88 return false; 89 } 90 91 static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, 92 struct hugetlb_cgroup *parent_h_cgroup) 93 { 94 int idx; 95 96 for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) { 97 struct page_counter *fault_parent = NULL; 98 struct page_counter *rsvd_parent = NULL; 99 unsigned long limit; 100 int ret; 101 102 if (parent_h_cgroup) { 103 fault_parent = hugetlb_cgroup_counter_from_cgroup( 104 parent_h_cgroup, idx); 105 rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd( 106 parent_h_cgroup, idx); 107 } 108 page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup, 109 idx), 110 fault_parent); 111 page_counter_init( 112 hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), 113 rsvd_parent); 114 115 limit = round_down(PAGE_COUNTER_MAX, 116 pages_per_huge_page(&hstates[idx])); 117 118 ret = page_counter_set_max( 119 hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx), 120 limit); 121 VM_BUG_ON(ret); 122 ret = page_counter_set_max( 123 hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx), 124 limit); 125 VM_BUG_ON(ret); 126 } 127 } 128 129 static struct cgroup_subsys_state * 130 hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) 131 { 132 struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css); 133 struct hugetlb_cgroup *h_cgroup; 134 135 h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL); 136 if (!h_cgroup) 137 return ERR_PTR(-ENOMEM); 138 139 if (!parent_h_cgroup) 140 root_h_cgroup = h_cgroup; 141 142 hugetlb_cgroup_init(h_cgroup, parent_h_cgroup); 143 return &h_cgroup->css; 144 } 145 146 static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) 147 { 148 struct hugetlb_cgroup *h_cgroup; 149 150 h_cgroup = hugetlb_cgroup_from_css(css); 151 kfree(h_cgroup); 152 } 153 154 /* 155 * Should be called with hugetlb_lock held. 156 * Since we are holding hugetlb_lock, pages cannot get moved from 157 * active list or uncharged from the cgroup, So no need to get 158 * page reference and test for page active here. This function 159 * cannot fail. 160 */ 161 static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, 162 struct page *page) 163 { 164 unsigned int nr_pages; 165 struct page_counter *counter; 166 struct hugetlb_cgroup *page_hcg; 167 struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); 168 169 page_hcg = hugetlb_cgroup_from_page(page); 170 /* 171 * We can have pages in active list without any cgroup 172 * ie, hugepage with less than 3 pages. We can safely 173 * ignore those pages. 174 */ 175 if (!page_hcg || page_hcg != h_cg) 176 goto out; 177 178 nr_pages = compound_nr(page); 179 if (!parent) { 180 parent = root_h_cgroup; 181 /* root has no limit */ 182 page_counter_charge(&parent->hugepage[idx], nr_pages); 183 } 184 counter = &h_cg->hugepage[idx]; 185 /* Take the pages off the local counter */ 186 page_counter_cancel(counter, nr_pages); 187 188 set_hugetlb_cgroup(page, parent); 189 out: 190 return; 191 } 192 193 /* 194 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to 195 * the parent cgroup. 196 */ 197 static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css) 198 { 199 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); 200 struct hstate *h; 201 struct page *page; 202 int idx; 203 204 do { 205 idx = 0; 206 for_each_hstate(h) { 207 spin_lock_irq(&hugetlb_lock); 208 list_for_each_entry(page, &h->hugepage_activelist, lru) 209 hugetlb_cgroup_move_parent(idx, h_cg, page); 210 211 spin_unlock_irq(&hugetlb_lock); 212 idx++; 213 } 214 cond_resched(); 215 } while (hugetlb_cgroup_have_usage(h_cg)); 216 } 217 218 static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx, 219 enum hugetlb_memory_event event) 220 { 221 atomic_long_inc(&hugetlb->events_local[idx][event]); 222 cgroup_file_notify(&hugetlb->events_local_file[idx]); 223 224 do { 225 atomic_long_inc(&hugetlb->events[idx][event]); 226 cgroup_file_notify(&hugetlb->events_file[idx]); 227 } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) && 228 !hugetlb_cgroup_is_root(hugetlb)); 229 } 230 231 static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, 232 struct hugetlb_cgroup **ptr, 233 bool rsvd) 234 { 235 int ret = 0; 236 struct page_counter *counter; 237 struct hugetlb_cgroup *h_cg = NULL; 238 239 if (hugetlb_cgroup_disabled()) 240 goto done; 241 /* 242 * We don't charge any cgroup if the compound page have less 243 * than 3 pages. 244 */ 245 if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 246 goto done; 247 again: 248 rcu_read_lock(); 249 h_cg = hugetlb_cgroup_from_task(current); 250 if (!css_tryget(&h_cg->css)) { 251 rcu_read_unlock(); 252 goto again; 253 } 254 rcu_read_unlock(); 255 256 if (!page_counter_try_charge( 257 __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), 258 nr_pages, &counter)) { 259 ret = -ENOMEM; 260 hugetlb_event(h_cg, idx, HUGETLB_MAX); 261 css_put(&h_cg->css); 262 goto done; 263 } 264 /* Reservations take a reference to the css because they do not get 265 * reparented. 266 */ 267 if (!rsvd) 268 css_put(&h_cg->css); 269 done: 270 *ptr = h_cg; 271 return ret; 272 } 273 274 int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, 275 struct hugetlb_cgroup **ptr) 276 { 277 return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false); 278 } 279 280 int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, 281 struct hugetlb_cgroup **ptr) 282 { 283 return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true); 284 } 285 286 /* Should be called with hugetlb_lock held */ 287 static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, 288 struct hugetlb_cgroup *h_cg, 289 struct page *page, bool rsvd) 290 { 291 if (hugetlb_cgroup_disabled() || !h_cg) 292 return; 293 294 __set_hugetlb_cgroup(page, h_cg, rsvd); 295 return; 296 } 297 298 void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, 299 struct hugetlb_cgroup *h_cg, 300 struct page *page) 301 { 302 __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false); 303 } 304 305 void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, 306 struct hugetlb_cgroup *h_cg, 307 struct page *page) 308 { 309 __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true); 310 } 311 312 /* 313 * Should be called with hugetlb_lock held 314 */ 315 static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, 316 struct page *page, bool rsvd) 317 { 318 struct hugetlb_cgroup *h_cg; 319 320 if (hugetlb_cgroup_disabled()) 321 return; 322 lockdep_assert_held(&hugetlb_lock); 323 h_cg = __hugetlb_cgroup_from_page(page, rsvd); 324 if (unlikely(!h_cg)) 325 return; 326 __set_hugetlb_cgroup(page, NULL, rsvd); 327 328 page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, 329 rsvd), 330 nr_pages); 331 332 if (rsvd) 333 css_put(&h_cg->css); 334 335 return; 336 } 337 338 void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, 339 struct page *page) 340 { 341 __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false); 342 } 343 344 void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, 345 struct page *page) 346 { 347 __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true); 348 } 349 350 static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, 351 struct hugetlb_cgroup *h_cg, 352 bool rsvd) 353 { 354 if (hugetlb_cgroup_disabled() || !h_cg) 355 return; 356 357 if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 358 return; 359 360 page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, 361 rsvd), 362 nr_pages); 363 364 if (rsvd) 365 css_put(&h_cg->css); 366 } 367 368 void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, 369 struct hugetlb_cgroup *h_cg) 370 { 371 __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false); 372 } 373 374 void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, 375 struct hugetlb_cgroup *h_cg) 376 { 377 __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true); 378 } 379 380 void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, 381 unsigned long end) 382 { 383 if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter || 384 !resv->css) 385 return; 386 387 page_counter_uncharge(resv->reservation_counter, 388 (end - start) * resv->pages_per_hpage); 389 css_put(resv->css); 390 } 391 392 void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, 393 struct file_region *rg, 394 unsigned long nr_pages, 395 bool region_del) 396 { 397 if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages) 398 return; 399 400 if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 && 401 !resv->reservation_counter) { 402 page_counter_uncharge(rg->reservation_counter, 403 nr_pages * resv->pages_per_hpage); 404 /* 405 * Only do css_put(rg->css) when we delete the entire region 406 * because one file_region must hold exactly one css reference. 407 */ 408 if (region_del) 409 css_put(rg->css); 410 } 411 } 412 413 enum { 414 RES_USAGE, 415 RES_RSVD_USAGE, 416 RES_LIMIT, 417 RES_RSVD_LIMIT, 418 RES_MAX_USAGE, 419 RES_RSVD_MAX_USAGE, 420 RES_FAILCNT, 421 RES_RSVD_FAILCNT, 422 }; 423 424 static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, 425 struct cftype *cft) 426 { 427 struct page_counter *counter; 428 struct page_counter *rsvd_counter; 429 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); 430 431 counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)]; 432 rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)]; 433 434 switch (MEMFILE_ATTR(cft->private)) { 435 case RES_USAGE: 436 return (u64)page_counter_read(counter) * PAGE_SIZE; 437 case RES_RSVD_USAGE: 438 return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE; 439 case RES_LIMIT: 440 return (u64)counter->max * PAGE_SIZE; 441 case RES_RSVD_LIMIT: 442 return (u64)rsvd_counter->max * PAGE_SIZE; 443 case RES_MAX_USAGE: 444 return (u64)counter->watermark * PAGE_SIZE; 445 case RES_RSVD_MAX_USAGE: 446 return (u64)rsvd_counter->watermark * PAGE_SIZE; 447 case RES_FAILCNT: 448 return counter->failcnt; 449 case RES_RSVD_FAILCNT: 450 return rsvd_counter->failcnt; 451 default: 452 BUG(); 453 } 454 } 455 456 static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v) 457 { 458 int idx; 459 u64 val; 460 struct cftype *cft = seq_cft(seq); 461 unsigned long limit; 462 struct page_counter *counter; 463 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); 464 465 idx = MEMFILE_IDX(cft->private); 466 counter = &h_cg->hugepage[idx]; 467 468 limit = round_down(PAGE_COUNTER_MAX, 469 pages_per_huge_page(&hstates[idx])); 470 471 switch (MEMFILE_ATTR(cft->private)) { 472 case RES_RSVD_USAGE: 473 counter = &h_cg->rsvd_hugepage[idx]; 474 fallthrough; 475 case RES_USAGE: 476 val = (u64)page_counter_read(counter); 477 seq_printf(seq, "%llu\n", val * PAGE_SIZE); 478 break; 479 case RES_RSVD_LIMIT: 480 counter = &h_cg->rsvd_hugepage[idx]; 481 fallthrough; 482 case RES_LIMIT: 483 val = (u64)counter->max; 484 if (val == limit) 485 seq_puts(seq, "max\n"); 486 else 487 seq_printf(seq, "%llu\n", val * PAGE_SIZE); 488 break; 489 default: 490 BUG(); 491 } 492 493 return 0; 494 } 495 496 static DEFINE_MUTEX(hugetlb_limit_mutex); 497 498 static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, 499 char *buf, size_t nbytes, loff_t off, 500 const char *max) 501 { 502 int ret, idx; 503 unsigned long nr_pages; 504 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); 505 bool rsvd = false; 506 507 if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */ 508 return -EINVAL; 509 510 buf = strstrip(buf); 511 ret = page_counter_memparse(buf, max, &nr_pages); 512 if (ret) 513 return ret; 514 515 idx = MEMFILE_IDX(of_cft(of)->private); 516 nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx])); 517 518 switch (MEMFILE_ATTR(of_cft(of)->private)) { 519 case RES_RSVD_LIMIT: 520 rsvd = true; 521 fallthrough; 522 case RES_LIMIT: 523 mutex_lock(&hugetlb_limit_mutex); 524 ret = page_counter_set_max( 525 __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd), 526 nr_pages); 527 mutex_unlock(&hugetlb_limit_mutex); 528 break; 529 default: 530 ret = -EINVAL; 531 break; 532 } 533 return ret ?: nbytes; 534 } 535 536 static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of, 537 char *buf, size_t nbytes, loff_t off) 538 { 539 return hugetlb_cgroup_write(of, buf, nbytes, off, "-1"); 540 } 541 542 static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of, 543 char *buf, size_t nbytes, loff_t off) 544 { 545 return hugetlb_cgroup_write(of, buf, nbytes, off, "max"); 546 } 547 548 static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, 549 char *buf, size_t nbytes, loff_t off) 550 { 551 int ret = 0; 552 struct page_counter *counter, *rsvd_counter; 553 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); 554 555 counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)]; 556 rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)]; 557 558 switch (MEMFILE_ATTR(of_cft(of)->private)) { 559 case RES_MAX_USAGE: 560 page_counter_reset_watermark(counter); 561 break; 562 case RES_RSVD_MAX_USAGE: 563 page_counter_reset_watermark(rsvd_counter); 564 break; 565 case RES_FAILCNT: 566 counter->failcnt = 0; 567 break; 568 case RES_RSVD_FAILCNT: 569 rsvd_counter->failcnt = 0; 570 break; 571 default: 572 ret = -EINVAL; 573 break; 574 } 575 return ret ?: nbytes; 576 } 577 578 static char *mem_fmt(char *buf, int size, unsigned long hsize) 579 { 580 if (hsize >= (1UL << 30)) 581 snprintf(buf, size, "%luGB", hsize >> 30); 582 else if (hsize >= (1UL << 20)) 583 snprintf(buf, size, "%luMB", hsize >> 20); 584 else 585 snprintf(buf, size, "%luKB", hsize >> 10); 586 return buf; 587 } 588 589 static int __hugetlb_events_show(struct seq_file *seq, bool local) 590 { 591 int idx; 592 long max; 593 struct cftype *cft = seq_cft(seq); 594 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq)); 595 596 idx = MEMFILE_IDX(cft->private); 597 598 if (local) 599 max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]); 600 else 601 max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]); 602 603 seq_printf(seq, "max %lu\n", max); 604 605 return 0; 606 } 607 608 static int hugetlb_events_show(struct seq_file *seq, void *v) 609 { 610 return __hugetlb_events_show(seq, false); 611 } 612 613 static int hugetlb_events_local_show(struct seq_file *seq, void *v) 614 { 615 return __hugetlb_events_show(seq, true); 616 } 617 618 static void __init __hugetlb_cgroup_file_dfl_init(int idx) 619 { 620 char buf[32]; 621 struct cftype *cft; 622 struct hstate *h = &hstates[idx]; 623 624 /* format the size */ 625 mem_fmt(buf, sizeof(buf), huge_page_size(h)); 626 627 /* Add the limit file */ 628 cft = &h->cgroup_files_dfl[0]; 629 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf); 630 cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); 631 cft->seq_show = hugetlb_cgroup_read_u64_max; 632 cft->write = hugetlb_cgroup_write_dfl; 633 cft->flags = CFTYPE_NOT_ON_ROOT; 634 635 /* Add the reservation limit file */ 636 cft = &h->cgroup_files_dfl[1]; 637 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf); 638 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); 639 cft->seq_show = hugetlb_cgroup_read_u64_max; 640 cft->write = hugetlb_cgroup_write_dfl; 641 cft->flags = CFTYPE_NOT_ON_ROOT; 642 643 /* Add the current usage file */ 644 cft = &h->cgroup_files_dfl[2]; 645 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf); 646 cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); 647 cft->seq_show = hugetlb_cgroup_read_u64_max; 648 cft->flags = CFTYPE_NOT_ON_ROOT; 649 650 /* Add the current reservation usage file */ 651 cft = &h->cgroup_files_dfl[3]; 652 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf); 653 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); 654 cft->seq_show = hugetlb_cgroup_read_u64_max; 655 cft->flags = CFTYPE_NOT_ON_ROOT; 656 657 /* Add the events file */ 658 cft = &h->cgroup_files_dfl[4]; 659 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf); 660 cft->private = MEMFILE_PRIVATE(idx, 0); 661 cft->seq_show = hugetlb_events_show; 662 cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]); 663 cft->flags = CFTYPE_NOT_ON_ROOT; 664 665 /* Add the events.local file */ 666 cft = &h->cgroup_files_dfl[5]; 667 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf); 668 cft->private = MEMFILE_PRIVATE(idx, 0); 669 cft->seq_show = hugetlb_events_local_show; 670 cft->file_offset = offsetof(struct hugetlb_cgroup, 671 events_local_file[idx]); 672 cft->flags = CFTYPE_NOT_ON_ROOT; 673 674 /* NULL terminate the last cft */ 675 cft = &h->cgroup_files_dfl[6]; 676 memset(cft, 0, sizeof(*cft)); 677 678 WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys, 679 h->cgroup_files_dfl)); 680 } 681 682 static void __init __hugetlb_cgroup_file_legacy_init(int idx) 683 { 684 char buf[32]; 685 struct cftype *cft; 686 struct hstate *h = &hstates[idx]; 687 688 /* format the size */ 689 mem_fmt(buf, sizeof(buf), huge_page_size(h)); 690 691 /* Add the limit file */ 692 cft = &h->cgroup_files_legacy[0]; 693 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); 694 cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); 695 cft->read_u64 = hugetlb_cgroup_read_u64; 696 cft->write = hugetlb_cgroup_write_legacy; 697 698 /* Add the reservation limit file */ 699 cft = &h->cgroup_files_legacy[1]; 700 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf); 701 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT); 702 cft->read_u64 = hugetlb_cgroup_read_u64; 703 cft->write = hugetlb_cgroup_write_legacy; 704 705 /* Add the usage file */ 706 cft = &h->cgroup_files_legacy[2]; 707 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf); 708 cft->private = MEMFILE_PRIVATE(idx, RES_USAGE); 709 cft->read_u64 = hugetlb_cgroup_read_u64; 710 711 /* Add the reservation usage file */ 712 cft = &h->cgroup_files_legacy[3]; 713 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf); 714 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE); 715 cft->read_u64 = hugetlb_cgroup_read_u64; 716 717 /* Add the MAX usage file */ 718 cft = &h->cgroup_files_legacy[4]; 719 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); 720 cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); 721 cft->write = hugetlb_cgroup_reset; 722 cft->read_u64 = hugetlb_cgroup_read_u64; 723 724 /* Add the MAX reservation usage file */ 725 cft = &h->cgroup_files_legacy[5]; 726 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf); 727 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE); 728 cft->write = hugetlb_cgroup_reset; 729 cft->read_u64 = hugetlb_cgroup_read_u64; 730 731 /* Add the failcntfile */ 732 cft = &h->cgroup_files_legacy[6]; 733 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); 734 cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); 735 cft->write = hugetlb_cgroup_reset; 736 cft->read_u64 = hugetlb_cgroup_read_u64; 737 738 /* Add the reservation failcntfile */ 739 cft = &h->cgroup_files_legacy[7]; 740 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf); 741 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT); 742 cft->write = hugetlb_cgroup_reset; 743 cft->read_u64 = hugetlb_cgroup_read_u64; 744 745 /* NULL terminate the last cft */ 746 cft = &h->cgroup_files_legacy[8]; 747 memset(cft, 0, sizeof(*cft)); 748 749 WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, 750 h->cgroup_files_legacy)); 751 } 752 753 static void __init __hugetlb_cgroup_file_init(int idx) 754 { 755 __hugetlb_cgroup_file_dfl_init(idx); 756 __hugetlb_cgroup_file_legacy_init(idx); 757 } 758 759 void __init hugetlb_cgroup_file_init(void) 760 { 761 struct hstate *h; 762 763 for_each_hstate(h) { 764 /* 765 * Add cgroup control files only if the huge page consists 766 * of more than two normal pages. This is because we use 767 * page[2].private for storing cgroup details. 768 */ 769 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) 770 __hugetlb_cgroup_file_init(hstate_index(h)); 771 } 772 } 773 774 /* 775 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen 776 * when we migrate hugepages 777 */ 778 void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) 779 { 780 struct hugetlb_cgroup *h_cg; 781 struct hugetlb_cgroup *h_cg_rsvd; 782 struct hstate *h = page_hstate(oldhpage); 783 784 if (hugetlb_cgroup_disabled()) 785 return; 786 787 spin_lock_irq(&hugetlb_lock); 788 h_cg = hugetlb_cgroup_from_page(oldhpage); 789 h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage); 790 set_hugetlb_cgroup(oldhpage, NULL); 791 set_hugetlb_cgroup_rsvd(oldhpage, NULL); 792 793 /* move the h_cg details to new cgroup */ 794 set_hugetlb_cgroup(newhpage, h_cg); 795 set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd); 796 list_move(&newhpage->lru, &h->hugepage_activelist); 797 spin_unlock_irq(&hugetlb_lock); 798 return; 799 } 800 801 static struct cftype hugetlb_files[] = { 802 {} /* terminate */ 803 }; 804 805 struct cgroup_subsys hugetlb_cgrp_subsys = { 806 .css_alloc = hugetlb_cgroup_css_alloc, 807 .css_offline = hugetlb_cgroup_css_offline, 808 .css_free = hugetlb_cgroup_css_free, 809 .dfl_cftypes = hugetlb_files, 810 .legacy_cftypes = hugetlb_files, 811 }; 812