/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)

static struct hugetlb_cgroup *root_h_cgroup __read_mostly;

static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
				     bool rsvd)
{
	if (rsvd)
		return &h_cg->rsvd_hugepage[idx];
	return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
	return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
	return hugetlb_cgroup_from_css(h_cg->css.parent);
}

static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
	int idx;

	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
		if (page_counter_read(
			    hugetlb_cgroup_counter_from_cgroup(h_cg, idx)))
			return true;
	}
	return false;
}

static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
				struct hugetlb_cgroup *parent_h_cgroup)
{
	int idx;

	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
		struct page_counter *fault_parent = NULL;
		struct page_counter *rsvd_parent = NULL;
		unsigned long limit;
		int ret;

		if (parent_h_cgroup) {
			fault_parent = hugetlb_cgroup_counter_from_cgroup(
				parent_h_cgroup, idx);
			rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
				parent_h_cgroup, idx);
		}
		page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
								     idx),
				  fault_parent);
		page_counter_init(
			hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
			rsvd_parent);

		limit = round_down(PAGE_COUNTER_MAX,
				   pages_per_huge_page(&hstates[idx]));

		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
			limit);
		VM_BUG_ON(ret);
		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
			limit);
		VM_BUG_ON(ret);
	}
}

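/*
 * Free the per-node statistics and then the hugetlb_cgroup itself. Safe to
 * call on a partially initialised hugetlb_cgroup: entries that were never
 * allocated are still NULL from kzalloc(), and kfree(NULL) is a no-op.
 */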
static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
{
	int node;

	for_each_node(node)
		kfree(h_cgroup->nodeinfo[node]);
	kfree(h_cgroup);
}

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
	struct hugetlb_cgroup *h_cgroup;
	int node;

	h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids),
			   GFP_KERNEL);

	if (!h_cgroup)
		return ERR_PTR(-ENOMEM);

	if (!parent_h_cgroup)
		root_h_cgroup = h_cgroup;

	/*
	 * TODO: this routine can waste much memory for nodes which will
	 * never be onlined. It's better to use a memory hotplug callback
	 * function.
	 */
	for_each_node(node) {
		/* Set node_to_alloc to -1 for offline nodes. */
		int node_to_alloc =
			node_state(node, N_NORMAL_MEMORY) ? node : -1;
		h_cgroup->nodeinfo[node] =
			kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
				     GFP_KERNEL, node_to_alloc);
		if (!h_cgroup->nodeinfo[node])
			goto fail_alloc_nodeinfo;
	}

	hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
	return &h_cgroup->css;

fail_alloc_nodeinfo:
	hugetlb_cgroup_free(h_cgroup);
	return ERR_PTR(-ENOMEM);
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	hugetlb_cgroup_free(hugetlb_cgroup_from_css(css));
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved off the
 * active list or uncharged from the cgroup, so there is no need to take
 * a page reference and test for page activity here. This function
 * cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct page *page)
{
	unsigned int nr_pages;
	struct page_counter *counter;
	struct hugetlb_cgroup *page_hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

	page_hcg = hugetlb_cgroup_from_page(page);
	/*
	 * We can have pages on the active list without any cgroup, i.e. a
	 * hugepage with less than 3 pages. We can safely ignore those pages.
	 */
	if (!page_hcg || page_hcg != h_cg)
		goto out;

	nr_pages = compound_nr(page);
	if (!parent) {
		parent = root_h_cgroup;
		/* root has no limit */
		page_counter_charge(&parent->hugepage[idx], nr_pages);
	}
	counter = &h_cg->hugepage[idx];
	/* Take the pages off the local counter */
	page_counter_cancel(counter, nr_pages);

	set_hugetlb_cgroup(page, parent);
out:
	return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
	struct hstate *h;
	struct page *page;
	int idx;

	do {
		idx = 0;
		for_each_hstate(h) {
			spin_lock_irq(&hugetlb_lock);
			list_for_each_entry(page, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(idx, h_cg, page);

			spin_unlock_irq(&hugetlb_lock);
			idx++;
		}
		cond_resched();
	} while (hugetlb_cgroup_have_usage(h_cg));
}

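/*
 * Record a hugetlb memory event for @hugetlb. The local counter is bumped
 * only on this cgroup; the hierarchical counter is bumped on this cgroup and
 * on every ancestor up to, but not including, the root, notifying the
 * corresponding events files along the way.
 */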
static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
				 enum hugetlb_memory_event event)
{
	atomic_long_inc(&hugetlb->events_local[idx][event]);
	cgroup_file_notify(&hugetlb->events_local_file[idx]);

	do {
		atomic_long_inc(&hugetlb->events[idx][event]);
		cgroup_file_notify(&hugetlb->events_file[idx]);
	} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
		 !hugetlb_cgroup_is_root(hugetlb));
}

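/*
 * Charge @nr_pages base pages of hstate @idx to the current task's hugetlb
 * cgroup and return that cgroup in *@ptr. Returns 0 on success, or -ENOMEM
 * (and records a HUGETLB_MAX event) if the charge would exceed the limit.
 * For reservation charges (rsvd == true) the css reference taken here is
 * kept, because reservations are not reparented; it is dropped again when
 * the reservation is uncharged.
 */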
static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
					  struct hugetlb_cgroup **ptr,
					  bool rsvd)
{
	int ret = 0;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = NULL;

	if (hugetlb_cgroup_disabled())
		goto done;
	/*
	 * We don't charge any cgroup if the compound page has less
	 * than 3 pages.
	 */
	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	if (!page_counter_try_charge(
		    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		    nr_pages, &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
		css_put(&h_cg->css);
		goto done;
	}
	/* Reservations take a reference to the css because they do not get
	 * reparented.
	 */
	if (!rsvd)
		css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}

int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
				      struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}

/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg,
					   struct page *page, bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	__set_hugetlb_cgroup(page, h_cg, rsvd);
	if (!rsvd) {
		unsigned long usage =
			h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx],
			   usage + nr_pages);
	}
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct page *page)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
				       struct hugetlb_cgroup *h_cg,
				       struct page *page)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
}

/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
					   struct page *page, bool rsvd)
{
	struct hugetlb_cgroup *h_cg;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = __hugetlb_cgroup_from_page(page, rsvd);
	if (unlikely(!h_cg))
		return;
	__set_hugetlb_cgroup(page, NULL, rsvd);

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
	else {
		unsigned long usage =
			h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx],
			   usage - nr_pages);
	}
}

void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
				  struct page *page)
{
	__hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
}

void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
				       struct page *page)
{
	__hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
}

static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					     struct hugetlb_cgroup *h_cg,
					     bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		return;

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
					 struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

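/*
 * Return the reservation charge for (end - start) huge pages to the page
 * counter stashed in the resv_map and drop the css reference the reservation
 * held. A no-op when hugetlb cgroup accounting is disabled or the resv_map
 * carries no cgroup information.
 */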
void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
				     unsigned long end)
{
	if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
	    !resv->css)
		return;

	page_counter_uncharge(resv->reservation_counter,
			      (end - start) * resv->pages_per_hpage);
	css_put(resv->css);
}

void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
					 struct file_region *rg,
					 unsigned long nr_pages,
					 bool region_del)
{
	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
		return;

	if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 &&
	    !resv->reservation_counter) {
		page_counter_uncharge(rg->reservation_counter,
				      nr_pages * resv->pages_per_hpage);
		/*
		 * Only do css_put(rg->css) when we delete the entire region
		 * because one file_region must hold exactly one css reference.
		 */
		if (region_del)
			css_put(rg->css);
	}
}

enum {
	RES_USAGE,
	RES_RSVD_USAGE,
	RES_LIMIT,
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,
	RES_RSVD_FAILCNT,
};

static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
{
	int nid;
	struct cftype *cft = seq_cft(seq);
	int idx = MEMFILE_IDX(cft->private);
	bool legacy = MEMFILE_ATTR(cft->private);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
	struct cgroup_subsys_state *css;
	unsigned long usage;

	if (legacy) {
		/* Add up usage across all nodes for the non-hierarchical total. */
		usage = 0;
		for_each_node_state(nid, N_MEMORY)
			usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
		seq_printf(seq, "total=%lu", usage * PAGE_SIZE);

		/* Simply print the per-node usage for the non-hierarchical total. */
		for_each_node_state(nid, N_MEMORY)
			seq_printf(seq, " N%d=%lu", nid,
				   READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
					   PAGE_SIZE);
		seq_putc(seq, '\n');
	}

	/*
	 * The hierarchical total is pretty much the value recorded by the
	 * counter, so use that.
	 */
	seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
		   page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);

	/*
	 * For each node, traverse the css tree to obtain the hierarchical
	 * node usage.
	 */
	for_each_node_state(nid, N_MEMORY) {
		usage = 0;
		rcu_read_lock();
		css_for_each_descendant_pre(css, &h_cg->css) {
			usage += READ_ONCE(hugetlb_cgroup_from_css(css)
						   ->nodeinfo[nid]
						   ->usage[idx]);
		}
		rcu_read_unlock();
		seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
	}

	seq_putc(seq, '\n');

	return 0;
}

static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
				   struct cftype *cft)
{
	struct page_counter *counter;
	struct page_counter *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

	counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_USAGE:
		return (u64)page_counter_read(counter) * PAGE_SIZE;
	case RES_RSVD_USAGE:
		return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
	case RES_LIMIT:
		return (u64)counter->max * PAGE_SIZE;
	case RES_RSVD_LIMIT:
		return (u64)rsvd_counter->max * PAGE_SIZE;
	case RES_MAX_USAGE:
		return (u64)counter->watermark * PAGE_SIZE;
	case RES_RSVD_MAX_USAGE:
		return (u64)rsvd_counter->watermark * PAGE_SIZE;
	case RES_FAILCNT:
		return counter->failcnt;
	case RES_RSVD_FAILCNT:
		return rsvd_counter->failcnt;
	default:
		BUG();
	}
}

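/*
 * seq_show handler for the cgroup v2 files: prints usage or limit in bytes,
 * and prints "max" instead of a number when the limit still equals the
 * rounded-down PAGE_COUNTER_MAX value used for "unlimited".
 */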
static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
	int idx;
	u64 val;
	struct cftype *cft = seq_cft(seq);
	unsigned long limit;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);
	counter = &h_cg->hugepage[idx];

	limit = round_down(PAGE_COUNTER_MAX,
			   pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_RSVD_USAGE:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_USAGE:
		val = (u64)page_counter_read(counter);
		seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	case RES_RSVD_LIMIT:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_LIMIT:
		val = (u64)counter->max;
		if (val == limit)
			seq_puts(seq, "max\n");
		else
			seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	default:
		BUG();
	}

	return 0;
}

static DEFINE_MUTEX(hugetlb_limit_mutex);

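/*
 * Common write handler for the limit files. The value is parsed in bytes
 * (the @max string, "-1" on cgroup v1 and "max" on v2, selects the unlimited
 * value), rounded down to a whole number of huge pages and applied to either
 * the fault or the reservation counter. For example, writing "1G" sets the
 * limit to 1 GiB rounded down to a multiple of the huge page size. Writes to
 * the root cgroup are rejected.
 */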
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off,
				    const char *max)
{
	int ret, idx;
	unsigned long nr_pages;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
	bool rsvd = false;

	if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
		return -EINVAL;

	buf = strstrip(buf);
	ret = page_counter_memparse(buf, max, &nr_pages);
	if (ret)
		return ret;

	idx = MEMFILE_IDX(of_cft(of)->private);
	nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_RSVD_LIMIT:
		rsvd = true;
		fallthrough;
	case RES_LIMIT:
		mutex_lock(&hugetlb_limit_mutex);
		ret = page_counter_set_max(
			__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
			nr_pages);
		mutex_unlock(&hugetlb_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
					   char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	int ret = 0;
	struct page_counter *counter, *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

	counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(counter);
		break;
	case RES_RSVD_MAX_USAGE:
		page_counter_reset_watermark(rsvd_counter);
		break;
	case RES_FAILCNT:
		counter->failcnt = 0;
		break;
	case RES_RSVD_FAILCNT:
		rsvd_counter->failcnt = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	if (hsize >= (1UL << 30))
		snprintf(buf, size, "%luGB", hsize >> 30);
	else if (hsize >= (1UL << 20))
		snprintf(buf, size, "%luMB", hsize >> 20);
	else
		snprintf(buf, size, "%luKB", hsize >> 10);
	return buf;
}

static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	int idx;
	long max;
	struct cftype *cft = seq_cft(seq);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);

	if (local)
		max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
	else
		max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, true);
}

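/*
 * Register the cgroup v2 (default hierarchy) control files for one hstate:
 * <size>.max, <size>.rsvd.max, <size>.current, <size>.rsvd.current,
 * <size>.events, <size>.events.local and <size>.numa_stat, where <size> is
 * e.g. "2MB" or "1GB" as produced by mem_fmt().
 */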
static void __init __hugetlb_cgroup_file_dfl_init(int idx)
{
	char buf[32];
	struct cftype *cft;
	struct hstate *h = &hstates[idx];

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	/* Add the limit file */
	cft = &h->cgroup_files_dfl[0];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->write = hugetlb_cgroup_write_dfl;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the reservation limit file */
	cft = &h->cgroup_files_dfl[1];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->write = hugetlb_cgroup_write_dfl;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the current usage file */
	cft = &h->cgroup_files_dfl[2];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the current reservation usage file */
	cft = &h->cgroup_files_dfl[3];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the events file */
	cft = &h->cgroup_files_dfl[4];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_events_show;
	cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the events.local file */
	cft = &h->cgroup_files_dfl[5];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_events_local_show;
	cft->file_offset = offsetof(struct hugetlb_cgroup,
				    events_local_file[idx]);
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the numa stat file */
	cft = &h->cgroup_files_dfl[6];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_cgroup_read_numa_stat;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* NULL terminate the last cft */
	cft = &h->cgroup_files_dfl[7];
	memset(cft, 0, sizeof(*cft));

	WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
				       h->cgroup_files_dfl));
}

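/*
 * Register the cgroup v1 (legacy hierarchy) control files for one hstate.
 * These keep the memcg-style naming (<size>.limit_in_bytes,
 * <size>.usage_in_bytes, <size>.max_usage_in_bytes, <size>.failcnt and their
 * .rsvd. variants) plus <size>.numa_stat.
 */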
static void __init __hugetlb_cgroup_file_legacy_init(int idx)
{
	char buf[32];
	struct cftype *cft;
	struct hstate *h = &hstates[idx];

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	/* Add the limit file */
	cft = &h->cgroup_files_legacy[0];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
	cft->read_u64 = hugetlb_cgroup_read_u64;
	cft->write = hugetlb_cgroup_write_legacy;

	/* Add the reservation limit file */
	cft = &h->cgroup_files_legacy[1];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
	cft->read_u64 = hugetlb_cgroup_read_u64;
	cft->write = hugetlb_cgroup_write_legacy;

	/* Add the usage file */
	cft = &h->cgroup_files_legacy[2];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the reservation usage file */
	cft = &h->cgroup_files_legacy[3];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the MAX usage file */
	cft = &h->cgroup_files_legacy[4];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the MAX reservation usage file */
	cft = &h->cgroup_files_legacy[5];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the failcnt file */
	cft = &h->cgroup_files_legacy[6];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the reservation failcnt file */
	cft = &h->cgroup_files_legacy[7];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the numa stat file */
	cft = &h->cgroup_files_legacy[8];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
	cft->private = MEMFILE_PRIVATE(idx, 1);
	cft->seq_show = hugetlb_cgroup_read_numa_stat;

	/* NULL terminate the last cft */
	cft = &h->cgroup_files_legacy[9];
	memset(cft, 0, sizeof(*cft));

	WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
					  h->cgroup_files_legacy));
}

static void __init __hugetlb_cgroup_file_init(int idx)
{
	__hugetlb_cgroup_file_dfl_init(idx);
	__hugetlb_cgroup_file_legacy_init(idx);
}

void __init hugetlb_cgroup_file_init(void)
{
	struct hstate *h;

	for_each_hstate(h) {
		/*
		 * Add cgroup control files only if the huge page consists
		 * of more than two normal pages. This is because we use
		 * page[2].private for storing cgroup details.
		 */
		if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
			__hugetlb_cgroup_file_init(hstate_index(h));
	}
}

/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages
 */
void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
{
	struct hugetlb_cgroup *h_cg;
	struct hugetlb_cgroup *h_cg_rsvd;
	struct hstate *h = page_hstate(oldhpage);

	if (hugetlb_cgroup_disabled())
		return;

	spin_lock_irq(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_page(oldhpage);
	h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
	set_hugetlb_cgroup(oldhpage, NULL);
	set_hugetlb_cgroup_rsvd(oldhpage, NULL);

	/* move the h_cg details to the new page */
	set_hugetlb_cgroup(newhpage, h_cg);
	set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
	list_move(&newhpage->lru, &h->hugepage_activelist);
	spin_unlock_irq(&hugetlb_lock);
	return;
}

static struct cftype hugetlb_files[] = {
	{} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
	.css_alloc	= hugetlb_cgroup_css_alloc,
	.css_offline	= hugetlb_cgroup_css_offline,
	.css_free	= hugetlb_cgroup_css_free,
	.dfl_cftypes	= hugetlb_files,
	.legacy_cftypes	= hugetlb_files,
};