/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)

static struct hugetlb_cgroup *root_h_cgroup __read_mostly;

static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
				     bool rsvd)
{
	if (rsvd)
		return &h_cg->rsvd_hugepage[idx];
	return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
	return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
	return hugetlb_cgroup_from_css(h_cg->css.parent);
}

static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
	struct hstate *h;

	for_each_hstate(h) {
		if (page_counter_read(
		    hugetlb_cgroup_counter_from_cgroup(h_cg, hstate_index(h))))
			return true;
	}
	return false;
}

static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
				struct hugetlb_cgroup *parent_h_cgroup)
{
	int idx;

	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
		struct page_counter *fault_parent = NULL;
		struct page_counter *rsvd_parent = NULL;
		unsigned long limit;
		int ret;

		if (parent_h_cgroup) {
			fault_parent = hugetlb_cgroup_counter_from_cgroup(
				parent_h_cgroup, idx);
			rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
				parent_h_cgroup, idx);
		}
		page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
								     idx),
				  fault_parent);
		page_counter_init(
			hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
			rsvd_parent);

		limit = round_down(PAGE_COUNTER_MAX,
				   pages_per_huge_page(&hstates[idx]));

		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
			limit);
		VM_BUG_ON(ret);
		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
			limit);
		VM_BUG_ON(ret);
	}
}

static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
{
	int node;

	for_each_node(node)
		kfree(h_cgroup->nodeinfo[node]);
	kfree(h_cgroup);
}

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
	struct hugetlb_cgroup *h_cgroup;
	int node;

	h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids),
			   GFP_KERNEL);

	if (!h_cgroup)
		return ERR_PTR(-ENOMEM);

	if (!parent_h_cgroup)
		root_h_cgroup = h_cgroup;

	/*
	 * TODO: this routine can waste much memory for nodes which will
	 *       never be onlined. It's better to use memory hotplug callback
	 *       function.
	 */
	for_each_node(node) {
		/* Set node_to_alloc to NUMA_NO_NODE for offline nodes. */
		int node_to_alloc =
			node_state(node, N_NORMAL_MEMORY) ? node : NUMA_NO_NODE;
		h_cgroup->nodeinfo[node] =
			kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
				     GFP_KERNEL, node_to_alloc);
		if (!h_cgroup->nodeinfo[node])
			goto fail_alloc_nodeinfo;
	}

	hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
	return &h_cgroup->css;

fail_alloc_nodeinfo:
	hugetlb_cgroup_free(h_cgroup);
	return ERR_PTR(-ENOMEM);
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	hugetlb_cgroup_free(hugetlb_cgroup_from_css(css));
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved from the
 * active list or uncharged from the cgroup, so there is no need to take
 * a page reference or test for page active here. This function cannot
 * fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct page *page)
{
	unsigned int nr_pages;
	struct page_counter *counter;
	struct hugetlb_cgroup *page_hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
	struct folio *folio = page_folio(page);

	page_hcg = hugetlb_cgroup_from_folio(folio);
	/*
	 * We can have pages in the active list without any cgroup
	 * attached to them, i.e., hugepages with less than 3 pages.
	 * We can safely ignore those pages.
	 */
	if (!page_hcg || page_hcg != h_cg)
		goto out;

	nr_pages = compound_nr(page);
	if (!parent) {
		parent = root_h_cgroup;
		/* root has no limit */
		page_counter_charge(&parent->hugepage[idx], nr_pages);
	}
	counter = &h_cg->hugepage[idx];
	/* Take the pages off the local counter */
	page_counter_cancel(counter, nr_pages);

	set_hugetlb_cgroup(folio, parent);
out:
	return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
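 *
 * Called when the cgroup goes offline (e.g. on rmdir).  The loop below
 * repeatedly walks each hstate's active list and reparents the charges
 * until hugetlb_cgroup_have_usage() reports that nothing is left.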
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
	struct hstate *h;
	struct page *page;

	do {
		for_each_hstate(h) {
			spin_lock_irq(&hugetlb_lock);
			list_for_each_entry(page, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(hstate_index(h), h_cg, page);

			spin_unlock_irq(&hugetlb_lock);
		}
		cond_resched();
	} while (hugetlb_cgroup_have_usage(h_cg));
}

static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
				 enum hugetlb_memory_event event)
{
	atomic_long_inc(&hugetlb->events_local[idx][event]);
	cgroup_file_notify(&hugetlb->events_local_file[idx]);

	do {
		atomic_long_inc(&hugetlb->events[idx][event]);
		cgroup_file_notify(&hugetlb->events_file[idx]);
	} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
		 !hugetlb_cgroup_is_root(hugetlb));
}

static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
					  struct hugetlb_cgroup **ptr,
					  bool rsvd)
{
	int ret = 0;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = NULL;

	if (hugetlb_cgroup_disabled())
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	if (!page_counter_try_charge(
		    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		    nr_pages, &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
		css_put(&h_cg->css);
		goto done;
	}
	/* Reservations take a reference to the css because they do not get
	 * reparented.
	 */
	if (!rsvd)
		css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}

int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
				      struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}

/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg,
					   struct folio *folio, bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	__set_hugetlb_cgroup(folio, h_cg, rsvd);
	if (!rsvd) {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
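		 *
		 * The per-node usage[] array only feeds the numa_stat files;
		 * the page_counter was already charged in
		 * __hugetlb_cgroup_charge_cgroup(), so committing just records
		 * the folio's owning cgroup and the per-node accounting.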
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage + nr_pages);
	}
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct folio *folio)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
				       struct hugetlb_cgroup *h_cg,
				       struct folio *folio)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true);
}

/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
					    struct folio *folio, bool rsvd)
{
	struct hugetlb_cgroup *h_cg;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = __hugetlb_cgroup_from_folio(folio, rsvd);
	if (unlikely(!h_cg))
		return;
	__set_hugetlb_cgroup(folio, NULL, rsvd);

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
	else {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage - nr_pages);
	}
}

void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
				   struct folio *folio)
{
	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false);
}

void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
					struct folio *folio)
{
	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true);
}

static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					     struct hugetlb_cgroup *h_cg,
					     bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
					 struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
				     unsigned long end)
{
	if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
	    !resv->css)
		return;

	page_counter_uncharge(resv->reservation_counter,
			      (end - start) * resv->pages_per_hpage);
	css_put(resv->css);
}

void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
					 struct file_region *rg,
					 unsigned long nr_pages,
					 bool region_del)
{
	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
		return;

	if (rg->reservation_counter && resv->pages_per_hpage &&
	    !resv->reservation_counter) {
		page_counter_uncharge(rg->reservation_counter,
				      nr_pages * resv->pages_per_hpage);
		/*
		 * Only do css_put(rg->css) when we delete the entire region
		 * because one file_region must hold exactly one css reference.
		 */
		if (region_del)
			css_put(rg->css);
	}
}

enum {
	RES_USAGE,
	RES_RSVD_USAGE,
	RES_LIMIT,
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,
	RES_RSVD_FAILCNT,
};

static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
{
	int nid;
	struct cftype *cft = seq_cft(seq);
	int idx = MEMFILE_IDX(cft->private);
	bool legacy = MEMFILE_ATTR(cft->private);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
	struct cgroup_subsys_state *css;
	unsigned long usage;

	if (legacy) {
		/* Add up usage across all nodes for the non-hierarchical total. */
		usage = 0;
		for_each_node_state(nid, N_MEMORY)
			usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
		seq_printf(seq, "total=%lu", usage * PAGE_SIZE);

		/* Print the per-node, non-hierarchical usage. */
		for_each_node_state(nid, N_MEMORY)
			seq_printf(seq, " N%d=%lu", nid,
				   READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
					   PAGE_SIZE);
		seq_putc(seq, '\n');
	}

	/*
	 * The hierarchical total is pretty much the value recorded by the
	 * counter, so use that.
	 */
	seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
		   page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);

	/*
	 * For each node, traverse the css tree to obtain the hierarchical
	 * node usage.
	 */
	for_each_node_state(nid, N_MEMORY) {
		usage = 0;
		rcu_read_lock();
		css_for_each_descendant_pre(css, &h_cg->css) {
			usage += READ_ONCE(hugetlb_cgroup_from_css(css)
						   ->nodeinfo[nid]
						   ->usage[idx]);
		}
		rcu_read_unlock();
		seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
	}

	seq_putc(seq, '\n');

	return 0;
}

static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
				   struct cftype *cft)
{
	struct page_counter *counter;
	struct page_counter *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

	counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_USAGE:
		return (u64)page_counter_read(counter) * PAGE_SIZE;
	case RES_RSVD_USAGE:
		return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
	case RES_LIMIT:
		return (u64)counter->max * PAGE_SIZE;
	case RES_RSVD_LIMIT:
		return (u64)rsvd_counter->max * PAGE_SIZE;
	case RES_MAX_USAGE:
		return (u64)counter->watermark * PAGE_SIZE;
	case RES_RSVD_MAX_USAGE:
		return (u64)rsvd_counter->watermark * PAGE_SIZE;
	case RES_FAILCNT:
		return counter->failcnt;
	case RES_RSVD_FAILCNT:
		return rsvd_counter->failcnt;
	default:
		BUG();
	}
}

static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
	int idx;
	u64 val;
	struct cftype *cft = seq_cft(seq);
	unsigned long limit;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);
	counter = &h_cg->hugepage[idx];

	limit = round_down(PAGE_COUNTER_MAX,
			   pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_RSVD_USAGE:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_USAGE:
		val = (u64)page_counter_read(counter);
		seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	case RES_RSVD_LIMIT:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_LIMIT:
		val = (u64)counter->max;
		if (val == limit)
			seq_puts(seq, "max\n");
		else
			seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	default:
		BUG();
	}

	return 0;
}

static DEFINE_MUTEX(hugetlb_limit_mutex);

static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off,
				    const char *max)
{
	int ret, idx;
	unsigned long nr_pages;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
	bool rsvd = false;

	if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
		return -EINVAL;

	buf = strstrip(buf);
	ret = page_counter_memparse(buf, max, &nr_pages);
	if (ret)
		return ret;

	idx = MEMFILE_IDX(of_cft(of)->private);
	nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_RSVD_LIMIT:
		rsvd = true;
		fallthrough;
	case RES_LIMIT:
		mutex_lock(&hugetlb_limit_mutex);
		ret = page_counter_set_max(
			__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
			nr_pages);
		mutex_unlock(&hugetlb_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
					   char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	int ret = 0;
	struct page_counter *counter, *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

	counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(counter);
		break;
	case RES_RSVD_MAX_USAGE:
		page_counter_reset_watermark(rsvd_counter);
		break;
	case RES_FAILCNT:
		counter->failcnt = 0;
		break;
	case RES_RSVD_FAILCNT:
		rsvd_counter->failcnt = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	if (hsize >= SZ_1G)
		snprintf(buf, size, "%luGB", hsize / SZ_1G);
	else if (hsize >= SZ_1M)
		snprintf(buf, size, "%luMB", hsize / SZ_1M);
	else
		snprintf(buf, size, "%luKB", hsize / SZ_1K);
	return buf;
}

static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	int idx;
	long max;
	struct cftype *cft = seq_cft(seq);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);

	if (local)
		max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
	else
		max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, true);
}

static void __init __hugetlb_cgroup_file_dfl_init(int idx)
{
	char buf[32];
	struct cftype *cft;
	struct hstate *h = &hstates[idx];

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	/* Add the limit file */
	cft = &h->cgroup_files_dfl[0];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->write = hugetlb_cgroup_write_dfl;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the reservation limit file */
	cft = &h->cgroup_files_dfl[1];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->write = hugetlb_cgroup_write_dfl;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the current usage file */
	cft = &h->cgroup_files_dfl[2];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the current reservation usage file */
	cft = &h->cgroup_files_dfl[3];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the events file */
	cft = &h->cgroup_files_dfl[4];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_events_show;
	cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the events.local file */
	cft = &h->cgroup_files_dfl[5];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_events_local_show;
	cft->file_offset = offsetof(struct hugetlb_cgroup,
				    events_local_file[idx]);
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the numa stat file */
	cft = &h->cgroup_files_dfl[6];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_cgroup_read_numa_stat;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* NULL terminate the last cft */
	cft = &h->cgroup_files_dfl[7];
	memset(cft, 0, sizeof(*cft));

	WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
				       h->cgroup_files_dfl));
}

static void __init __hugetlb_cgroup_file_legacy_init(int idx)
{
	char buf[32];
	struct cftype *cft;
	struct hstate *h = &hstates[idx];

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	/* Add the limit file */
	cft = &h->cgroup_files_legacy[0];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
	cft->read_u64 = hugetlb_cgroup_read_u64;
	cft->write = hugetlb_cgroup_write_legacy;

	/* Add the reservation limit file */
	cft = &h->cgroup_files_legacy[1];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
	cft->read_u64 = hugetlb_cgroup_read_u64;
	cft->write = hugetlb_cgroup_write_legacy;

	/* Add the usage file */
	cft = &h->cgroup_files_legacy[2];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the reservation usage file */
	cft = &h->cgroup_files_legacy[3];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the MAX usage file */
	cft = &h->cgroup_files_legacy[4];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the MAX reservation usage file */
	cft = &h->cgroup_files_legacy[5];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the failcnt file */
	cft = &h->cgroup_files_legacy[6];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the reservation failcnt file */
	cft = &h->cgroup_files_legacy[7];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the numa stat file */
	cft = &h->cgroup_files_legacy[8];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
	cft->private = MEMFILE_PRIVATE(idx, 1);
	cft->seq_show = hugetlb_cgroup_read_numa_stat;

	/* NULL terminate the last cft */
	cft = &h->cgroup_files_legacy[9];
	memset(cft, 0, sizeof(*cft));

	WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
					  h->cgroup_files_legacy));
}

static void __init __hugetlb_cgroup_file_init(int idx)
{
	__hugetlb_cgroup_file_dfl_init(idx);
	__hugetlb_cgroup_file_legacy_init(idx);
}

void __init hugetlb_cgroup_file_init(void)
{
	struct hstate *h;

	for_each_hstate(h)
		__hugetlb_cgroup_file_init(hstate_index(h));
}

/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages
 */
void hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio)
{
	struct hugetlb_cgroup *h_cg;
	struct hugetlb_cgroup *h_cg_rsvd;
	struct hstate *h = folio_hstate(old_folio);

	if (hugetlb_cgroup_disabled())
		return;

	spin_lock_irq(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_folio(old_folio);
	h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio);
	set_hugetlb_cgroup(old_folio, NULL);
	set_hugetlb_cgroup_rsvd(old_folio, NULL);

	/* move the h_cg details to the new folio */
	set_hugetlb_cgroup(new_folio, h_cg);
	set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd);
	list_move(&new_folio->lru, &h->hugepage_activelist);
	spin_unlock_irq(&hugetlb_lock);
	return;
}

static struct cftype hugetlb_files[] = {
	{} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
	.css_alloc = hugetlb_cgroup_css_alloc,
	.css_offline = hugetlb_cgroup_css_offline,
	.css_free = hugetlb_cgroup_css_free,
	.dfl_cftypes = hugetlb_files,
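	/*
	 * hugetlb_files is only an empty terminator; the per-hstate control
	 * files are registered at boot by hugetlb_cgroup_file_init() via
	 * cgroup_add_dfl_cftypes()/cgroup_add_legacy_cftypes().
	 */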
	.legacy_cftypes = hugetlb_files,
};