// SPDX-License-Identifier: LGPL-2.1
/*
 * hugetlb controller for cgroups: accounts hugetlb page usage and
 * reservations per cgroup, on both the v1 (legacy) and v2 (default)
 * hierarchies.
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

/*
 * A cftype->private value packs the hstate index in the upper 16 bits
 * and a RES_* attribute in the lower 16 bits.
 */
#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)

/* Use t->m[0] to encode the offset */
#define MEMFILE_OFFSET(t, m0)	(((offsetof(t, m0) << 16) | sizeof_field(t, m0)))
#define MEMFILE_OFFSET0(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_FIELD_SIZE(val)	((val) & 0xffff)

#define DFL_TMPL_SIZE		ARRAY_SIZE(hugetlb_dfl_tmpl)
#define LEGACY_TMPL_SIZE	ARRAY_SIZE(hugetlb_legacy_tmpl)

/* Root of the hugetlb controller hierarchy; never subject to limits. */
static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
/* Per-hstate control files, generated at boot from the templates below. */
static struct cftype *dfl_files;
static struct cftype *legacy_files;

/*
 * Return the fault (rsvd == false) or reservation (rsvd == true) page
 * counter of @h_cg for the hstate at index @idx.
 */
static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
				     bool rsvd)
{
	if (rsvd)
		return &h_cg->rsvd_hugepage[idx];
	return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

/* Map a css back to its hugetlb_cgroup; a NULL css maps to NULL. */
static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
	return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
	return hugetlb_cgroup_from_css(h_cg->css.parent);
}

/* True if any hstate fault counter of @h_cg still has pages charged. */
static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
	struct hstate *h;

	for_each_hstate(h) {
		if (page_counter_read(
		    hugetlb_cgroup_counter_from_cgroup(h_cg, hstate_index(h))))
			return true;
	}
	return false;
}

/*
 * Initialize the fault and reservation page counters of a freshly
 * allocated cgroup, parenting each counter into the corresponding
 * counter of @parent_h_cgroup (NULL for the root cgroup).
 */
static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
				struct hugetlb_cgroup *parent_h_cgroup)
{
	int idx;

	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
		struct page_counter *fault, *fault_parent = NULL;
		struct page_counter *rsvd, *rsvd_parent = NULL;
		unsigned long limit;

		if (parent_h_cgroup) {
			fault_parent = hugetlb_cgroup_counter_from_cgroup(
				parent_h_cgroup, idx);
			rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
				parent_h_cgroup, idx);
		}
		fault = hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx);
		rsvd = hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx);

		page_counter_init(fault, fault_parent, false);
		page_counter_init(rsvd, rsvd_parent, false);

		if (!cgroup_subsys_on_dfl(hugetlb_cgrp_subsys)) {
			/* failcnt files are only exposed on the v1 hierarchy */
			fault->track_failcnt = true;
			rsvd->track_failcnt = true;
		}

		/* Default limit, rounded down to whole hugepages. */
		limit = round_down(PAGE_COUNTER_MAX,
				   pages_per_huge_page(&hstates[idx]));

		VM_BUG_ON(page_counter_set_max(fault, limit));
		VM_BUG_ON(page_counter_set_max(rsvd, limit));
	}
}

/* Free a hugetlb_cgroup along with its per-node usage arrays. */
static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
{
	int node;

	/* kfree(NULL) is a no-op, so partially-allocated nodeinfo is fine */
	for_each_node(node)
		kfree(h_cgroup->nodeinfo[node]);
	kfree(h_cgroup);
}

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
	struct hugetlb_cgroup *h_cgroup;
	int node;

	h_cgroup = kzalloc_flex(*h_cgroup, nodeinfo, nr_node_ids);

	if (!h_cgroup)
		return ERR_PTR(-ENOMEM);

	/* A NULL parent css means this is the root cgroup being created. */
	if (!parent_h_cgroup)
		root_h_cgroup = h_cgroup;

	/*
	 * TODO: this routine can waste much memory for nodes which will
	 * never be onlined. It's better to use memory hotplug callback
	 * function.
	 */
	for_each_node(node) {
		/* Set node_to_alloc to NUMA_NO_NODE for offline nodes. */
		int node_to_alloc =
			node_state(node, N_NORMAL_MEMORY) ? node : NUMA_NO_NODE;
		h_cgroup->nodeinfo[node] =
			kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
				     GFP_KERNEL, node_to_alloc);
		if (!h_cgroup->nodeinfo[node])
			goto fail_alloc_nodeinfo;
	}

	hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
	return &h_cgroup->css;

fail_alloc_nodeinfo:
	hugetlb_cgroup_free(h_cgroup);
	return ERR_PTR(-ENOMEM);
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	hugetlb_cgroup_free(hugetlb_cgroup_from_css(css));
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved from
 * active list or uncharged from the cgroup, So no need to get
 * page reference and test for page active here. This function
 * cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct folio *folio)
{
	unsigned int nr_pages;
	struct page_counter *counter;
	struct hugetlb_cgroup *hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

	hcg = hugetlb_cgroup_from_folio(folio);
	/*
	 * We can have pages in active list without any cgroup
	 * ie, hugepage with less than 3 pages. We can safely
	 * ignore those pages.
	 */
	if (!hcg || hcg != h_cg)
		goto out;

	nr_pages = folio_nr_pages(folio);
	if (!parent) {
		parent = root_h_cgroup;
		/* root has no limit */
		page_counter_charge(&parent->hugepage[idx], nr_pages);
	}
	counter = &h_cg->hugepage[idx];
	/*
	 * Take the pages off the local counter. Note that only the local
	 * level is cancelled; when a real parent exists its hierarchical
	 * counter presumably already includes this usage, so no explicit
	 * charge is needed there (only the root fallback above is charged).
	 */
	page_counter_cancel(counter, nr_pages);

	/* Reassign the folio's cgroup so future uncharges hit the parent. */
	set_hugetlb_cgroup(folio, parent);
out:
	return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
	struct hstate *h;
	struct folio *folio;

	/* Loop until every fault counter of this cgroup reads zero. */
	do {
		for_each_hstate(h) {
			spin_lock_irq(&hugetlb_lock);
			list_for_each_entry(folio, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(hstate_index(h), h_cg, folio);

			spin_unlock_irq(&hugetlb_lock);
		}
		/* Drop the lock between passes to avoid hogging the CPU. */
		cond_resched();
	} while (hugetlb_cgroup_have_usage(h_cg));
}

/*
 * Record a HUGETLB_* memory event for @hugetlb at hstate @idx and wake
 * poll/notify listeners. events.local counts only this cgroup; events
 * is propagated up every ancestor below the root.
 */
static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
				 enum hugetlb_memory_event event)
{
	atomic_long_inc(&hugetlb->events_local[idx][event]);
	cgroup_file_notify(&hugetlb->events_local_file[idx]);

	do {
		atomic_long_inc(&hugetlb->events[idx][event]);
		cgroup_file_notify(&hugetlb->events_file[idx]);
	} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
		 !hugetlb_cgroup_is_root(hugetlb));
}

/*
 * Try to charge @nr_pages of hstate @idx to the current task's cgroup.
 * On return *@ptr holds the cgroup that was charged (NULL when the
 * controller is disabled). Returns 0 on success, -ENOMEM if the limit
 * was hit (a HUGETLB_MAX event is recorded in that case).
 */
static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
					  struct hugetlb_cgroup **ptr,
					  bool rsvd)
{
	int ret = 0;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = NULL;

	if (hugetlb_cgroup_disabled())
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	if (!css_tryget(&h_cg->css)) {
		/* css is going away; re-read the task's current cgroup */
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	if (!page_counter_try_charge(
		    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		    nr_pages, &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
		css_put(&h_cg->css);
		goto done;
	}
	/* Reservations take a reference to the css because they do not get
	 * reparented.
	 */
	if (!rsvd)
		css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}

int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
				      struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}

/*
 * Bind a previously charged cgroup to @folio and, for fault charges,
 * update the per-node usage statistics.
 * Should be called with hugetlb_lock held.
 */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg,
					   struct folio *folio, bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;
	lockdep_assert_held(&hugetlb_lock);
	__set_hugetlb_cgroup(folio, h_cg, rsvd);
	if (!rsvd) {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage + nr_pages);
	}
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct folio *folio)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
				       struct hugetlb_cgroup *h_cg,
				       struct folio *folio)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true);
}

/*
 * Uncharge @nr_pages of hstate @idx from the cgroup bound to @folio and
 * clear the binding. For reservation charges the css reference taken at
 * charge time is dropped here.
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
					    struct folio *folio, bool rsvd)
{
	struct hugetlb_cgroup *h_cg;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = __hugetlb_cgroup_from_folio(folio, rsvd);
	/* Folio was never charged (no cgroup bound): nothing to undo. */
	if (unlikely(!h_cg))
		return;
	__set_hugetlb_cgroup(folio, NULL, rsvd);

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
	else {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage - nr_pages);
	}
}

void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
				   struct folio *folio)
{
	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false);
}

void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
					struct folio *folio)
{
	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true);
}

/*
 * Undo a charge taken with __hugetlb_cgroup_charge_cgroup() that was
 * never committed to a folio. Drops the reservation css reference.
 */
static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					     struct hugetlb_cgroup *h_cg,
					     bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
					 struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

/*
 * Release the reservation charge for pages [@start, @end) held by a
 * reserve map, along with the css reference the map owns.
 */
void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
				     unsigned long end)
{
	if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
	    !resv->css)
		return;

	page_counter_uncharge(resv->reservation_counter,
			      (end - start) * resv->pages_per_hpage);
	css_put(resv->css);
}

/*
 * Release the reservation charge a file_region holds. Only acts when
 * the region carries its own counter and the reserve map itself does
 * not (i.e. the region-level accounting path is in use).
 */
void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
					 struct file_region *rg,
					 unsigned long nr_pages,
					 bool region_del)
{
	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
		return;

	if (rg->reservation_counter && resv->pages_per_hpage &&
	    !resv->reservation_counter) {
		page_counter_uncharge(rg->reservation_counter,
				      nr_pages * resv->pages_per_hpage);
		/*
		 * Only do
		 * css_put(rg->css) when we delete the entire region
		 * because one file_region must hold exactly one css reference.
		 */
		if (region_del)
			css_put(rg->css);
	}
}

/* Attribute half of a cftype->private value; see MEMFILE_PRIVATE(). */
enum {
	RES_USAGE,
	RES_RSVD_USAGE,
	RES_LIMIT,
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,
	RES_RSVD_FAILCNT,
};

/* seq_show callback for the per-hstate numa_stat file. */
static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
{
	int nid;
	struct cftype *cft = seq_cft(seq);
	int idx = MEMFILE_IDX(cft->private);
	bool legacy = !cgroup_subsys_on_dfl(hugetlb_cgrp_subsys);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
	struct cgroup_subsys_state *css;
	unsigned long usage;

	if (legacy) {
		/* Add up usage across all nodes for the non-hierarchical total. */
		usage = 0;
		for_each_node_state(nid, N_MEMORY)
			usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
		seq_printf(seq, "total=%lu", usage * PAGE_SIZE);

		/* Simply print the per-node usage for the non-hierarchical total. */
		for_each_node_state(nid, N_MEMORY)
			seq_printf(seq, " N%d=%lu", nid,
				   READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
					   PAGE_SIZE);
		seq_putc(seq, '\n');
	}

	/*
	 * The hierarchical total is pretty much the value recorded by the
	 * counter, so use that.
	 */
	seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
		   page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);

	/*
	 * For each node, traverse the css tree to obtain the hierarchical
	 * node usage.
	 */
	for_each_node_state(nid, N_MEMORY) {
		usage = 0;
		rcu_read_lock();
		css_for_each_descendant_pre(css, &h_cg->css) {
			usage += READ_ONCE(hugetlb_cgroup_from_css(css)
						   ->nodeinfo[nid]
						   ->usage[idx]);
		}
		rcu_read_unlock();
		seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
	}

	seq_putc(seq, '\n');

	return 0;
}

/* read_u64 callback for the legacy (v1) byte-valued files. */
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
				   struct cftype *cft)
{
	struct page_counter *counter;
	struct page_counter *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

	counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_USAGE:
		return (u64)page_counter_read(counter) * PAGE_SIZE;
	case RES_RSVD_USAGE:
		return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
	case RES_LIMIT:
		return (u64)counter->max * PAGE_SIZE;
	case RES_RSVD_LIMIT:
		return (u64)rsvd_counter->max * PAGE_SIZE;
	case RES_MAX_USAGE:
		return (u64)counter->watermark * PAGE_SIZE;
	case RES_RSVD_MAX_USAGE:
		return (u64)rsvd_counter->watermark * PAGE_SIZE;
	case RES_FAILCNT:
		return counter->failcnt;
	case RES_RSVD_FAILCNT:
		return rsvd_counter->failcnt;
	default:
		/* templates only install the attributes handled above */
		BUG();
	}
}

/* seq_show callback for the v2 files; an unset limit prints as "max". */
static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
	int idx;
	u64 val;
	struct cftype *cft = seq_cft(seq);
	unsigned long limit;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);
	counter = &h_cg->hugepage[idx];

	/* the default max set by hugetlb_cgroup_init(), meaning "no limit" */
	limit = round_down(PAGE_COUNTER_MAX,
			   pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_RSVD_USAGE:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_USAGE:
		val = (u64)page_counter_read(counter);
		seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	case RES_RSVD_LIMIT:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_LIMIT:
		val = (u64)counter->max;
		if (val == limit)
			seq_puts(seq, "max\n");
		else
			seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	default:
		BUG();
	}

	return 0;
}

/* Serializes concurrent limit updates. */
static DEFINE_MUTEX(hugetlb_limit_mutex);

/*
 * Common write handler for the limit files. @max is the keyword meaning
 * "no limit" ("-1" on v1, "max" on v2). Returns @nbytes on success or a
 * negative errno.
 */
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off,
				    const char *max)
{
	int ret, idx;
	unsigned long nr_pages;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
	bool rsvd = false;

	if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
		return -EINVAL;

	buf = strstrip(buf);
	ret = page_counter_memparse(buf, max, &nr_pages);
	if (ret)
		return ret;

	idx = MEMFILE_IDX(of_cft(of)->private);
	/* limits are kept in whole hugepages of this hstate */
	nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_RSVD_LIMIT:
		rsvd = true;
		fallthrough;
	case RES_LIMIT:
		mutex_lock(&hugetlb_limit_mutex);
		ret = page_counter_set_max(
			__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
			nr_pages);
		mutex_unlock(&hugetlb_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
					   char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

/* Writing to a watermark/failcnt file resets the corresponding value. */
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	int ret = 0;
	struct page_counter *counter, *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

	counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(counter);
		break;
	case RES_RSVD_MAX_USAGE:
		page_counter_reset_watermark(rsvd_counter);
		break;
	case RES_FAILCNT:
		counter->failcnt = 0;
		break;
	case RES_RSVD_FAILCNT:
		rsvd_counter->failcnt = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

/* Format a hugepage size as "<n>GB"/"<n>MB"/"<n>KB" for file names. */
static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	if (hsize >= SZ_1G)
		snprintf(buf, size, "%luGB", hsize / SZ_1G);
	else if (hsize >= SZ_1M)
		snprintf(buf, size, "%luMB", hsize / SZ_1M);
	else
		snprintf(buf, size, "%luKB", hsize / SZ_1K);
	return buf;
}

/* Show the "max" event count: local-only or hierarchical (see hugetlb_event). */
static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	int idx;
	long max;
	struct cftype *cft = seq_cft(seq);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);

	if (local)
		max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
	else
		max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, true);
}

/*
 * Template for the cgroup v2 (default hierarchy) files; one copy per
 * hstate is generated at boot by hugetlb_cgroup_cfttypes_init().
 */
static struct cftype hugetlb_dfl_tmpl[] = {
	{
		.name = "max",
		.private = RES_LIMIT,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.write = hugetlb_cgroup_write_dfl,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "rsvd.max",
		.private = RES_RSVD_LIMIT,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.write = hugetlb_cgroup_write_dfl,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "current",
		.private = RES_USAGE,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "rsvd.current",
		.private = RES_RSVD_USAGE,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "events",
		.seq_show = hugetlb_events_show,
		.file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_file[0]),
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "events.local",
		.seq_show = hugetlb_events_local_show,
		.file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_local_file[0]),
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "numa_stat",
		.seq_show = hugetlb_cgroup_read_numa_stat,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	/* don't need terminator here */
};

/* Template for the cgroup v1 (legacy hierarchy) files. */
static struct cftype hugetlb_legacy_tmpl[] = {
	{
		.name = "limit_in_bytes",
		.private = RES_LIMIT,
		.read_u64 = hugetlb_cgroup_read_u64,
		.write = hugetlb_cgroup_write_legacy,
	},
	{
		.name = "rsvd.limit_in_bytes",
		.private = RES_RSVD_LIMIT,
		.read_u64 = hugetlb_cgroup_read_u64,
		.write = hugetlb_cgroup_write_legacy,
	},
	{
		.name = "usage_in_bytes",
		.private = RES_USAGE,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.usage_in_bytes",
		.private = RES_RSVD_USAGE,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.max_usage_in_bytes",
		.private = RES_RSVD_MAX_USAGE,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "failcnt",
		.private = RES_FAILCNT,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.failcnt",
		.private = RES_RSVD_FAILCNT,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "numa_stat",
		.seq_show = hugetlb_cgroup_read_numa_stat,
	},
	/* don't need terminator here */
};

/*
 * Instantiate @tmpl_size cftypes from @tmpl into @cft for hstate @h:
 * prefix each file name with the hugepage size and fold the hstate
 * index into ->private and ->file_offset.
 */
static void __init
hugetlb_cgroup_cfttypes_init(struct hstate *h, struct cftype *cft,
			     struct cftype *tmpl, int tmpl_size)
{
	char buf[32];
	int i, idx = hstate_index(h);

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	for (i = 0; i < tmpl_size; cft++, tmpl++, i++) {
		*cft = *tmpl;
		/* rebuild the name */
		scnprintf(cft->name, MAX_CFTYPE_NAME, "%s.%s", buf, tmpl->name);
		/* rebuild the private */
		cft->private = MEMFILE_PRIVATE(idx, tmpl->private);
		/* rebuild the file_offset */
		if (tmpl->file_offset) {
			unsigned int offset = tmpl->file_offset;

			cft->file_offset = MEMFILE_OFFSET0(offset) +
					   MEMFILE_FIELD_SIZE(offset) * idx;
		}

		lockdep_register_key(&cft->lockdep_key);
	}
}

static void __init __hugetlb_cgroup_file_dfl_init(struct hstate *h)
{
	int idx = hstate_index(h);

	hugetlb_cgroup_cfttypes_init(h, dfl_files + idx * DFL_TMPL_SIZE,
				     hugetlb_dfl_tmpl, DFL_TMPL_SIZE);
}

static void __init __hugetlb_cgroup_file_legacy_init(struct hstate *h)
{
	int idx = hstate_index(h);

	hugetlb_cgroup_cfttypes_init(h, legacy_files + idx * LEGACY_TMPL_SIZE,
				     hugetlb_legacy_tmpl, LEGACY_TMPL_SIZE);
}

static void __init __hugetlb_cgroup_file_init(struct hstate *h)
{
	__hugetlb_cgroup_file_dfl_init(h);
	__hugetlb_cgroup_file_legacy_init(h);
}

/* Allocate the per-hstate cftype arrays (plus a terminating entry each). */
static void __init __hugetlb_cgroup_file_pre_init(void)
{
	int cft_count;

	cft_count = hugetlb_max_hstate * DFL_TMPL_SIZE + 1; /* add terminator */
	dfl_files = kzalloc_objs(struct cftype, cft_count);
	BUG_ON(!dfl_files);
	cft_count = hugetlb_max_hstate * LEGACY_TMPL_SIZE + 1; /* add terminator */
	legacy_files = kzalloc_objs(struct cftype, cft_count);
	BUG_ON(!legacy_files);
}

/* Register the generated files with the cgroup core. */
static void __init __hugetlb_cgroup_file_post_init(void)
{
	WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
				       dfl_files));
	WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
					  legacy_files));
}

/* Boot-time entry point: build and register all per-hstate files. */
void __init hugetlb_cgroup_file_init(void)
{
	struct hstate *h;

	__hugetlb_cgroup_file_pre_init();
	for_each_hstate(h)
		__hugetlb_cgroup_file_init(h);
	__hugetlb_cgroup_file_post_init();
}

/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages
 */
void hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio)
{
	struct hugetlb_cgroup *h_cg;
	struct hugetlb_cgroup *h_cg_rsvd;
	struct hstate *h = folio_hstate(old_folio);

	if (hugetlb_cgroup_disabled())
		return;

	spin_lock_irq(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_folio(old_folio);
	h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio);
	set_hugetlb_cgroup(old_folio, NULL);
	set_hugetlb_cgroup_rsvd(old_folio, NULL);

	/* move the h_cg details to new cgroup */
	set_hugetlb_cgroup(new_folio, h_cg);
	set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd);
	list_move(&new_folio->lru, &h->hugepage_activelist);
	spin_unlock_irq(&hugetlb_lock);
}

/* No static files: everything is generated per-hstate at boot. */
static struct cftype hugetlb_files[] = {
	{} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
	.css_alloc = hugetlb_cgroup_css_alloc,
	.css_offline = hugetlb_cgroup_css_offline,
	.css_free = hugetlb_cgroup_css_free,
	.dfl_cftypes = hugetlb_files,
	.legacy_cftypes = hugetlb_files,
};