// SPDX-License-Identifier: LGPL-2.1
/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

/*
 * cftype->private packs the hstate index in the high 16 bits and a RES_*
 * attribute (or template private value) in the low 16 bits.
 */
#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)

/* Use t->m[0] to encode the offset */
#define MEMFILE_OFFSET(t, m0)	(((offsetof(t, m0) << 16) | sizeof_field(t, m0)))
#define MEMFILE_OFFSET0(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_FIELD_SIZE(val)	((val) & 0xffff)

#define DFL_TMPL_SIZE		ARRAY_SIZE(hugetlb_dfl_tmpl)
#define LEGACY_TMPL_SIZE	ARRAY_SIZE(hugetlb_legacy_tmpl)

static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
/* Per-hstate cftype arrays built at boot from the templates below. */
static struct cftype *dfl_files;
static struct cftype *legacy_files;

/*
 * Return the fault or reservation page counter of @h_cg for hstate @idx,
 * selected by @rsvd.
 */
static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
				     bool rsvd)
{
	if (rsvd)
		return &h_cg->rsvd_hugepage[idx];
	return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

/* NULL-safe: a NULL css (e.g. no parent) maps to a NULL hugetlb_cgroup. */
static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
	return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
	return hugetlb_cgroup_from_css(h_cg->css.parent);
}

/*
 * True if any hstate's fault counter of @h_cg is still non-zero.  Only the
 * fault counters are checked here; reservation counters pin the css via
 * their own reference and are not reparented on offline.
 */
static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
	struct hstate *h;

	for_each_hstate(h) {
		if (page_counter_read(
			hugetlb_cgroup_counter_from_cgroup(h_cg, hstate_index(h))))
			return true;
	}
	return false;
}

/*
 * Initialize the fault and reservation page counters of @h_cgroup for every
 * possible hstate, parenting them to @parent_h_cgroup's counters (NULL for
 * the root).  Each limit starts at the maximum, rounded down to a whole
 * number of huge pages.
 */
static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
				struct hugetlb_cgroup *parent_h_cgroup)
{
	int idx;

	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
		struct page_counter *fault, *fault_parent = NULL;
		struct page_counter *rsvd, *rsvd_parent = NULL;
		unsigned long limit;

		if (parent_h_cgroup) {
			fault_parent = hugetlb_cgroup_counter_from_cgroup(
				parent_h_cgroup, idx);
			rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
				parent_h_cgroup, idx);
		}
		fault = hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx);
		rsvd = hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx);

		page_counter_init(fault, fault_parent, false);
		page_counter_init(rsvd, rsvd_parent, false);

		/* Only cgroup v1 exposes failcnt files; skip tracking on v2. */
		if (!cgroup_subsys_on_dfl(hugetlb_cgrp_subsys)) {
			fault->track_failcnt = true;
			rsvd->track_failcnt = true;
		}

		limit = round_down(PAGE_COUNTER_MAX,
				   pages_per_huge_page(&hstates[idx]));

		/* Setting the max to <= PAGE_COUNTER_MAX cannot fail. */
		VM_BUG_ON(page_counter_set_max(fault, limit));
		VM_BUG_ON(page_counter_set_max(rsvd, limit));
	}
}

/* Free the per-node info array and the hugetlb_cgroup itself. */
static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
{
	int node;

	for_each_node(node)
		kfree(h_cgroup->nodeinfo[node]);
	kfree(h_cgroup);
}

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
	struct hugetlb_cgroup *h_cgroup;
	int node;

	h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids),
			   GFP_KERNEL);

	if (!h_cgroup)
		return ERR_PTR(-ENOMEM);

	/* The first css allocated (no parent) is the root cgroup. */
	if (!parent_h_cgroup)
		root_h_cgroup = h_cgroup;

	/*
	 * TODO: this routine can waste much memory for nodes which will
	 * never be onlined. It's better to use memory hotplug callback
	 * function.
	 */
	for_each_node(node) {
		/* Set node_to_alloc to NUMA_NO_NODE for offline nodes. */
		int node_to_alloc =
			node_state(node, N_NORMAL_MEMORY) ? node : NUMA_NO_NODE;
		h_cgroup->nodeinfo[node] =
			kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
				     GFP_KERNEL, node_to_alloc);
		if (!h_cgroup->nodeinfo[node])
			goto fail_alloc_nodeinfo;
	}

	hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
	return &h_cgroup->css;

fail_alloc_nodeinfo:
	/* kfree(NULL) is a no-op, so freeing the partial array is safe. */
	hugetlb_cgroup_free(h_cgroup);
	return ERR_PTR(-ENOMEM);
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	hugetlb_cgroup_free(hugetlb_cgroup_from_css(css));
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved from
 * active list or uncharged from the cgroup, So no need to get
 * page reference and test for page active here. This function
 * cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct folio *folio)
{
	unsigned int nr_pages;
	struct page_counter *counter;
	struct hugetlb_cgroup *hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

	hcg = hugetlb_cgroup_from_folio(folio);
	/*
	 * We can have pages in active list without any cgroup
	 * ie, hugepage with less than 3 pages. We can safely
	 * ignore those pages.
	 */
	if (!hcg || hcg != h_cg)
		goto out;

	nr_pages = folio_nr_pages(folio);
	if (!parent) {
		parent = root_h_cgroup;
		/* root has no limit */
		page_counter_charge(&parent->hugepage[idx], nr_pages);
	}
	counter = &h_cg->hugepage[idx];
	/* Take the pages off the local counter */
	page_counter_cancel(counter, nr_pages);

	set_hugetlb_cgroup(folio, parent);
out:
	return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
	struct hstate *h;
	struct folio *folio;

	/* Loop until every fault counter of this cgroup drains to zero. */
	do {
		for_each_hstate(h) {
			spin_lock_irq(&hugetlb_lock);
			list_for_each_entry(folio, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(hstate_index(h), h_cg, folio);

			spin_unlock_irq(&hugetlb_lock);
		}
		cond_resched();
	} while (hugetlb_cgroup_have_usage(h_cg));
}

/*
 * Record a memory event for hstate @idx: bump the local count on @hugetlb,
 * then propagate the (hierarchical) event up every ancestor short of the
 * root, notifying pollers of the corresponding events files.
 */
static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
				 enum hugetlb_memory_event event)
{
	atomic_long_inc(&hugetlb->events_local[idx][event]);
	cgroup_file_notify(&hugetlb->events_local_file[idx]);

	do {
		atomic_long_inc(&hugetlb->events[idx][event]);
		cgroup_file_notify(&hugetlb->events_file[idx]);
	} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
		 !hugetlb_cgroup_is_root(hugetlb));
}

/*
 * Charge @nr_pages of hstate @idx against current's hugetlb cgroup (fault
 * or reservation counter per @rsvd).  On success *ptr is set to the charged
 * cgroup; on limit failure returns -ENOMEM and records a HUGETLB_MAX event.
 */
static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
					  struct hugetlb_cgroup **ptr,
					  bool rsvd)
{
	int ret = 0;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = NULL;

	if (hugetlb_cgroup_disabled())
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	/* Retry if the css is concurrently dying; current may have moved. */
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	if (!page_counter_try_charge(
		    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		    nr_pages, &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
		css_put(&h_cg->css);
		goto done;
	}
	/* Reservations take a reference to the css because they do not get
	 * reparented.
	 */
	if (!rsvd)
		css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}

int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
				      struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}

/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg,
					   struct folio *folio, bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;
	lockdep_assert_held(&hugetlb_lock);
	__set_hugetlb_cgroup(folio, h_cg, rsvd);
	if (!rsvd) {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage + nr_pages);
	}
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct folio *folio)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
				       struct hugetlb_cgroup *h_cg,
				       struct folio *folio)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true);
}

/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
					    struct folio *folio, bool rsvd)
{
	struct hugetlb_cgroup *h_cg;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = __hugetlb_cgroup_from_folio(folio, rsvd);
	if (unlikely(!h_cg))
		return;
	__set_hugetlb_cgroup(folio, NULL, rsvd);

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	/* Drop the css ref the reservation held; see charge path above. */
	if (rsvd)
		css_put(&h_cg->css);
	else {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage - nr_pages);
	}
}

void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
				   struct folio *folio)
{
	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false);
}

void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
					struct folio *folio)
{
	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true);
}

/*
 * Undo a charge that was never committed to a folio (e.g. allocation
 * failed after the charge succeeded).
 */
static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					     struct hugetlb_cgroup *h_cg,
					     bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
					 struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

/*
 * Uncharge the reservation counter attached to @resv for the huge pages
 * covering [@start, @end) and drop the css reference the resv_map held.
 */
void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
				     unsigned long end)
{
	if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
	    !resv->css)
		return;

	page_counter_uncharge(resv->reservation_counter,
			      (end - start) * resv->pages_per_hpage);
	css_put(resv->css);
}

/*
 * Uncharge @nr_pages of a file region's reservation counter.  Only applies
 * when the region carries its own counter and the resv_map itself does not
 * (i.e. the counters were recorded per-region).
 */
void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
					 struct file_region *rg,
					 unsigned long nr_pages,
					 bool region_del)
{
	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
		return;

	if (rg->reservation_counter && resv->pages_per_hpage &&
	    !resv->reservation_counter) {
		page_counter_uncharge(rg->reservation_counter,
				      nr_pages * resv->pages_per_hpage);
		/*
		 * Only do css_put(rg->css) when we delete the entire region
		 * because one file_region must hold exactly one css reference.
		 */
		if (region_del)
			css_put(rg->css);
	}
}

/* File attributes encoded in the low 16 bits of cftype->private. */
enum {
	RES_USAGE,
	RES_RSVD_USAGE,
	RES_LIMIT,
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,
	RES_RSVD_FAILCNT,
};

/* seq_show handler for the per-hstate numa_stat file (v1 and v2 formats). */
static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
{
	int nid;
	struct cftype *cft = seq_cft(seq);
	int idx = MEMFILE_IDX(cft->private);
	bool legacy = !cgroup_subsys_on_dfl(hugetlb_cgrp_subsys);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
	struct cgroup_subsys_state *css;
	unsigned long usage;

	if (legacy) {
		/* Add up usage across all nodes for the non-hierarchical total. */
		usage = 0;
		for_each_node_state(nid, N_MEMORY)
			usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
		seq_printf(seq, "total=%lu", usage * PAGE_SIZE);

		/* Simply print the per-node usage for the non-hierarchical total. */
		for_each_node_state(nid, N_MEMORY)
			seq_printf(seq, " N%d=%lu", nid,
				   READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
					   PAGE_SIZE);
		seq_putc(seq, '\n');
	}

	/*
	 * The hierarchical total is pretty much the value recorded by the
	 * counter, so use that.
	 */
	seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
		   page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);

	/*
	 * For each node, transverse the css tree to obtain the hierarchical
	 * node usage.
	 */
	for_each_node_state(nid, N_MEMORY) {
		usage = 0;
		rcu_read_lock();
		css_for_each_descendant_pre(css, &h_cg->css) {
			usage += READ_ONCE(hugetlb_cgroup_from_css(css)
						   ->nodeinfo[nid]
						   ->usage[idx]);
		}
		rcu_read_unlock();
		seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
	}

	seq_putc(seq, '\n');

	return 0;
}

/* read_u64 handler for the legacy (v1) per-hstate files. */
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
				   struct cftype *cft)
{
	struct page_counter *counter;
	struct page_counter *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

	counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_USAGE:
		return (u64)page_counter_read(counter) * PAGE_SIZE;
	case RES_RSVD_USAGE:
		return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
	case RES_LIMIT:
		return (u64)counter->max * PAGE_SIZE;
	case RES_RSVD_LIMIT:
		return (u64)rsvd_counter->max * PAGE_SIZE;
	case RES_MAX_USAGE:
		return (u64)counter->watermark * PAGE_SIZE;
	case RES_RSVD_MAX_USAGE:
		return (u64)rsvd_counter->watermark * PAGE_SIZE;
	case RES_FAILCNT:
		return counter->failcnt;
	case RES_RSVD_FAILCNT:
		return rsvd_counter->failcnt;
	default:
		BUG();
	}
}

/*
 * seq_show handler for v2 "max"/"current" style files; prints "max" when
 * a limit is at the (rounded-down) counter maximum.
 */
static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
	int idx;
	u64 val;
	struct cftype *cft = seq_cft(seq);
	unsigned long limit;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);
	counter = &h_cg->hugepage[idx];

	limit = round_down(PAGE_COUNTER_MAX,
			   pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_RSVD_USAGE:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_USAGE:
		val = (u64)page_counter_read(counter);
		seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	case RES_RSVD_LIMIT:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_LIMIT:
		val = (u64)counter->max;
		if (val == limit)
			seq_puts(seq, "max\n");
		else
			seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	default:
		BUG();
	}

	return 0;
}

/* Serializes concurrent limit writes across all hugetlb cgroup files. */
static DEFINE_MUTEX(hugetlb_limit_mutex);

/*
 * Common write handler for limit files.  @max is the token accepted as
 * "no limit" ("-1" on v1, "max" on v2).
 */
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off,
				    const char *max)
{
	int ret, idx;
	unsigned long nr_pages;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
	bool rsvd = false;

	if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
		return -EINVAL;

	buf = strstrip(buf);
	ret = page_counter_memparse(buf, max, &nr_pages);
	if (ret)
		return ret;

	idx = MEMFILE_IDX(of_cft(of)->private);
	/* Limits are kept as whole huge pages. */
	nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_RSVD_LIMIT:
		rsvd = true;
		fallthrough;
	case RES_LIMIT:
		mutex_lock(&hugetlb_limit_mutex);
		ret = page_counter_set_max(
			__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
			nr_pages);
		mutex_unlock(&hugetlb_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
					   char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

/* Write handler that resets watermark/failcnt files (legacy hierarchy). */
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	int ret = 0;
	struct page_counter *counter, *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

	counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(counter);
		break;
	case RES_RSVD_MAX_USAGE:
		page_counter_reset_watermark(rsvd_counter);
		break;
	case RES_FAILCNT:
		counter->failcnt = 0;
		break;
	case RES_RSVD_FAILCNT:
		rsvd_counter->failcnt = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

/* Format a huge page size into buf as "<n>GB", "<n>MB" or "<n>KB". */
static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	if (hsize >= SZ_1G)
		snprintf(buf, size, "%luGB", hsize / SZ_1G);
	else if (hsize >= SZ_1M)
		snprintf(buf, size, "%luMB", hsize / SZ_1M);
	else
		snprintf(buf, size, "%luKB", hsize / SZ_1K);
	return buf;
}

/*
 * Show the HUGETLB_MAX event count for the file's hstate; @local selects
 * the non-hierarchical counter (events.local) over the hierarchical one.
 */
static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	int idx;
	long max;
	struct cftype *cft = seq_cft(seq);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);

	if (local)
		max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
	else
		max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, true);
}

/* Template for the v2 (default hierarchy) files, one copy per hstate. */
static struct cftype hugetlb_dfl_tmpl[] = {
	{
		.name = "max",
		.private = RES_LIMIT,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.write = hugetlb_cgroup_write_dfl,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "rsvd.max",
		.private = RES_RSVD_LIMIT,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.write = hugetlb_cgroup_write_dfl,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "current",
		.private = RES_USAGE,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "rsvd.current",
		.private = RES_RSVD_USAGE,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "events",
		.seq_show = hugetlb_events_show,
		.file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_file[0]),
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "events.local",
		.seq_show = hugetlb_events_local_show,
		.file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_local_file[0]),
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "numa_stat",
		.seq_show = hugetlb_cgroup_read_numa_stat,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	/* don't need terminator here */
};

/* Template for the v1 (legacy hierarchy) files, one copy per hstate. */
static struct cftype hugetlb_legacy_tmpl[] = {
	{
		.name = "limit_in_bytes",
		.private = RES_LIMIT,
		.read_u64 = hugetlb_cgroup_read_u64,
		.write = hugetlb_cgroup_write_legacy,
	},
	{
		.name = "rsvd.limit_in_bytes",
		.private = RES_RSVD_LIMIT,
		.read_u64 = hugetlb_cgroup_read_u64,
		.write = hugetlb_cgroup_write_legacy,
	},
	{
		.name = "usage_in_bytes",
		.private = RES_USAGE,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.usage_in_bytes",
		.private = RES_RSVD_USAGE,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.max_usage_in_bytes",
		.private = RES_RSVD_MAX_USAGE,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "failcnt",
		.private = RES_FAILCNT,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.failcnt",
		.private = RES_RSVD_FAILCNT,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "numa_stat",
		.seq_show = hugetlb_cgroup_read_numa_stat,
	},
	/* don't need terminator here */
};

/*
 * Instantiate @tmpl_size cftypes from @tmpl into @cft for hstate @h:
 * prefix each name with the formatted page size (e.g. "2MB."), encode the
 * hstate index into ->private, and rebase any file_offset for this index.
 */
static void __init
hugetlb_cgroup_cfttypes_init(struct hstate *h, struct cftype *cft,
			     struct cftype *tmpl, int tmpl_size)
{
	char buf[32];
	int i, idx = hstate_index(h);

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	for (i = 0; i < tmpl_size; cft++, tmpl++, i++) {
		*cft = *tmpl;
		/* rebuild the name */
		scnprintf(cft->name, MAX_CFTYPE_NAME, "%s.%s", buf, tmpl->name);
		/* rebuild the private */
		cft->private = MEMFILE_PRIVATE(idx, tmpl->private);
		/* rebuild the file_offset */
		if (tmpl->file_offset) {
			unsigned int offset = tmpl->file_offset;

			cft->file_offset = MEMFILE_OFFSET0(offset) +
					   MEMFILE_FIELD_SIZE(offset) * idx;
		}

		lockdep_register_key(&cft->lockdep_key);
	}
}

static void __init __hugetlb_cgroup_file_dfl_init(struct hstate *h)
{
	int idx = hstate_index(h);

	hugetlb_cgroup_cfttypes_init(h, dfl_files + idx * DFL_TMPL_SIZE,
				     hugetlb_dfl_tmpl, DFL_TMPL_SIZE);
}

static void __init __hugetlb_cgroup_file_legacy_init(struct hstate *h)
{
	int idx = hstate_index(h);

	hugetlb_cgroup_cfttypes_init(h, legacy_files + idx * LEGACY_TMPL_SIZE,
				     hugetlb_legacy_tmpl, LEGACY_TMPL_SIZE);
}

static void __init __hugetlb_cgroup_file_init(struct hstate *h)
{
	__hugetlb_cgroup_file_dfl_init(h);
	__hugetlb_cgroup_file_legacy_init(h);
}

/* Allocate the per-hstate cftype arrays, including the NULL terminator. */
static void __init __hugetlb_cgroup_file_pre_init(void)
{
	int cft_count;

	cft_count = hugetlb_max_hstate * DFL_TMPL_SIZE + 1; /* add terminator */
	dfl_files = kcalloc(cft_count, sizeof(struct cftype), GFP_KERNEL);
	BUG_ON(!dfl_files);
	cft_count = hugetlb_max_hstate * LEGACY_TMPL_SIZE + 1; /* add terminator */
	legacy_files = kcalloc(cft_count, sizeof(struct cftype), GFP_KERNEL);
	BUG_ON(!legacy_files);
}

/* Register the fully-built cftype arrays with the cgroup core. */
static void __init __hugetlb_cgroup_file_post_init(void)
{
	WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
				       dfl_files));
	WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
					  legacy_files));
}

void __init hugetlb_cgroup_file_init(void)
{
	struct hstate *h;

	__hugetlb_cgroup_file_pre_init();
	for_each_hstate(h)
		__hugetlb_cgroup_file_init(h);
	__hugetlb_cgroup_file_post_init();
}

/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages
 */
void hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio)
{
	struct hugetlb_cgroup *h_cg;
	struct hugetlb_cgroup *h_cg_rsvd;
	struct hstate *h = folio_hstate(old_folio);

	if (hugetlb_cgroup_disabled())
		return;

	spin_lock_irq(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_folio(old_folio);
	h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio);
	set_hugetlb_cgroup(old_folio, NULL);
	set_hugetlb_cgroup_rsvd(old_folio, NULL);

	/* move the h_cg details to new cgroup */
	set_hugetlb_cgroup(new_folio, h_cg);
	set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd);
	list_move(&new_folio->lru, &h->hugepage_activelist);
	spin_unlock_irq(&hugetlb_lock);
}

/* Real files are added per-hstate at boot; only the terminator lives here. */
static struct cftype hugetlb_files[] = {
	{} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
	.css_alloc	= hugetlb_cgroup_css_alloc,
	.css_offline	= hugetlb_cgroup_css_offline,
	.css_free	= hugetlb_cgroup_css_free,
	.dfl_cftypes	= hugetlb_files,
	.legacy_cftypes	= hugetlb_files,
};