15904db98SRoman Gushchin // SPDX-License-Identifier: GPL-2.0-or-later 25904db98SRoman Gushchin /* 35904db98SRoman Gushchin * Memory Controller-related BPF kfuncs and auxiliary code 45904db98SRoman Gushchin * 55904db98SRoman Gushchin * Author: Roman Gushchin <roman.gushchin@linux.dev> 65904db98SRoman Gushchin */ 75904db98SRoman Gushchin 85904db98SRoman Gushchin #include <linux/memcontrol.h> 95904db98SRoman Gushchin #include <linux/bpf.h> 105904db98SRoman Gushchin 115904db98SRoman Gushchin __bpf_kfunc_start_defs(); 125904db98SRoman Gushchin 135904db98SRoman Gushchin /** 145c7db323SRoman Gushchin * bpf_get_root_mem_cgroup - Returns a pointer to the root memory cgroup 155c7db323SRoman Gushchin * 16*d0f5d4f8SMatt Bobrowski * The function has KF_ACQUIRE semantics, even though the root memory 17*d0f5d4f8SMatt Bobrowski * cgroup is never destroyed after being created and doesn't require 18*d0f5d4f8SMatt Bobrowski * reference counting. And it's perfectly safe to pass it to 19*d0f5d4f8SMatt Bobrowski * bpf_put_mem_cgroup() 20*d0f5d4f8SMatt Bobrowski * 21*d0f5d4f8SMatt Bobrowski * Return: A pointer to the root memory cgroup. 225c7db323SRoman Gushchin */ 235c7db323SRoman Gushchin __bpf_kfunc struct mem_cgroup *bpf_get_root_mem_cgroup(void) 245c7db323SRoman Gushchin { 255c7db323SRoman Gushchin if (mem_cgroup_disabled()) 265c7db323SRoman Gushchin return NULL; 275c7db323SRoman Gushchin 285c7db323SRoman Gushchin /* css_get() is not needed */ 295c7db323SRoman Gushchin return root_mem_cgroup; 305c7db323SRoman Gushchin } 315c7db323SRoman Gushchin 325c7db323SRoman Gushchin /** 335904db98SRoman Gushchin * bpf_get_mem_cgroup - Get a reference to a memory cgroup 345904db98SRoman Gushchin * @css: pointer to the css structure 355904db98SRoman Gushchin * 365904db98SRoman Gushchin * It's fine to pass a css which belongs to any cgroup controller, 375904db98SRoman Gushchin * e.g. unified hierarchy's main css. 385904db98SRoman Gushchin * 395904db98SRoman Gushchin * Implements KF_ACQUIRE semantics. 405904db98SRoman Gushchin * 415904db98SRoman Gushchin * Return: A pointer to a mem_cgroup structure after bumping 425904db98SRoman Gushchin * the corresponding css's reference counter. 435904db98SRoman Gushchin */ 445904db98SRoman Gushchin __bpf_kfunc struct mem_cgroup * 455904db98SRoman Gushchin bpf_get_mem_cgroup(struct cgroup_subsys_state *css) 465904db98SRoman Gushchin { 475904db98SRoman Gushchin struct mem_cgroup *memcg = NULL; 485904db98SRoman Gushchin bool rcu_unlock = false; 495904db98SRoman Gushchin 505904db98SRoman Gushchin if (mem_cgroup_disabled() || !root_mem_cgroup) 515904db98SRoman Gushchin return NULL; 525904db98SRoman Gushchin 535904db98SRoman Gushchin if (root_mem_cgroup->css.ss != css->ss) { 545904db98SRoman Gushchin struct cgroup *cgroup = css->cgroup; 555904db98SRoman Gushchin int ssid = root_mem_cgroup->css.ss->id; 565904db98SRoman Gushchin 575904db98SRoman Gushchin rcu_read_lock(); 585904db98SRoman Gushchin rcu_unlock = true; 595904db98SRoman Gushchin css = rcu_dereference_raw(cgroup->subsys[ssid]); 605904db98SRoman Gushchin } 615904db98SRoman Gushchin 625904db98SRoman Gushchin if (css && css_tryget(css)) 635904db98SRoman Gushchin memcg = container_of(css, struct mem_cgroup, css); 645904db98SRoman Gushchin 655904db98SRoman Gushchin if (rcu_unlock) 665904db98SRoman Gushchin rcu_read_unlock(); 675904db98SRoman Gushchin 685904db98SRoman Gushchin return memcg; 695904db98SRoman Gushchin } 705904db98SRoman Gushchin 715904db98SRoman Gushchin /** 725904db98SRoman Gushchin * bpf_put_mem_cgroup - Put a reference to a memory cgroup 735904db98SRoman Gushchin * @memcg: memory cgroup to release 745904db98SRoman Gushchin * 755904db98SRoman Gushchin * Releases a previously acquired memcg reference. 765904db98SRoman Gushchin * Implements KF_RELEASE semantics. 775904db98SRoman Gushchin */ 785904db98SRoman Gushchin __bpf_kfunc void bpf_put_mem_cgroup(struct mem_cgroup *memcg) 795904db98SRoman Gushchin { 805904db98SRoman Gushchin css_put(&memcg->css); 815904db98SRoman Gushchin } 825904db98SRoman Gushchin 8399430ab8SRoman Gushchin /** 8499430ab8SRoman Gushchin * bpf_mem_cgroup_vm_events - Read memory cgroup's vm event counter 8599430ab8SRoman Gushchin * @memcg: memory cgroup 8699430ab8SRoman Gushchin * @event: event id 8799430ab8SRoman Gushchin * 8899430ab8SRoman Gushchin * Allows to read memory cgroup event counters. 8999430ab8SRoman Gushchin * 9099430ab8SRoman Gushchin * Return: The current value of the corresponding events counter. 9199430ab8SRoman Gushchin */ 9299430ab8SRoman Gushchin __bpf_kfunc unsigned long bpf_mem_cgroup_vm_events(struct mem_cgroup *memcg, 9399430ab8SRoman Gushchin enum vm_event_item event) 9499430ab8SRoman Gushchin { 9599430ab8SRoman Gushchin if (unlikely(!memcg_vm_event_item_valid(event))) 9699430ab8SRoman Gushchin return (unsigned long)-1; 9799430ab8SRoman Gushchin 9899430ab8SRoman Gushchin return memcg_events(memcg, event); 9999430ab8SRoman Gushchin } 10099430ab8SRoman Gushchin 10199430ab8SRoman Gushchin /** 10299430ab8SRoman Gushchin * bpf_mem_cgroup_usage - Read memory cgroup's usage 10399430ab8SRoman Gushchin * @memcg: memory cgroup 10499430ab8SRoman Gushchin * 10599430ab8SRoman Gushchin * Please, note that the root memory cgroup it special and is exempt 10699430ab8SRoman Gushchin * from the memory accounting. The returned value is a sum of sub-cgroup's 10799430ab8SRoman Gushchin * usages and it not reflecting the size of the root memory cgroup itself. 10899430ab8SRoman Gushchin * If you need to get an approximation, you can use root level statistics: 10999430ab8SRoman Gushchin * e.g. NR_FILE_PAGES + NR_ANON_MAPPED. 11099430ab8SRoman Gushchin * 11199430ab8SRoman Gushchin * Return: The current memory cgroup size in bytes. 11299430ab8SRoman Gushchin */ 11399430ab8SRoman Gushchin __bpf_kfunc unsigned long bpf_mem_cgroup_usage(struct mem_cgroup *memcg) 11499430ab8SRoman Gushchin { 11599430ab8SRoman Gushchin return page_counter_read(&memcg->memory) * PAGE_SIZE; 11699430ab8SRoman Gushchin } 11799430ab8SRoman Gushchin 11899430ab8SRoman Gushchin /** 11999430ab8SRoman Gushchin * bpf_mem_cgroup_memory_events - Read memory cgroup's memory event value 12099430ab8SRoman Gushchin * @memcg: memory cgroup 12199430ab8SRoman Gushchin * @event: memory event id 12299430ab8SRoman Gushchin * 12399430ab8SRoman Gushchin * Return: The current value of the memory event counter. 12499430ab8SRoman Gushchin */ 12599430ab8SRoman Gushchin __bpf_kfunc unsigned long bpf_mem_cgroup_memory_events(struct mem_cgroup *memcg, 12699430ab8SRoman Gushchin enum memcg_memory_event event) 12799430ab8SRoman Gushchin { 12899430ab8SRoman Gushchin if (unlikely(event >= MEMCG_NR_MEMORY_EVENTS)) 12999430ab8SRoman Gushchin return (unsigned long)-1; 13099430ab8SRoman Gushchin 13199430ab8SRoman Gushchin return atomic_long_read(&memcg->memory_events[event]); 13299430ab8SRoman Gushchin } 13399430ab8SRoman Gushchin 13499430ab8SRoman Gushchin /** 13599430ab8SRoman Gushchin * bpf_mem_cgroup_page_state - Read memory cgroup's page state counter 13699430ab8SRoman Gushchin * @memcg: memory cgroup 13799430ab8SRoman Gushchin * @idx: counter idx 13899430ab8SRoman Gushchin * 13999430ab8SRoman Gushchin * Allows to read memory cgroup statistics. The output is in bytes. 14099430ab8SRoman Gushchin * 14199430ab8SRoman Gushchin * Return: The value of the page state counter in bytes. 14299430ab8SRoman Gushchin */ 14399430ab8SRoman Gushchin __bpf_kfunc unsigned long bpf_mem_cgroup_page_state(struct mem_cgroup *memcg, int idx) 14499430ab8SRoman Gushchin { 14599430ab8SRoman Gushchin if (unlikely(!memcg_stat_item_valid(idx))) 14699430ab8SRoman Gushchin return (unsigned long)-1; 14799430ab8SRoman Gushchin 14899430ab8SRoman Gushchin return memcg_page_state_output(memcg, idx); 14999430ab8SRoman Gushchin } 15099430ab8SRoman Gushchin 15199430ab8SRoman Gushchin /** 15299430ab8SRoman Gushchin * bpf_mem_cgroup_flush_stats - Flush memory cgroup's statistics 15399430ab8SRoman Gushchin * @memcg: memory cgroup 15499430ab8SRoman Gushchin * 15599430ab8SRoman Gushchin * Propagate memory cgroup's statistics up the cgroup tree. 15699430ab8SRoman Gushchin */ 15799430ab8SRoman Gushchin __bpf_kfunc void bpf_mem_cgroup_flush_stats(struct mem_cgroup *memcg) 15899430ab8SRoman Gushchin { 15999430ab8SRoman Gushchin mem_cgroup_flush_stats(memcg); 16099430ab8SRoman Gushchin } 16199430ab8SRoman Gushchin 1625904db98SRoman Gushchin __bpf_kfunc_end_defs(); 1635904db98SRoman Gushchin 1645904db98SRoman Gushchin BTF_KFUNCS_START(bpf_memcontrol_kfuncs) 165*d0f5d4f8SMatt Bobrowski BTF_ID_FLAGS(func, bpf_get_root_mem_cgroup, KF_ACQUIRE | KF_RET_NULL) 1665904db98SRoman Gushchin BTF_ID_FLAGS(func, bpf_get_mem_cgroup, KF_ACQUIRE | KF_RET_NULL | KF_RCU) 1675904db98SRoman Gushchin BTF_ID_FLAGS(func, bpf_put_mem_cgroup, KF_RELEASE) 1685904db98SRoman Gushchin 1697646c7afSPuranjay Mohan BTF_ID_FLAGS(func, bpf_mem_cgroup_vm_events) 1707646c7afSPuranjay Mohan BTF_ID_FLAGS(func, bpf_mem_cgroup_memory_events) 1717646c7afSPuranjay Mohan BTF_ID_FLAGS(func, bpf_mem_cgroup_usage) 1727646c7afSPuranjay Mohan BTF_ID_FLAGS(func, bpf_mem_cgroup_page_state) 1737646c7afSPuranjay Mohan BTF_ID_FLAGS(func, bpf_mem_cgroup_flush_stats, KF_SLEEPABLE) 17499430ab8SRoman Gushchin 1755904db98SRoman Gushchin BTF_KFUNCS_END(bpf_memcontrol_kfuncs) 1765904db98SRoman Gushchin 1775904db98SRoman Gushchin static const struct btf_kfunc_id_set bpf_memcontrol_kfunc_set = { 1785904db98SRoman Gushchin .owner = THIS_MODULE, 1795904db98SRoman Gushchin .set = &bpf_memcontrol_kfuncs, 1805904db98SRoman Gushchin }; 1815904db98SRoman Gushchin 1825904db98SRoman Gushchin static int __init bpf_memcontrol_init(void) 1835904db98SRoman Gushchin { 1845904db98SRoman Gushchin int err; 1855904db98SRoman Gushchin 1865904db98SRoman Gushchin err = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, 1875904db98SRoman Gushchin &bpf_memcontrol_kfunc_set); 1885904db98SRoman Gushchin if (err) 1895904db98SRoman Gushchin pr_warn("error while registering bpf memcontrol kfuncs: %d", err); 1905904db98SRoman Gushchin 1915904db98SRoman Gushchin return err; 1925904db98SRoman Gushchin } 1935904db98SRoman Gushchin late_initcall(bpf_memcontrol_init); 194