// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bpf_local_storage.h>
#include <linux/btf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>

#ifdef CONFIG_CGROUP_BPF

#include "../cgroup/cgroup-internal.h"

#define LOCAL_STORAGE_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

/*
 * A cgroup-storage map keeps its elements in two parallel indexes:
 * an rb-tree (->root) keyed by bpf_cgroup_storage_key for lookup, and
 * a list (->list) used for iteration (get_next_key) and teardown.
 * Both are protected by ->lock.
 */
struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct rb_root root;
	struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}

/*
 * A map whose key is the full bpf_cgroup_storage_key (cgroup id +
 * attach type) keeps per-attach-type storage isolated; a map keyed by
 * a bare __u64 cgroup id shares storage across attach types.  The two
 * layouts are distinguished purely by key_size.
 */
static bool attach_type_isolated(const struct bpf_map *map)
{
	return map->key_size == sizeof(struct bpf_cgroup_storage_key);
}

/*
 * Three-way comparison of two keys, interpreting them per the map's
 * key layout (see attach_type_isolated()).  Returns -1/0/1.
 */
static int bpf_cgroup_storage_key_cmp(const struct bpf_cgroup_storage_map *map,
				      const void *_key1, const void *_key2)
{
	if (attach_type_isolated(&map->map)) {
		const struct bpf_cgroup_storage_key *key1 = _key1;
		const struct bpf_cgroup_storage_key *key2 = _key2;

		if (key1->cgroup_inode_id < key2->cgroup_inode_id)
			return -1;
		else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
			return 1;
		else if (key1->attach_type < key2->attach_type)
			return -1;
		else if (key1->attach_type > key2->attach_type)
			return 1;
	} else {
		const __u64 *cgroup_inode_id1 = _key1;
		const __u64 *cgroup_inode_id2 = _key2;

		if (*cgroup_inode_id1 < *cgroup_inode_id2)
			return -1;
		else if (*cgroup_inode_id1 > *cgroup_inode_id2)
			return 1;
	}
	return 0;
}

/*
 * Find the storage element for @key in the map's rb-tree, or NULL.
 * If @locked is false, map->lock is taken and released internally;
 * if true, the caller already holds it.
 */
struct bpf_cgroup_storage *
cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
		      void *key, bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(map, key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}

/*
 * Insert @storage into the rb-tree; -EEXIST if a node with an equal
 * key is already present.  Called with map->lock held (see
 * bpf_cgroup_storage_link()).
 */
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(map, &storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}

/* ->map_lookup_elem: return a pointer to the shared value buffer. */
static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	/* ->buf can be swapped concurrently by update_elem's xchg() */
	return &READ_ONCE(storage->buf)->data[0];
}

/*
 * ->map_update_elem for the shared (non-percpu) flavor.  Elements are
 * created only via bpf_cgroup_storage_link(), so a missing key is
 * -ENOENT rather than an insert.  Without BPF_F_LOCK the value is
 * replaced wholesale: a new buffer is filled and swapped in with
 * xchg(), and the old one freed after an RCU grace period so that
 * concurrent lookup_elem() readers stay safe.
 */
static long cgroup_storage_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 flags)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST)))
		return -EINVAL;

	if (unlikely((flags & BPF_F_LOCK) &&
		     !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	if (flags & BPF_F_LOCK) {
		/* in-place copy under the value's bpf_spin_lock */
		copy_map_value_locked(map, storage->buf->data, value, false);
		return 0;
	}

	new = bpf_map_kmalloc_node(map, struct_size(new, data, map->value_size),
				   __GFP_ZERO | GFP_NOWAIT | __GFP_NOWARN,
				   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);
	check_and_init_map_value(map, new->data);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}

/*
 * Syscall-side read of a percpu element: concatenate every possible
 * CPU's slot (each padded to 8 bytes) into the user-supplied buffer.
 */
int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key,
				   void *value)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(storage->percpu_buf, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/*
 * Syscall-side write of a percpu element: scatter the user buffer
 * (one 8-byte-rounded slot per possible CPU) into the percpu area.
 */
int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
				     void *value, u64 map_flags)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
		return -EINVAL;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/*
 * ->map_get_next_key: walk the map's element list.  NULL @key (or a
 * key that is no longer present past list end) yields the first
 * element; otherwise the element after @key.  The next key is written
 * in whichever layout the map uses (full key vs. bare cgroup id).
 */
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		storage = list_next_entry(storage, list_map);
		if (!storage)
			goto enoent;
	} else {
		storage = list_first_entry(&map->list,
					   struct bpf_cgroup_storage, list_map);
	}

	spin_unlock_bh(&map->lock);

	if (attach_type_isolated(&map->map)) {
		struct bpf_cgroup_storage_key *next = _next_key;
		*next = storage->key;
	} else {
		__u64 *next = _next_key;
		*next = storage->key.cgroup_inode_id;
	}
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}

/*
 * ->map_alloc: validate attributes (either key layout is accepted,
 * max_entries must be 0 since elements are created implicitly at
 * attach time) and allocate an empty map.
 */
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	__u32 max_value_size = BPF_LOCAL_STORAGE_MAX_VALUE_SIZE;
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;

	/* percpu is bound by PCPU_MIN_UNIT_SIZE, non-percu
	 * is the same as other local storages.
	 */
	if (attr->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		max_value_size = min_t(__u32, max_value_size,
				       PCPU_MIN_UNIT_SIZE);

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
	    attr->key_size != sizeof(__u64))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > max_value_size)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	map = bpf_map_area_alloc(sizeof(struct bpf_cgroup_storage_map), numa_node);
	if (!map)
		return ERR_PTR(-ENOMEM);

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}

/*
 * ->map_free: unlink and free every remaining element under the
 * cgroup mutex, then release the map itself.  Both indexes must be
 * empty afterwards.
 */
static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct list_head *storages = &map->list;
	struct bpf_cgroup_storage *storage, *stmp;

	cgroup_lock();

	list_for_each_entry_safe(storage, stmp, storages, list_map) {
		bpf_cgroup_storage_unlink(storage);
		bpf_cgroup_storage_free(storage);
	}

	cgroup_unlock();

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	bpf_map_area_free(map);
}

/* Elements live for as long as the attachment does; explicit delete
 * from userspace is not supported.
 */
static long cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/*
 * ->map_check_btf: verify that the BTF key type supplied by userspace
 * matches the map's key layout exactly (struct bpf_cgroup_storage_key
 * or a plain 64-bit integer).  The value type is not constrained here.
 */
static int cgroup_storage_check_btf(const struct bpf_map *map,
				    const struct btf *btf,
				    const struct btf_type *key_type,
				    const struct btf_type *value_type)
{
	if (attach_type_isolated(map)) {
		struct btf_member *m;
		u32 offset, size;

		/* Key is expected to be of struct bpf_cgroup_storage_key type,
		 * which is:
		 * struct bpf_cgroup_storage_key {
		 *	__u64	cgroup_inode_id;
		 *	__u32	attach_type;
		 * };
		 */

		/*
		 * Key_type must be a structure with two fields.
		 */
		if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
		    BTF_INFO_VLEN(key_type->info) != 2)
			return -EINVAL;

		/*
		 * The first field must be a 64 bit integer at 0 offset.
		 */
		m = (struct btf_member *)(key_type + 1);
		size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
		if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
			return -EINVAL;

		/*
		 * The second field must be a 32 bit integer at 64 bit offset.
		 */
		m++;
		offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
		size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
		if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
			return -EINVAL;
	} else {
		u32 int_data;

		/*
		 * Key is expected to be u64, which stores the cgroup_inode_id
		 */

		if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
			return -EINVAL;

		int_data = *(u32 *)(key_type + 1);
		if (BTF_INT_BITS(int_data) != 64 || BTF_INT_OFFSET(int_data))
			return -EINVAL;
	}

	return 0;
}

/*
 * ->map_seq_show_elem: pretty-print one element for bpffs dumps.
 * Shared storage prints a single value; percpu storage prints a
 * "{ cpuN: ... }" group with one line per possible CPU.
 */
static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_cgroup_storage *storage;
	int cpu;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map_to_storage(map), key, false);
	if (!storage) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		seq_puts(m, ": ");
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  &READ_ONCE(storage->buf)->data[0], m);
		seq_putc(m, '\n');
	} else {
		seq_puts(m, ": {\n");
		for_each_possible_cpu(cpu) {
			seq_printf(m, "\tcpu%d: ", cpu);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  per_cpu_ptr(storage->percpu_buf, cpu),
					  m);
			seq_putc(m, '\n');
		}
		seq_puts(m, "}\n");
	}
	rcu_read_unlock();
}

static u64 cgroup_storage_map_usage(const struct bpf_map *map)
{
	/* Currently the dynamically allocated elements are not counted. */
	return sizeof(struct bpf_cgroup_storage_map);
}

BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct,
		   bpf_cgroup_storage_map)
const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = cgroup_storage_check_btf,
	.map_seq_show_elem = cgroup_storage_seq_show_elem,
	.map_mem_usage = cgroup_storage_map_usage,
	.map_btf_id = &cgroup_storage_map_btf_ids[0],
};

/*
 * Record @_map as the program's cgroup-storage map for its storage
 * type.  A program may reference at most one map per storage type;
 * a conflicting second map is rejected with -EBUSY.
 */
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);

	if (aux->cgroup_storage[stype] &&
	    aux->cgroup_storage[stype] != _map)
		return -EBUSY;

	aux->cgroup_storage[stype] = _map;
	return 0;
}

/*
 * Compute the byte size to allocate for one element's value buffer
 * and, via @pages, the whole element's page-rounded footprint.
 */
static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
	size_t size;

	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
		size = sizeof(struct bpf_storage_buffer) + map->value_size;
		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				  PAGE_SIZE) >> PAGE_SHIFT;
	} else {
		size = map->value_size;
		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
				  PAGE_SIZE) >> PAGE_SHIFT;
	}

	return size;
}

/*
 * Allocate an (unlinked) storage element for @prog's map of type
 * @stype.  Returns NULL when the program uses no such map, or an
 * ERR_PTR on allocation failure.  kfree(NULL) makes the shared
 * enomem path safe even when storage itself failed to allocate.
 */
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype)
{
	const gfp_t gfp = __GFP_ZERO | GFP_USER;
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	size_t size;
	u32 pages;

	map = prog->aux->cgroup_storage[stype];
	if (!map)
		return NULL;

	size = bpf_cgroup_storage_calculate_size(map, &pages);

	storage = bpf_map_kmalloc_node(map, sizeof(struct bpf_cgroup_storage),
				       gfp, map->numa_node);
	if (!storage)
		goto enomem;

	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		storage->buf = bpf_map_kmalloc_node(map, size, gfp,
						    map->numa_node);
		if (!storage->buf)
			goto enomem;
		check_and_init_map_value(map, storage->buf->data);
	} else {
		storage->percpu_buf = bpf_map_alloc_percpu(map, size, 8, gfp);
		if (!storage->percpu_buf)
			goto enomem;
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;

enomem:
	kfree(storage);
	return ERR_PTR(-ENOMEM);
}

/* RCU callback: free a shared element's value buffer and the element. */
static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	kfree(storage->buf);
	kfree(storage);
}

/* RCU callback: free a percpu element's value area and the element. */
static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	free_percpu(storage->percpu_buf);
	kfree(storage);
}

/*
 * Release @storage (NULL is a no-op).  Freeing is deferred by an RCU
 * grace period so that lockless readers holding rcu_read_lock() (see
 * the lookup/copy paths above) cannot see it disappear under them.
 */
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_map *map;

	if (!storage)
		return;

	map = &storage->map->map;
	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
	else
		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}

/*
 * Publish @storage under (@cgroup, @type): fill in its key and add it
 * to the map's rb-tree and list plus the cgroup's storage list, all
 * under map->lock.  Insertion cannot fail for a fresh key, hence the
 * WARN_ON rather than error propagation.
 */
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup_id(cgroup);

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list_map, &map->list);
	list_add(&storage->list_cg, &cgroup->bpf.storages);
	spin_unlock_bh(&map->lock);
}

/*
 * Remove @storage from all three indexes (rb-tree, map list, cgroup
 * list) under map->lock.  Does not free it; pair with
 * bpf_cgroup_storage_free().  NULL is a no-op.
 */
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);

	list_del(&storage->list_map);
	list_del(&storage->list_cg);
	spin_unlock_bh(&map->lock);
}

#endif