199c55f7dSAlexei Starovoitov /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 299c55f7dSAlexei Starovoitov * 399c55f7dSAlexei Starovoitov * This program is free software; you can redistribute it and/or 499c55f7dSAlexei Starovoitov * modify it under the terms of version 2 of the GNU General Public 599c55f7dSAlexei Starovoitov * License as published by the Free Software Foundation. 699c55f7dSAlexei Starovoitov * 799c55f7dSAlexei Starovoitov * This program is distributed in the hope that it will be useful, but 899c55f7dSAlexei Starovoitov * WITHOUT ANY WARRANTY; without even the implied warranty of 999c55f7dSAlexei Starovoitov * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1099c55f7dSAlexei Starovoitov * General Public License for more details. 1199c55f7dSAlexei Starovoitov */ 1299c55f7dSAlexei Starovoitov #include <linux/bpf.h> 13a67edbf4SDaniel Borkmann #include <linux/bpf_trace.h> 1499c55f7dSAlexei Starovoitov #include <linux/syscalls.h> 1599c55f7dSAlexei Starovoitov #include <linux/slab.h> 163f07c014SIngo Molnar #include <linux/sched/signal.h> 17d407bd25SDaniel Borkmann #include <linux/vmalloc.h> 18d407bd25SDaniel Borkmann #include <linux/mmzone.h> 1999c55f7dSAlexei Starovoitov #include <linux/anon_inodes.h> 20db20fd2bSAlexei Starovoitov #include <linux/file.h> 2109756af4SAlexei Starovoitov #include <linux/license.h> 2209756af4SAlexei Starovoitov #include <linux/filter.h> 232541517cSAlexei Starovoitov #include <linux/version.h> 24535e7b4bSMickaël Salaün #include <linux/kernel.h> 2599c55f7dSAlexei Starovoitov 26b121d1e7SAlexei Starovoitov DEFINE_PER_CPU(int, bpf_prog_active); 27b121d1e7SAlexei Starovoitov 281be7f75dSAlexei Starovoitov int sysctl_unprivileged_bpf_disabled __read_mostly; 291be7f75dSAlexei Starovoitov 3040077e0cSJohannes Berg static const struct bpf_map_ops * const bpf_map_types[] = { 3140077e0cSJohannes Berg #define BPF_PROG_TYPE(_id, _ops) 3240077e0cSJohannes Berg #define BPF_MAP_TYPE(_id, _ops) \ 3340077e0cSJohannes Berg [_id] = &_ops, 3440077e0cSJohannes Berg #include <linux/bpf_types.h> 3540077e0cSJohannes Berg #undef BPF_PROG_TYPE 3640077e0cSJohannes Berg #undef BPF_MAP_TYPE 3740077e0cSJohannes Berg }; 3899c55f7dSAlexei Starovoitov 3999c55f7dSAlexei Starovoitov static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 4099c55f7dSAlexei Starovoitov { 4199c55f7dSAlexei Starovoitov struct bpf_map *map; 4299c55f7dSAlexei Starovoitov 4340077e0cSJohannes Berg if (attr->map_type >= ARRAY_SIZE(bpf_map_types) || 4440077e0cSJohannes Berg !bpf_map_types[attr->map_type]) 4540077e0cSJohannes Berg return ERR_PTR(-EINVAL); 4640077e0cSJohannes Berg 4740077e0cSJohannes Berg map = bpf_map_types[attr->map_type]->map_alloc(attr); 4899c55f7dSAlexei Starovoitov if (IS_ERR(map)) 4999c55f7dSAlexei Starovoitov return map; 5040077e0cSJohannes Berg map->ops = bpf_map_types[attr->map_type]; 5199c55f7dSAlexei Starovoitov map->map_type = attr->map_type; 5299c55f7dSAlexei Starovoitov return map; 5399c55f7dSAlexei Starovoitov } 5499c55f7dSAlexei Starovoitov 55d407bd25SDaniel Borkmann void *bpf_map_area_alloc(size_t size) 56d407bd25SDaniel Borkmann { 57d407bd25SDaniel Borkmann /* We definitely need __GFP_NORETRY, so OOM killer doesn't 58d407bd25SDaniel Borkmann * trigger under memory pressure as we really just want to 59d407bd25SDaniel Borkmann * fail instead. 60d407bd25SDaniel Borkmann */ 61d407bd25SDaniel Borkmann const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO; 62d407bd25SDaniel Borkmann void *area; 63d407bd25SDaniel Borkmann 64d407bd25SDaniel Borkmann if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 65d407bd25SDaniel Borkmann area = kmalloc(size, GFP_USER | flags); 66d407bd25SDaniel Borkmann if (area != NULL) 67d407bd25SDaniel Borkmann return area; 68d407bd25SDaniel Borkmann } 69d407bd25SDaniel Borkmann 7019809c2dSMichal Hocko return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL); 71d407bd25SDaniel Borkmann } 72d407bd25SDaniel Borkmann 73d407bd25SDaniel Borkmann void bpf_map_area_free(void *area) 74d407bd25SDaniel Borkmann { 75d407bd25SDaniel Borkmann kvfree(area); 76d407bd25SDaniel Borkmann } 77d407bd25SDaniel Borkmann 786c905981SAlexei Starovoitov int bpf_map_precharge_memlock(u32 pages) 796c905981SAlexei Starovoitov { 806c905981SAlexei Starovoitov struct user_struct *user = get_current_user(); 816c905981SAlexei Starovoitov unsigned long memlock_limit, cur; 826c905981SAlexei Starovoitov 836c905981SAlexei Starovoitov memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 846c905981SAlexei Starovoitov cur = atomic_long_read(&user->locked_vm); 856c905981SAlexei Starovoitov free_uid(user); 866c905981SAlexei Starovoitov if (cur + pages > memlock_limit) 876c905981SAlexei Starovoitov return -EPERM; 886c905981SAlexei Starovoitov return 0; 896c905981SAlexei Starovoitov } 906c905981SAlexei Starovoitov 91aaac3ba9SAlexei Starovoitov static int bpf_map_charge_memlock(struct bpf_map *map) 92aaac3ba9SAlexei Starovoitov { 93aaac3ba9SAlexei Starovoitov struct user_struct *user = get_current_user(); 94aaac3ba9SAlexei Starovoitov unsigned long memlock_limit; 95aaac3ba9SAlexei Starovoitov 96aaac3ba9SAlexei Starovoitov memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 97aaac3ba9SAlexei Starovoitov 98aaac3ba9SAlexei Starovoitov atomic_long_add(map->pages, &user->locked_vm); 99aaac3ba9SAlexei Starovoitov 100aaac3ba9SAlexei Starovoitov if (atomic_long_read(&user->locked_vm) > memlock_limit) { 101aaac3ba9SAlexei Starovoitov atomic_long_sub(map->pages, &user->locked_vm); 102aaac3ba9SAlexei Starovoitov free_uid(user); 103aaac3ba9SAlexei Starovoitov return -EPERM; 104aaac3ba9SAlexei Starovoitov } 105aaac3ba9SAlexei Starovoitov map->user = user; 106aaac3ba9SAlexei Starovoitov return 0; 107aaac3ba9SAlexei Starovoitov } 108aaac3ba9SAlexei Starovoitov 109aaac3ba9SAlexei Starovoitov static void bpf_map_uncharge_memlock(struct bpf_map *map) 110aaac3ba9SAlexei Starovoitov { 111aaac3ba9SAlexei Starovoitov struct user_struct *user = map->user; 112aaac3ba9SAlexei Starovoitov 113aaac3ba9SAlexei Starovoitov atomic_long_sub(map->pages, &user->locked_vm); 114aaac3ba9SAlexei Starovoitov free_uid(user); 115aaac3ba9SAlexei Starovoitov } 116aaac3ba9SAlexei Starovoitov 11799c55f7dSAlexei Starovoitov /* called from workqueue */ 11899c55f7dSAlexei Starovoitov static void bpf_map_free_deferred(struct work_struct *work) 11999c55f7dSAlexei Starovoitov { 12099c55f7dSAlexei Starovoitov struct bpf_map *map = container_of(work, struct bpf_map, work); 12199c55f7dSAlexei Starovoitov 122aaac3ba9SAlexei Starovoitov bpf_map_uncharge_memlock(map); 12399c55f7dSAlexei Starovoitov /* implementation dependent freeing */ 12499c55f7dSAlexei Starovoitov map->ops->map_free(map); 12599c55f7dSAlexei Starovoitov } 12699c55f7dSAlexei Starovoitov 127c9da161cSDaniel Borkmann static void bpf_map_put_uref(struct bpf_map *map) 128c9da161cSDaniel Borkmann { 129c9da161cSDaniel Borkmann if (atomic_dec_and_test(&map->usercnt)) { 130c9da161cSDaniel Borkmann if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) 131c9da161cSDaniel Borkmann bpf_fd_array_map_clear(map); 132c9da161cSDaniel Borkmann } 133c9da161cSDaniel Borkmann } 134c9da161cSDaniel Borkmann 13599c55f7dSAlexei Starovoitov /* decrement map refcnt and schedule it for freeing via workqueue 13699c55f7dSAlexei Starovoitov * (unrelying map implementation ops->map_free() might sleep) 13799c55f7dSAlexei Starovoitov */ 13899c55f7dSAlexei Starovoitov void bpf_map_put(struct bpf_map *map) 13999c55f7dSAlexei Starovoitov { 14099c55f7dSAlexei Starovoitov if (atomic_dec_and_test(&map->refcnt)) { 14199c55f7dSAlexei Starovoitov INIT_WORK(&map->work, bpf_map_free_deferred); 14299c55f7dSAlexei Starovoitov schedule_work(&map->work); 14399c55f7dSAlexei Starovoitov } 14499c55f7dSAlexei Starovoitov } 14599c55f7dSAlexei Starovoitov 146c9da161cSDaniel Borkmann void bpf_map_put_with_uref(struct bpf_map *map) 147c9da161cSDaniel Borkmann { 148c9da161cSDaniel Borkmann bpf_map_put_uref(map); 149c9da161cSDaniel Borkmann bpf_map_put(map); 150c9da161cSDaniel Borkmann } 151c9da161cSDaniel Borkmann 15299c55f7dSAlexei Starovoitov static int bpf_map_release(struct inode *inode, struct file *filp) 15399c55f7dSAlexei Starovoitov { 15461d1b6a4SDaniel Borkmann struct bpf_map *map = filp->private_data; 15561d1b6a4SDaniel Borkmann 15661d1b6a4SDaniel Borkmann if (map->ops->map_release) 15761d1b6a4SDaniel Borkmann map->ops->map_release(map, filp); 15861d1b6a4SDaniel Borkmann 15961d1b6a4SDaniel Borkmann bpf_map_put_with_uref(map); 16099c55f7dSAlexei Starovoitov return 0; 16199c55f7dSAlexei Starovoitov } 16299c55f7dSAlexei Starovoitov 163f99bf205SDaniel Borkmann #ifdef CONFIG_PROC_FS 164f99bf205SDaniel Borkmann static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 165f99bf205SDaniel Borkmann { 166f99bf205SDaniel Borkmann const struct bpf_map *map = filp->private_data; 16721116b70SDaniel Borkmann const struct bpf_array *array; 16821116b70SDaniel Borkmann u32 owner_prog_type = 0; 16921116b70SDaniel Borkmann 17021116b70SDaniel Borkmann if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { 17121116b70SDaniel Borkmann array = container_of(map, struct bpf_array, map); 17221116b70SDaniel Borkmann owner_prog_type = array->owner_prog_type; 17321116b70SDaniel Borkmann } 174f99bf205SDaniel Borkmann 175f99bf205SDaniel Borkmann seq_printf(m, 176f99bf205SDaniel Borkmann "map_type:\t%u\n" 177f99bf205SDaniel Borkmann "key_size:\t%u\n" 178f99bf205SDaniel Borkmann "value_size:\t%u\n" 179322cea2fSDaniel Borkmann "max_entries:\t%u\n" 18021116b70SDaniel Borkmann "map_flags:\t%#x\n" 18121116b70SDaniel Borkmann "memlock:\t%llu\n", 182f99bf205SDaniel Borkmann map->map_type, 183f99bf205SDaniel Borkmann map->key_size, 184f99bf205SDaniel Borkmann map->value_size, 185322cea2fSDaniel Borkmann map->max_entries, 18621116b70SDaniel Borkmann map->map_flags, 18721116b70SDaniel Borkmann map->pages * 1ULL << PAGE_SHIFT); 18821116b70SDaniel Borkmann 18921116b70SDaniel Borkmann if (owner_prog_type) 19021116b70SDaniel Borkmann seq_printf(m, "owner_prog_type:\t%u\n", 19121116b70SDaniel Borkmann owner_prog_type); 192f99bf205SDaniel Borkmann } 193f99bf205SDaniel Borkmann #endif 194f99bf205SDaniel Borkmann 19599c55f7dSAlexei Starovoitov static const struct file_operations bpf_map_fops = { 196f99bf205SDaniel Borkmann #ifdef CONFIG_PROC_FS 197f99bf205SDaniel Borkmann .show_fdinfo = bpf_map_show_fdinfo, 198f99bf205SDaniel Borkmann #endif 19999c55f7dSAlexei Starovoitov .release = bpf_map_release, 20099c55f7dSAlexei Starovoitov }; 20199c55f7dSAlexei Starovoitov 202b2197755SDaniel Borkmann int bpf_map_new_fd(struct bpf_map *map) 203aa79781bSDaniel Borkmann { 204aa79781bSDaniel Borkmann return anon_inode_getfd("bpf-map", &bpf_map_fops, map, 205aa79781bSDaniel Borkmann O_RDWR | O_CLOEXEC); 206aa79781bSDaniel Borkmann } 207aa79781bSDaniel Borkmann 20899c55f7dSAlexei Starovoitov /* helper macro to check that unused fields 'union bpf_attr' are zero */ 20999c55f7dSAlexei Starovoitov #define CHECK_ATTR(CMD) \ 21099c55f7dSAlexei Starovoitov memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 21199c55f7dSAlexei Starovoitov sizeof(attr->CMD##_LAST_FIELD), 0, \ 21299c55f7dSAlexei Starovoitov sizeof(*attr) - \ 21399c55f7dSAlexei Starovoitov offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 21499c55f7dSAlexei Starovoitov sizeof(attr->CMD##_LAST_FIELD)) != NULL 21599c55f7dSAlexei Starovoitov 21656f668dfSMartin KaFai Lau #define BPF_MAP_CREATE_LAST_FIELD inner_map_fd 21799c55f7dSAlexei Starovoitov /* called via syscall */ 21899c55f7dSAlexei Starovoitov static int map_create(union bpf_attr *attr) 21999c55f7dSAlexei Starovoitov { 22099c55f7dSAlexei Starovoitov struct bpf_map *map; 22199c55f7dSAlexei Starovoitov int err; 22299c55f7dSAlexei Starovoitov 22399c55f7dSAlexei Starovoitov err = CHECK_ATTR(BPF_MAP_CREATE); 22499c55f7dSAlexei Starovoitov if (err) 22599c55f7dSAlexei Starovoitov return -EINVAL; 22699c55f7dSAlexei Starovoitov 22799c55f7dSAlexei Starovoitov /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 22899c55f7dSAlexei Starovoitov map = find_and_alloc_map(attr); 22999c55f7dSAlexei Starovoitov if (IS_ERR(map)) 23099c55f7dSAlexei Starovoitov return PTR_ERR(map); 23199c55f7dSAlexei Starovoitov 23299c55f7dSAlexei Starovoitov atomic_set(&map->refcnt, 1); 233c9da161cSDaniel Borkmann atomic_set(&map->usercnt, 1); 23499c55f7dSAlexei Starovoitov 235aaac3ba9SAlexei Starovoitov err = bpf_map_charge_memlock(map); 236aaac3ba9SAlexei Starovoitov if (err) 23720b2b24fSDaniel Borkmann goto free_map_nouncharge; 238aaac3ba9SAlexei Starovoitov 239aa79781bSDaniel Borkmann err = bpf_map_new_fd(map); 24099c55f7dSAlexei Starovoitov if (err < 0) 24199c55f7dSAlexei Starovoitov /* failed to allocate fd */ 24299c55f7dSAlexei Starovoitov goto free_map; 24399c55f7dSAlexei Starovoitov 244a67edbf4SDaniel Borkmann trace_bpf_map_create(map, err); 24599c55f7dSAlexei Starovoitov return err; 24699c55f7dSAlexei Starovoitov 24799c55f7dSAlexei Starovoitov free_map: 24820b2b24fSDaniel Borkmann bpf_map_uncharge_memlock(map); 24920b2b24fSDaniel Borkmann free_map_nouncharge: 25099c55f7dSAlexei Starovoitov map->ops->map_free(map); 25199c55f7dSAlexei Starovoitov return err; 25299c55f7dSAlexei Starovoitov } 25399c55f7dSAlexei Starovoitov 254db20fd2bSAlexei Starovoitov /* if error is returned, fd is released. 255db20fd2bSAlexei Starovoitov * On success caller should complete fd access with matching fdput() 256db20fd2bSAlexei Starovoitov */ 257c2101297SDaniel Borkmann struct bpf_map *__bpf_map_get(struct fd f) 258db20fd2bSAlexei Starovoitov { 259db20fd2bSAlexei Starovoitov if (!f.file) 260db20fd2bSAlexei Starovoitov return ERR_PTR(-EBADF); 261db20fd2bSAlexei Starovoitov if (f.file->f_op != &bpf_map_fops) { 262db20fd2bSAlexei Starovoitov fdput(f); 263db20fd2bSAlexei Starovoitov return ERR_PTR(-EINVAL); 264db20fd2bSAlexei Starovoitov } 265db20fd2bSAlexei Starovoitov 266c2101297SDaniel Borkmann return f.file->private_data; 267c2101297SDaniel Borkmann } 268c2101297SDaniel Borkmann 26992117d84SAlexei Starovoitov /* prog's and map's refcnt limit */ 27092117d84SAlexei Starovoitov #define BPF_MAX_REFCNT 32768 27192117d84SAlexei Starovoitov 27292117d84SAlexei Starovoitov struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) 273c9da161cSDaniel Borkmann { 27492117d84SAlexei Starovoitov if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) { 27592117d84SAlexei Starovoitov atomic_dec(&map->refcnt); 27692117d84SAlexei Starovoitov return ERR_PTR(-EBUSY); 27792117d84SAlexei Starovoitov } 278c9da161cSDaniel Borkmann if (uref) 279c9da161cSDaniel Borkmann atomic_inc(&map->usercnt); 28092117d84SAlexei Starovoitov return map; 281c9da161cSDaniel Borkmann } 282c9da161cSDaniel Borkmann 283c9da161cSDaniel Borkmann struct bpf_map *bpf_map_get_with_uref(u32 ufd) 284c2101297SDaniel Borkmann { 285c2101297SDaniel Borkmann struct fd f = fdget(ufd); 286c2101297SDaniel Borkmann struct bpf_map *map; 287c2101297SDaniel Borkmann 288c2101297SDaniel Borkmann map = __bpf_map_get(f); 289c2101297SDaniel Borkmann if (IS_ERR(map)) 290c2101297SDaniel Borkmann return map; 291c2101297SDaniel Borkmann 29292117d84SAlexei Starovoitov map = bpf_map_inc(map, true); 293c2101297SDaniel Borkmann fdput(f); 294db20fd2bSAlexei Starovoitov 295db20fd2bSAlexei Starovoitov return map; 296db20fd2bSAlexei Starovoitov } 297db20fd2bSAlexei Starovoitov 298b8cdc051SAlexei Starovoitov int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 299b8cdc051SAlexei Starovoitov { 300b8cdc051SAlexei Starovoitov return -ENOTSUPP; 301b8cdc051SAlexei Starovoitov } 302b8cdc051SAlexei Starovoitov 303db20fd2bSAlexei Starovoitov /* last field in 'union bpf_attr' used by this command */ 304db20fd2bSAlexei Starovoitov #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 305db20fd2bSAlexei Starovoitov 306db20fd2bSAlexei Starovoitov static int map_lookup_elem(union bpf_attr *attr) 307db20fd2bSAlexei Starovoitov { 308535e7b4bSMickaël Salaün void __user *ukey = u64_to_user_ptr(attr->key); 309535e7b4bSMickaël Salaün void __user *uvalue = u64_to_user_ptr(attr->value); 310db20fd2bSAlexei Starovoitov int ufd = attr->map_fd; 311db20fd2bSAlexei Starovoitov struct bpf_map *map; 3128ebe667cSAlexei Starovoitov void *key, *value, *ptr; 31315a07b33SAlexei Starovoitov u32 value_size; 314592867bfSDaniel Borkmann struct fd f; 315db20fd2bSAlexei Starovoitov int err; 316db20fd2bSAlexei Starovoitov 317db20fd2bSAlexei Starovoitov if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 318db20fd2bSAlexei Starovoitov return -EINVAL; 319db20fd2bSAlexei Starovoitov 320592867bfSDaniel Borkmann f = fdget(ufd); 321c2101297SDaniel Borkmann map = __bpf_map_get(f); 322db20fd2bSAlexei Starovoitov if (IS_ERR(map)) 323db20fd2bSAlexei Starovoitov return PTR_ERR(map); 324db20fd2bSAlexei Starovoitov 325*e4448ed8SAl Viro key = memdup_user(ukey, map->key_size); 326*e4448ed8SAl Viro if (IS_ERR(key)) { 327*e4448ed8SAl Viro err = PTR_ERR(key); 328db20fd2bSAlexei Starovoitov goto err_put; 329*e4448ed8SAl Viro } 330db20fd2bSAlexei Starovoitov 33115a07b33SAlexei Starovoitov if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 3328f844938SMartin KaFai Lau map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 33315a07b33SAlexei Starovoitov map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) 33415a07b33SAlexei Starovoitov value_size = round_up(map->value_size, 8) * num_possible_cpus(); 33515a07b33SAlexei Starovoitov else 33615a07b33SAlexei Starovoitov value_size = map->value_size; 33715a07b33SAlexei Starovoitov 3388ebe667cSAlexei Starovoitov err = -ENOMEM; 33915a07b33SAlexei Starovoitov value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 340db20fd2bSAlexei Starovoitov if (!value) 3418ebe667cSAlexei Starovoitov goto free_key; 3428ebe667cSAlexei Starovoitov 3438f844938SMartin KaFai Lau if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 3448f844938SMartin KaFai Lau map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 34515a07b33SAlexei Starovoitov err = bpf_percpu_hash_copy(map, key, value); 34615a07b33SAlexei Starovoitov } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 34715a07b33SAlexei Starovoitov err = bpf_percpu_array_copy(map, key, value); 348557c0c6eSAlexei Starovoitov } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 349557c0c6eSAlexei Starovoitov err = bpf_stackmap_copy(map, key, value); 350bcc6b1b7SMartin KaFai Lau } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || 351bcc6b1b7SMartin KaFai Lau map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 35256f668dfSMartin KaFai Lau err = -ENOTSUPP; 35315a07b33SAlexei Starovoitov } else { 3548ebe667cSAlexei Starovoitov rcu_read_lock(); 3558ebe667cSAlexei Starovoitov ptr = map->ops->map_lookup_elem(map, key); 3568ebe667cSAlexei Starovoitov if (ptr) 35715a07b33SAlexei Starovoitov memcpy(value, ptr, value_size); 3588ebe667cSAlexei Starovoitov rcu_read_unlock(); 35915a07b33SAlexei Starovoitov err = ptr ? 0 : -ENOENT; 36015a07b33SAlexei Starovoitov } 3618ebe667cSAlexei Starovoitov 36215a07b33SAlexei Starovoitov if (err) 3638ebe667cSAlexei Starovoitov goto free_value; 364db20fd2bSAlexei Starovoitov 365db20fd2bSAlexei Starovoitov err = -EFAULT; 36615a07b33SAlexei Starovoitov if (copy_to_user(uvalue, value, value_size) != 0) 3678ebe667cSAlexei Starovoitov goto free_value; 368db20fd2bSAlexei Starovoitov 369a67edbf4SDaniel Borkmann trace_bpf_map_lookup_elem(map, ufd, key, value); 370db20fd2bSAlexei Starovoitov err = 0; 371db20fd2bSAlexei Starovoitov 3728ebe667cSAlexei Starovoitov free_value: 3738ebe667cSAlexei Starovoitov kfree(value); 374db20fd2bSAlexei Starovoitov free_key: 375db20fd2bSAlexei Starovoitov kfree(key); 376db20fd2bSAlexei Starovoitov err_put: 377db20fd2bSAlexei Starovoitov fdput(f); 378db20fd2bSAlexei Starovoitov return err; 379db20fd2bSAlexei Starovoitov } 380db20fd2bSAlexei Starovoitov 3813274f520SAlexei Starovoitov #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 382db20fd2bSAlexei Starovoitov 383db20fd2bSAlexei Starovoitov static int map_update_elem(union bpf_attr *attr) 384db20fd2bSAlexei Starovoitov { 385535e7b4bSMickaël Salaün void __user *ukey = u64_to_user_ptr(attr->key); 386535e7b4bSMickaël Salaün void __user *uvalue = u64_to_user_ptr(attr->value); 387db20fd2bSAlexei Starovoitov int ufd = attr->map_fd; 388db20fd2bSAlexei Starovoitov struct bpf_map *map; 389db20fd2bSAlexei Starovoitov void *key, *value; 39015a07b33SAlexei Starovoitov u32 value_size; 391592867bfSDaniel Borkmann struct fd f; 392db20fd2bSAlexei Starovoitov int err; 393db20fd2bSAlexei Starovoitov 394db20fd2bSAlexei Starovoitov if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 395db20fd2bSAlexei Starovoitov return -EINVAL; 396db20fd2bSAlexei Starovoitov 397592867bfSDaniel Borkmann f = fdget(ufd); 398c2101297SDaniel Borkmann map = __bpf_map_get(f); 399db20fd2bSAlexei Starovoitov if (IS_ERR(map)) 400db20fd2bSAlexei Starovoitov return PTR_ERR(map); 401db20fd2bSAlexei Starovoitov 402*e4448ed8SAl Viro key = memdup_user(ukey, map->key_size); 403*e4448ed8SAl Viro if (IS_ERR(key)) { 404*e4448ed8SAl Viro err = PTR_ERR(key); 405db20fd2bSAlexei Starovoitov goto err_put; 406*e4448ed8SAl Viro } 407db20fd2bSAlexei Starovoitov 40815a07b33SAlexei Starovoitov if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 4098f844938SMartin KaFai Lau map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 41015a07b33SAlexei Starovoitov map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) 41115a07b33SAlexei Starovoitov value_size = round_up(map->value_size, 8) * num_possible_cpus(); 41215a07b33SAlexei Starovoitov else 41315a07b33SAlexei Starovoitov value_size = map->value_size; 41415a07b33SAlexei Starovoitov 415db20fd2bSAlexei Starovoitov err = -ENOMEM; 41615a07b33SAlexei Starovoitov value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 417db20fd2bSAlexei Starovoitov if (!value) 418db20fd2bSAlexei Starovoitov goto free_key; 419db20fd2bSAlexei Starovoitov 420db20fd2bSAlexei Starovoitov err = -EFAULT; 42115a07b33SAlexei Starovoitov if (copy_from_user(value, uvalue, value_size) != 0) 422db20fd2bSAlexei Starovoitov goto free_value; 423db20fd2bSAlexei Starovoitov 424b121d1e7SAlexei Starovoitov /* must increment bpf_prog_active to avoid kprobe+bpf triggering from 425b121d1e7SAlexei Starovoitov * inside bpf map update or delete otherwise deadlocks are possible 426b121d1e7SAlexei Starovoitov */ 427b121d1e7SAlexei Starovoitov preempt_disable(); 428b121d1e7SAlexei Starovoitov __this_cpu_inc(bpf_prog_active); 4298f844938SMartin KaFai Lau if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 4308f844938SMartin KaFai Lau map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 43115a07b33SAlexei Starovoitov err = bpf_percpu_hash_update(map, key, value, attr->flags); 43215a07b33SAlexei Starovoitov } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 43315a07b33SAlexei Starovoitov err = bpf_percpu_array_update(map, key, value, attr->flags); 434d056a788SDaniel Borkmann } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || 4354ed8ec52SMartin KaFai Lau map->map_type == BPF_MAP_TYPE_PROG_ARRAY || 43656f668dfSMartin KaFai Lau map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || 43756f668dfSMartin KaFai Lau map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { 438d056a788SDaniel Borkmann rcu_read_lock(); 439d056a788SDaniel Borkmann err = bpf_fd_array_map_update_elem(map, f.file, key, value, 440d056a788SDaniel Borkmann attr->flags); 441d056a788SDaniel Borkmann rcu_read_unlock(); 442bcc6b1b7SMartin KaFai Lau } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 443bcc6b1b7SMartin KaFai Lau rcu_read_lock(); 444bcc6b1b7SMartin KaFai Lau err = bpf_fd_htab_map_update_elem(map, f.file, key, value, 445bcc6b1b7SMartin KaFai Lau attr->flags); 446bcc6b1b7SMartin KaFai Lau rcu_read_unlock(); 44715a07b33SAlexei Starovoitov } else { 448db20fd2bSAlexei Starovoitov rcu_read_lock(); 4493274f520SAlexei Starovoitov err = map->ops->map_update_elem(map, key, value, attr->flags); 450db20fd2bSAlexei Starovoitov rcu_read_unlock(); 45115a07b33SAlexei Starovoitov } 452b121d1e7SAlexei Starovoitov __this_cpu_dec(bpf_prog_active); 453b121d1e7SAlexei Starovoitov preempt_enable(); 454db20fd2bSAlexei Starovoitov 455a67edbf4SDaniel Borkmann if (!err) 456a67edbf4SDaniel Borkmann trace_bpf_map_update_elem(map, ufd, key, value); 457db20fd2bSAlexei Starovoitov free_value: 458db20fd2bSAlexei Starovoitov kfree(value); 459db20fd2bSAlexei Starovoitov free_key: 460db20fd2bSAlexei Starovoitov kfree(key); 461db20fd2bSAlexei Starovoitov err_put: 462db20fd2bSAlexei Starovoitov fdput(f); 463db20fd2bSAlexei Starovoitov return err; 464db20fd2bSAlexei Starovoitov } 465db20fd2bSAlexei Starovoitov 466db20fd2bSAlexei Starovoitov #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 467db20fd2bSAlexei Starovoitov 468db20fd2bSAlexei Starovoitov static int map_delete_elem(union bpf_attr *attr) 469db20fd2bSAlexei Starovoitov { 470535e7b4bSMickaël Salaün void __user *ukey = u64_to_user_ptr(attr->key); 471db20fd2bSAlexei Starovoitov int ufd = attr->map_fd; 472db20fd2bSAlexei Starovoitov struct bpf_map *map; 473592867bfSDaniel Borkmann struct fd f; 474db20fd2bSAlexei Starovoitov void *key; 475db20fd2bSAlexei Starovoitov int err; 476db20fd2bSAlexei Starovoitov 477db20fd2bSAlexei Starovoitov if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 478db20fd2bSAlexei Starovoitov return -EINVAL; 479db20fd2bSAlexei Starovoitov 480592867bfSDaniel Borkmann f = fdget(ufd); 481c2101297SDaniel Borkmann map = __bpf_map_get(f); 482db20fd2bSAlexei Starovoitov if (IS_ERR(map)) 483db20fd2bSAlexei Starovoitov return PTR_ERR(map); 484db20fd2bSAlexei Starovoitov 485*e4448ed8SAl Viro key = memdup_user(ukey, map->key_size); 486*e4448ed8SAl Viro if (IS_ERR(key)) { 487*e4448ed8SAl Viro err = PTR_ERR(key); 488db20fd2bSAlexei Starovoitov goto err_put; 489*e4448ed8SAl Viro } 490db20fd2bSAlexei Starovoitov 491b121d1e7SAlexei Starovoitov preempt_disable(); 492b121d1e7SAlexei Starovoitov __this_cpu_inc(bpf_prog_active); 493db20fd2bSAlexei Starovoitov rcu_read_lock(); 494db20fd2bSAlexei Starovoitov err = map->ops->map_delete_elem(map, key); 495db20fd2bSAlexei Starovoitov rcu_read_unlock(); 496b121d1e7SAlexei Starovoitov __this_cpu_dec(bpf_prog_active); 497b121d1e7SAlexei Starovoitov preempt_enable(); 498db20fd2bSAlexei Starovoitov 499a67edbf4SDaniel Borkmann if (!err) 500a67edbf4SDaniel Borkmann trace_bpf_map_delete_elem(map, ufd, key); 501db20fd2bSAlexei Starovoitov kfree(key); 502db20fd2bSAlexei Starovoitov err_put: 503db20fd2bSAlexei Starovoitov fdput(f); 504db20fd2bSAlexei Starovoitov return err; 505db20fd2bSAlexei Starovoitov } 506db20fd2bSAlexei Starovoitov 507db20fd2bSAlexei Starovoitov /* last field in 'union bpf_attr' used by this command */ 508db20fd2bSAlexei Starovoitov #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 509db20fd2bSAlexei Starovoitov 510db20fd2bSAlexei Starovoitov static int map_get_next_key(union bpf_attr *attr) 511db20fd2bSAlexei Starovoitov { 512535e7b4bSMickaël Salaün void __user *ukey = u64_to_user_ptr(attr->key); 513535e7b4bSMickaël Salaün void __user *unext_key = u64_to_user_ptr(attr->next_key); 514db20fd2bSAlexei Starovoitov int ufd = attr->map_fd; 515db20fd2bSAlexei Starovoitov struct bpf_map *map; 516db20fd2bSAlexei Starovoitov void *key, *next_key; 517592867bfSDaniel Borkmann struct fd f; 518db20fd2bSAlexei Starovoitov int err; 519db20fd2bSAlexei Starovoitov 520db20fd2bSAlexei Starovoitov if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 521db20fd2bSAlexei Starovoitov return -EINVAL; 522db20fd2bSAlexei Starovoitov 523592867bfSDaniel Borkmann f = fdget(ufd); 524c2101297SDaniel Borkmann map = __bpf_map_get(f); 525db20fd2bSAlexei Starovoitov if (IS_ERR(map)) 526db20fd2bSAlexei Starovoitov return PTR_ERR(map); 527db20fd2bSAlexei Starovoitov 5288fe45924STeng Qin if (ukey) { 529*e4448ed8SAl Viro key = memdup_user(ukey, map->key_size); 530*e4448ed8SAl Viro if (IS_ERR(key)) { 531*e4448ed8SAl Viro err = PTR_ERR(key); 532db20fd2bSAlexei Starovoitov goto err_put; 533*e4448ed8SAl Viro } 5348fe45924STeng Qin } else { 5358fe45924STeng Qin key = NULL; 5368fe45924STeng Qin } 537db20fd2bSAlexei Starovoitov 538db20fd2bSAlexei Starovoitov err = -ENOMEM; 539db20fd2bSAlexei Starovoitov next_key = kmalloc(map->key_size, GFP_USER); 540db20fd2bSAlexei Starovoitov if (!next_key) 541db20fd2bSAlexei Starovoitov goto free_key; 542db20fd2bSAlexei Starovoitov 543db20fd2bSAlexei Starovoitov rcu_read_lock(); 544db20fd2bSAlexei Starovoitov err = map->ops->map_get_next_key(map, key, next_key); 545db20fd2bSAlexei Starovoitov rcu_read_unlock(); 546db20fd2bSAlexei Starovoitov if (err) 547db20fd2bSAlexei Starovoitov goto free_next_key; 548db20fd2bSAlexei Starovoitov 549db20fd2bSAlexei Starovoitov err = -EFAULT; 550db20fd2bSAlexei Starovoitov if (copy_to_user(unext_key, next_key, map->key_size) != 0) 551db20fd2bSAlexei Starovoitov goto free_next_key; 552db20fd2bSAlexei Starovoitov 553a67edbf4SDaniel Borkmann trace_bpf_map_next_key(map, ufd, key, next_key); 554db20fd2bSAlexei Starovoitov err = 0; 555db20fd2bSAlexei Starovoitov 556db20fd2bSAlexei Starovoitov free_next_key: 557db20fd2bSAlexei Starovoitov kfree(next_key); 558db20fd2bSAlexei Starovoitov free_key: 559db20fd2bSAlexei Starovoitov kfree(key); 560db20fd2bSAlexei Starovoitov err_put: 561db20fd2bSAlexei Starovoitov fdput(f); 562db20fd2bSAlexei Starovoitov return err; 563db20fd2bSAlexei Starovoitov } 564db20fd2bSAlexei Starovoitov 565be9370a7SJohannes Berg static const struct bpf_verifier_ops * const bpf_prog_types[] = { 566be9370a7SJohannes Berg #define BPF_PROG_TYPE(_id, _ops) \ 567be9370a7SJohannes Berg [_id] = &_ops, 56840077e0cSJohannes Berg #define BPF_MAP_TYPE(_id, _ops) 569be9370a7SJohannes Berg #include <linux/bpf_types.h> 570be9370a7SJohannes Berg #undef BPF_PROG_TYPE 57140077e0cSJohannes Berg #undef BPF_MAP_TYPE 572be9370a7SJohannes Berg }; 57309756af4SAlexei Starovoitov 57409756af4SAlexei Starovoitov static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 57509756af4SAlexei Starovoitov { 576be9370a7SJohannes Berg if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type]) 577be9370a7SJohannes Berg return -EINVAL; 57809756af4SAlexei Starovoitov 579be9370a7SJohannes Berg prog->aux->ops = bpf_prog_types[type]; 58024701eceSDaniel Borkmann prog->type = type; 58109756af4SAlexei Starovoitov return 0; 58209756af4SAlexei Starovoitov } 58309756af4SAlexei Starovoitov 58409756af4SAlexei Starovoitov /* drop refcnt on maps used by eBPF program and free auxilary data */ 58509756af4SAlexei Starovoitov static void free_used_maps(struct bpf_prog_aux *aux) 58609756af4SAlexei Starovoitov { 58709756af4SAlexei Starovoitov int i; 58809756af4SAlexei Starovoitov 58909756af4SAlexei Starovoitov for (i = 0; i < aux->used_map_cnt; i++) 59009756af4SAlexei Starovoitov bpf_map_put(aux->used_maps[i]); 59109756af4SAlexei Starovoitov 59209756af4SAlexei Starovoitov kfree(aux->used_maps); 59309756af4SAlexei Starovoitov } 59409756af4SAlexei Starovoitov 5955ccb071eSDaniel Borkmann int __bpf_prog_charge(struct user_struct *user, u32 pages) 5965ccb071eSDaniel Borkmann { 5975ccb071eSDaniel Borkmann unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 5985ccb071eSDaniel Borkmann unsigned long user_bufs; 5995ccb071eSDaniel Borkmann 6005ccb071eSDaniel Borkmann if (user) { 6015ccb071eSDaniel Borkmann user_bufs = atomic_long_add_return(pages, &user->locked_vm); 6025ccb071eSDaniel Borkmann if (user_bufs > memlock_limit) { 6035ccb071eSDaniel Borkmann atomic_long_sub(pages, &user->locked_vm); 6045ccb071eSDaniel Borkmann return -EPERM; 6055ccb071eSDaniel Borkmann } 6065ccb071eSDaniel Borkmann } 6075ccb071eSDaniel Borkmann 6085ccb071eSDaniel Borkmann return 0; 6095ccb071eSDaniel Borkmann } 6105ccb071eSDaniel Borkmann 6115ccb071eSDaniel Borkmann void __bpf_prog_uncharge(struct user_struct *user, u32 pages) 6125ccb071eSDaniel Borkmann { 6135ccb071eSDaniel Borkmann if (user) 6145ccb071eSDaniel Borkmann atomic_long_sub(pages, &user->locked_vm); 6155ccb071eSDaniel Borkmann } 6165ccb071eSDaniel Borkmann 617aaac3ba9SAlexei Starovoitov static int bpf_prog_charge_memlock(struct bpf_prog *prog) 618aaac3ba9SAlexei Starovoitov { 619aaac3ba9SAlexei Starovoitov struct user_struct *user = get_current_user(); 6205ccb071eSDaniel Borkmann int ret; 621aaac3ba9SAlexei Starovoitov 6225ccb071eSDaniel Borkmann ret = __bpf_prog_charge(user, prog->pages); 6235ccb071eSDaniel Borkmann if (ret) { 624aaac3ba9SAlexei Starovoitov free_uid(user); 6255ccb071eSDaniel Borkmann return ret; 626aaac3ba9SAlexei Starovoitov } 6275ccb071eSDaniel Borkmann 628aaac3ba9SAlexei Starovoitov prog->aux->user = user; 629aaac3ba9SAlexei Starovoitov return 0; 630aaac3ba9SAlexei Starovoitov } 631aaac3ba9SAlexei Starovoitov 632aaac3ba9SAlexei Starovoitov static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) 633aaac3ba9SAlexei Starovoitov { 634aaac3ba9SAlexei Starovoitov struct user_struct *user = prog->aux->user; 635aaac3ba9SAlexei Starovoitov 6365ccb071eSDaniel Borkmann __bpf_prog_uncharge(user, prog->pages); 637aaac3ba9SAlexei Starovoitov free_uid(user); 638aaac3ba9SAlexei Starovoitov } 639aaac3ba9SAlexei Starovoitov 6401aacde3dSDaniel Borkmann static void __bpf_prog_put_rcu(struct rcu_head *rcu) 641abf2e7d6SAlexei Starovoitov { 642abf2e7d6SAlexei Starovoitov struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 643abf2e7d6SAlexei Starovoitov 644abf2e7d6SAlexei Starovoitov free_used_maps(aux); 645aaac3ba9SAlexei Starovoitov bpf_prog_uncharge_memlock(aux->prog); 646abf2e7d6SAlexei Starovoitov bpf_prog_free(aux->prog); 647abf2e7d6SAlexei Starovoitov } 648abf2e7d6SAlexei Starovoitov 64909756af4SAlexei Starovoitov void bpf_prog_put(struct bpf_prog *prog) 65009756af4SAlexei Starovoitov { 651a67edbf4SDaniel Borkmann if (atomic_dec_and_test(&prog->aux->refcnt)) { 652a67edbf4SDaniel Borkmann trace_bpf_prog_put_rcu(prog); 65374451e66SDaniel Borkmann bpf_prog_kallsyms_del(prog); 6541aacde3dSDaniel Borkmann call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 65509756af4SAlexei Starovoitov } 656a67edbf4SDaniel Borkmann } 657e2e9b654SDaniel Borkmann EXPORT_SYMBOL_GPL(bpf_prog_put); 65809756af4SAlexei Starovoitov 65909756af4SAlexei Starovoitov static int bpf_prog_release(struct inode *inode, struct file *filp) 66009756af4SAlexei Starovoitov { 66109756af4SAlexei Starovoitov struct bpf_prog *prog = filp->private_data; 66209756af4SAlexei Starovoitov 6631aacde3dSDaniel Borkmann bpf_prog_put(prog); 66409756af4SAlexei Starovoitov return 0; 66509756af4SAlexei Starovoitov } 66609756af4SAlexei Starovoitov 6677bd509e3SDaniel Borkmann #ifdef CONFIG_PROC_FS 6687bd509e3SDaniel Borkmann static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 6697bd509e3SDaniel Borkmann { 6707bd509e3SDaniel Borkmann const struct bpf_prog *prog = filp->private_data; 671f1f7714eSDaniel Borkmann char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 6727bd509e3SDaniel Borkmann 673f1f7714eSDaniel Borkmann bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 6747bd509e3SDaniel Borkmann seq_printf(m, 6757bd509e3SDaniel Borkmann "prog_type:\t%u\n" 6767bd509e3SDaniel Borkmann "prog_jited:\t%u\n" 677f1f7714eSDaniel Borkmann "prog_tag:\t%s\n" 6787bd509e3SDaniel Borkmann "memlock:\t%llu\n", 6797bd509e3SDaniel Borkmann prog->type, 6807bd509e3SDaniel Borkmann prog->jited, 681f1f7714eSDaniel Borkmann prog_tag, 6827bd509e3SDaniel Borkmann prog->pages * 1ULL << PAGE_SHIFT); 6837bd509e3SDaniel Borkmann } 6847bd509e3SDaniel Borkmann #endif 6857bd509e3SDaniel Borkmann 68609756af4SAlexei Starovoitov static const struct file_operations bpf_prog_fops = { 6877bd509e3SDaniel Borkmann #ifdef CONFIG_PROC_FS 6887bd509e3SDaniel Borkmann .show_fdinfo = bpf_prog_show_fdinfo, 6897bd509e3SDaniel Borkmann #endif 69009756af4SAlexei Starovoitov .release = bpf_prog_release, 69109756af4SAlexei Starovoitov }; 69209756af4SAlexei Starovoitov 693b2197755SDaniel Borkmann int bpf_prog_new_fd(struct bpf_prog *prog) 694aa79781bSDaniel Borkmann { 695aa79781bSDaniel Borkmann return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 696aa79781bSDaniel Borkmann O_RDWR | O_CLOEXEC); 697aa79781bSDaniel Borkmann } 698aa79781bSDaniel Borkmann 699113214beSDaniel Borkmann static struct bpf_prog *____bpf_prog_get(struct fd f) 70009756af4SAlexei Starovoitov { 70109756af4SAlexei Starovoitov if (!f.file) 70209756af4SAlexei Starovoitov return ERR_PTR(-EBADF); 70309756af4SAlexei Starovoitov if (f.file->f_op != &bpf_prog_fops) { 70409756af4SAlexei Starovoitov fdput(f); 70509756af4SAlexei Starovoitov return ERR_PTR(-EINVAL); 70609756af4SAlexei Starovoitov } 70709756af4SAlexei Starovoitov 708c2101297SDaniel Borkmann return f.file->private_data; 70909756af4SAlexei Starovoitov } 71009756af4SAlexei Starovoitov 71159d3656dSBrenden Blanco struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) 71292117d84SAlexei Starovoitov { 71359d3656dSBrenden Blanco if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) { 71459d3656dSBrenden Blanco atomic_sub(i, &prog->aux->refcnt); 71592117d84SAlexei Starovoitov return ERR_PTR(-EBUSY); 71692117d84SAlexei Starovoitov } 71792117d84SAlexei Starovoitov return prog; 71892117d84SAlexei Starovoitov } 71959d3656dSBrenden Blanco EXPORT_SYMBOL_GPL(bpf_prog_add); 72059d3656dSBrenden Blanco 721c540594fSDaniel Borkmann void bpf_prog_sub(struct bpf_prog *prog, int i) 722c540594fSDaniel Borkmann { 723c540594fSDaniel Borkmann /* Only to be used for undoing previous bpf_prog_add() in some 724c540594fSDaniel Borkmann * error path. We still know that another entity in our call 725c540594fSDaniel Borkmann * path holds a reference to the program, thus atomic_sub() can 726c540594fSDaniel Borkmann * be safely used in such cases! 727c540594fSDaniel Borkmann */ 728c540594fSDaniel Borkmann WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); 729c540594fSDaniel Borkmann } 730c540594fSDaniel Borkmann EXPORT_SYMBOL_GPL(bpf_prog_sub); 731c540594fSDaniel Borkmann 73259d3656dSBrenden Blanco struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) 73359d3656dSBrenden Blanco { 73459d3656dSBrenden Blanco return bpf_prog_add(prog, 1); 73559d3656dSBrenden Blanco } 73697bc402dSDaniel Borkmann EXPORT_SYMBOL_GPL(bpf_prog_inc); 73792117d84SAlexei Starovoitov 738113214beSDaniel Borkmann static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type) 73909756af4SAlexei Starovoitov { 74009756af4SAlexei Starovoitov struct fd f = fdget(ufd); 74109756af4SAlexei Starovoitov struct bpf_prog *prog; 74209756af4SAlexei Starovoitov 743113214beSDaniel Borkmann prog = ____bpf_prog_get(f); 74409756af4SAlexei Starovoitov if (IS_ERR(prog)) 74509756af4SAlexei Starovoitov return prog; 746113214beSDaniel Borkmann if (type && prog->type != *type) { 747113214beSDaniel Borkmann prog = ERR_PTR(-EINVAL); 748113214beSDaniel Borkmann goto out; 749113214beSDaniel Borkmann } 75009756af4SAlexei Starovoitov 75192117d84SAlexei Starovoitov prog = bpf_prog_inc(prog); 752113214beSDaniel Borkmann out: 75309756af4SAlexei Starovoitov fdput(f); 75409756af4SAlexei Starovoitov return prog; 75509756af4SAlexei Starovoitov } 756113214beSDaniel Borkmann 757113214beSDaniel Borkmann struct bpf_prog *bpf_prog_get(u32 ufd) 758113214beSDaniel Borkmann { 759113214beSDaniel Borkmann return __bpf_prog_get(ufd, NULL); 760113214beSDaniel Borkmann } 761113214beSDaniel Borkmann 762113214beSDaniel Borkmann struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) 763113214beSDaniel Borkmann { 764a67edbf4SDaniel Borkmann struct bpf_prog *prog = __bpf_prog_get(ufd, &type); 765a67edbf4SDaniel Borkmann 766a67edbf4SDaniel Borkmann if (!IS_ERR(prog)) 767a67edbf4SDaniel Borkmann trace_bpf_prog_get_type(prog); 768a67edbf4SDaniel Borkmann return prog; 769113214beSDaniel Borkmann } 770113214beSDaniel Borkmann EXPORT_SYMBOL_GPL(bpf_prog_get_type); 77109756af4SAlexei Starovoitov 77209756af4SAlexei Starovoitov /* last field in 'union bpf_attr' used by this command */ 7732541517cSAlexei Starovoitov #define BPF_PROG_LOAD_LAST_FIELD kern_version 77409756af4SAlexei Starovoitov 77509756af4SAlexei Starovoitov static int bpf_prog_load(union bpf_attr *attr) 77609756af4SAlexei Starovoitov { 77709756af4SAlexei Starovoitov enum bpf_prog_type type = attr->prog_type; 77809756af4SAlexei Starovoitov struct bpf_prog *prog; 77909756af4SAlexei Starovoitov int err; 78009756af4SAlexei Starovoitov char license[128]; 78109756af4SAlexei Starovoitov bool is_gpl; 78209756af4SAlexei Starovoitov 78309756af4SAlexei Starovoitov if (CHECK_ATTR(BPF_PROG_LOAD)) 78409756af4SAlexei Starovoitov return -EINVAL; 78509756af4SAlexei Starovoitov 78609756af4SAlexei Starovoitov /* copy eBPF program license from user space */ 787535e7b4bSMickaël Salaün if (strncpy_from_user(license, u64_to_user_ptr(attr->license), 78809756af4SAlexei Starovoitov sizeof(license) - 1) < 0) 78909756af4SAlexei Starovoitov return -EFAULT; 79009756af4SAlexei Starovoitov license[sizeof(license) - 1] = 0; 79109756af4SAlexei Starovoitov 79209756af4SAlexei Starovoitov /* eBPF programs must be GPL compatible to use GPL-ed functions */ 79309756af4SAlexei Starovoitov is_gpl = license_is_gpl_compatible(license); 79409756af4SAlexei Starovoitov 795ef0915caSDaniel Borkmann if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS) 796ef0915caSDaniel Borkmann return -E2BIG; 79709756af4SAlexei Starovoitov 7982541517cSAlexei Starovoitov if (type == BPF_PROG_TYPE_KPROBE && 7992541517cSAlexei Starovoitov attr->kern_version != LINUX_VERSION_CODE) 8002541517cSAlexei Starovoitov return -EINVAL; 8012541517cSAlexei Starovoitov 8021be7f75dSAlexei Starovoitov if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN)) 8031be7f75dSAlexei Starovoitov return -EPERM; 8041be7f75dSAlexei Starovoitov 80509756af4SAlexei Starovoitov /* plain bpf_prog allocation */ 80609756af4SAlexei Starovoitov prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 80709756af4SAlexei Starovoitov if (!prog) 80809756af4SAlexei Starovoitov return -ENOMEM; 80909756af4SAlexei Starovoitov 810aaac3ba9SAlexei Starovoitov err = bpf_prog_charge_memlock(prog); 811aaac3ba9SAlexei Starovoitov if (err) 812aaac3ba9SAlexei Starovoitov goto free_prog_nouncharge; 813aaac3ba9SAlexei Starovoitov 81409756af4SAlexei Starovoitov prog->len = attr->insn_cnt; 81509756af4SAlexei Starovoitov 81609756af4SAlexei Starovoitov err = -EFAULT; 817535e7b4bSMickaël Salaün if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), 818aafe6ae9SDaniel Borkmann bpf_prog_insn_size(prog)) != 0) 81909756af4SAlexei Starovoitov goto free_prog; 82009756af4SAlexei Starovoitov 82109756af4SAlexei Starovoitov prog->orig_prog = NULL; 822a91263d5SDaniel Borkmann prog->jited = 0; 82309756af4SAlexei Starovoitov 82409756af4SAlexei Starovoitov atomic_set(&prog->aux->refcnt, 1); 825a91263d5SDaniel Borkmann prog->gpl_compatible = is_gpl ? 1 : 0; 82609756af4SAlexei Starovoitov 82709756af4SAlexei Starovoitov /* find program type: socket_filter vs tracing_filter */ 82809756af4SAlexei Starovoitov err = find_prog_type(type, prog); 82909756af4SAlexei Starovoitov if (err < 0) 83009756af4SAlexei Starovoitov goto free_prog; 83109756af4SAlexei Starovoitov 83209756af4SAlexei Starovoitov /* run eBPF verifier */ 8339bac3d6dSAlexei Starovoitov err = bpf_check(&prog, attr); 83409756af4SAlexei Starovoitov if (err < 0) 83509756af4SAlexei Starovoitov goto free_used_maps; 83609756af4SAlexei Starovoitov 83709756af4SAlexei Starovoitov /* eBPF program is ready to be JITed */ 838d1c55ab5SDaniel Borkmann prog = bpf_prog_select_runtime(prog, &err); 83904fd61abSAlexei Starovoitov if (err < 0) 84004fd61abSAlexei Starovoitov goto free_used_maps; 84109756af4SAlexei Starovoitov 842aa79781bSDaniel Borkmann err = bpf_prog_new_fd(prog); 84309756af4SAlexei Starovoitov if (err < 0) 84409756af4SAlexei Starovoitov /* failed to allocate fd */ 84509756af4SAlexei Starovoitov goto free_used_maps; 84609756af4SAlexei Starovoitov 84774451e66SDaniel Borkmann bpf_prog_kallsyms_add(prog); 848a67edbf4SDaniel Borkmann trace_bpf_prog_load(prog, err); 84909756af4SAlexei Starovoitov return err; 85009756af4SAlexei Starovoitov 85109756af4SAlexei Starovoitov free_used_maps: 85209756af4SAlexei Starovoitov free_used_maps(prog->aux); 85309756af4SAlexei Starovoitov free_prog: 854aaac3ba9SAlexei Starovoitov bpf_prog_uncharge_memlock(prog); 855aaac3ba9SAlexei Starovoitov free_prog_nouncharge: 85609756af4SAlexei Starovoitov bpf_prog_free(prog); 85709756af4SAlexei Starovoitov return err; 85809756af4SAlexei Starovoitov } 85909756af4SAlexei Starovoitov 860b2197755SDaniel Borkmann #define BPF_OBJ_LAST_FIELD bpf_fd 861b2197755SDaniel Borkmann 862b2197755SDaniel Borkmann static int bpf_obj_pin(const union bpf_attr *attr) 863b2197755SDaniel Borkmann { 864b2197755SDaniel Borkmann if (CHECK_ATTR(BPF_OBJ)) 865b2197755SDaniel Borkmann return -EINVAL; 866b2197755SDaniel Borkmann 867535e7b4bSMickaël Salaün return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); 868b2197755SDaniel Borkmann } 869b2197755SDaniel Borkmann 870b2197755SDaniel Borkmann static int bpf_obj_get(const union bpf_attr *attr) 871b2197755SDaniel Borkmann { 872b2197755SDaniel Borkmann if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0) 873b2197755SDaniel Borkmann return -EINVAL; 874b2197755SDaniel Borkmann 875535e7b4bSMickaël Salaün return bpf_obj_get_user(u64_to_user_ptr(attr->pathname)); 876b2197755SDaniel Borkmann } 877b2197755SDaniel Borkmann 878f4324551SDaniel Mack #ifdef CONFIG_CGROUP_BPF 879f4324551SDaniel Mack 8807f677633SAlexei Starovoitov #define BPF_PROG_ATTACH_LAST_FIELD attach_flags 881f4324551SDaniel Mack 882f4324551SDaniel Mack static int bpf_prog_attach(const union bpf_attr *attr) 883f4324551SDaniel Mack { 8847f677633SAlexei Starovoitov enum bpf_prog_type ptype; 885f4324551SDaniel Mack struct bpf_prog *prog; 886f4324551SDaniel Mack struct cgroup *cgrp; 8877f677633SAlexei Starovoitov int ret; 888f4324551SDaniel Mack 889f4324551SDaniel Mack if (!capable(CAP_NET_ADMIN)) 890f4324551SDaniel Mack return -EPERM; 891f4324551SDaniel Mack 892f4324551SDaniel Mack if (CHECK_ATTR(BPF_PROG_ATTACH)) 893f4324551SDaniel Mack return -EINVAL; 894f4324551SDaniel Mack 8957f677633SAlexei Starovoitov if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) 8967f677633SAlexei Starovoitov return -EINVAL; 8977f677633SAlexei Starovoitov 898f4324551SDaniel Mack switch (attr->attach_type) { 899f4324551SDaniel Mack case BPF_CGROUP_INET_INGRESS: 900f4324551SDaniel Mack case BPF_CGROUP_INET_EGRESS: 901b2cd1257SDavid Ahern ptype = BPF_PROG_TYPE_CGROUP_SKB; 902b2cd1257SDavid Ahern break; 90361023658SDavid Ahern case BPF_CGROUP_INET_SOCK_CREATE: 90461023658SDavid Ahern ptype = BPF_PROG_TYPE_CGROUP_SOCK; 90561023658SDavid Ahern break; 906b2cd1257SDavid Ahern default: 907b2cd1257SDavid Ahern return -EINVAL; 908b2cd1257SDavid Ahern } 909b2cd1257SDavid Ahern 910b2cd1257SDavid Ahern prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 911f4324551SDaniel Mack if (IS_ERR(prog)) 912f4324551SDaniel Mack return PTR_ERR(prog); 913f4324551SDaniel Mack 914f4324551SDaniel Mack cgrp = cgroup_get_from_fd(attr->target_fd); 915f4324551SDaniel Mack if (IS_ERR(cgrp)) { 916f4324551SDaniel Mack bpf_prog_put(prog); 917f4324551SDaniel Mack return PTR_ERR(cgrp); 918f4324551SDaniel Mack } 919f4324551SDaniel Mack 9207f677633SAlexei Starovoitov ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, 9217f677633SAlexei Starovoitov attr->attach_flags & BPF_F_ALLOW_OVERRIDE); 9227f677633SAlexei Starovoitov if (ret) 9237f677633SAlexei Starovoitov bpf_prog_put(prog); 924f4324551SDaniel Mack cgroup_put(cgrp); 925f4324551SDaniel Mack 9267f677633SAlexei Starovoitov return ret; 927f4324551SDaniel Mack } 928f4324551SDaniel Mack 929f4324551SDaniel Mack #define BPF_PROG_DETACH_LAST_FIELD attach_type 930f4324551SDaniel Mack 931f4324551SDaniel Mack static int bpf_prog_detach(const union bpf_attr *attr) 932f4324551SDaniel Mack { 933f4324551SDaniel Mack struct cgroup *cgrp; 9347f677633SAlexei Starovoitov int ret; 935f4324551SDaniel Mack 936f4324551SDaniel Mack if (!capable(CAP_NET_ADMIN)) 937f4324551SDaniel Mack return -EPERM; 938f4324551SDaniel Mack 939f4324551SDaniel Mack if (CHECK_ATTR(BPF_PROG_DETACH)) 940f4324551SDaniel Mack return -EINVAL; 941f4324551SDaniel Mack 942f4324551SDaniel Mack switch (attr->attach_type) { 943f4324551SDaniel Mack case BPF_CGROUP_INET_INGRESS: 944f4324551SDaniel Mack case BPF_CGROUP_INET_EGRESS: 94561023658SDavid Ahern case BPF_CGROUP_INET_SOCK_CREATE: 946f4324551SDaniel Mack cgrp = cgroup_get_from_fd(attr->target_fd); 947f4324551SDaniel Mack if (IS_ERR(cgrp)) 948f4324551SDaniel Mack return PTR_ERR(cgrp); 949f4324551SDaniel Mack 9507f677633SAlexei Starovoitov ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); 951f4324551SDaniel Mack cgroup_put(cgrp); 952f4324551SDaniel Mack break; 953f4324551SDaniel Mack 954f4324551SDaniel Mack default: 955f4324551SDaniel Mack return -EINVAL; 956f4324551SDaniel Mack } 957f4324551SDaniel Mack 9587f677633SAlexei Starovoitov return ret; 959f4324551SDaniel Mack } 960f4324551SDaniel Mack #endif /* CONFIG_CGROUP_BPF */ 961f4324551SDaniel Mack 9621cf1cae9SAlexei Starovoitov #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration 9631cf1cae9SAlexei Starovoitov 9641cf1cae9SAlexei Starovoitov static int bpf_prog_test_run(const union bpf_attr *attr, 9651cf1cae9SAlexei Starovoitov union bpf_attr __user *uattr) 9661cf1cae9SAlexei Starovoitov { 9671cf1cae9SAlexei Starovoitov struct bpf_prog *prog; 9681cf1cae9SAlexei Starovoitov int ret = -ENOTSUPP; 9691cf1cae9SAlexei Starovoitov 9701cf1cae9SAlexei Starovoitov if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 9711cf1cae9SAlexei Starovoitov return -EINVAL; 9721cf1cae9SAlexei Starovoitov 9731cf1cae9SAlexei Starovoitov prog = bpf_prog_get(attr->test.prog_fd); 9741cf1cae9SAlexei Starovoitov if (IS_ERR(prog)) 9751cf1cae9SAlexei Starovoitov return PTR_ERR(prog); 9761cf1cae9SAlexei Starovoitov 9771cf1cae9SAlexei Starovoitov if (prog->aux->ops->test_run) 9781cf1cae9SAlexei Starovoitov ret = prog->aux->ops->test_run(prog, attr, uattr); 9791cf1cae9SAlexei Starovoitov 9801cf1cae9SAlexei Starovoitov bpf_prog_put(prog); 9811cf1cae9SAlexei Starovoitov return ret; 9821cf1cae9SAlexei Starovoitov } 9831cf1cae9SAlexei Starovoitov 98499c55f7dSAlexei Starovoitov SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 98599c55f7dSAlexei Starovoitov { 98699c55f7dSAlexei Starovoitov union bpf_attr attr = {}; 98799c55f7dSAlexei Starovoitov int err; 98899c55f7dSAlexei Starovoitov 9891be7f75dSAlexei Starovoitov if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) 99099c55f7dSAlexei Starovoitov return -EPERM; 99199c55f7dSAlexei Starovoitov 99299c55f7dSAlexei Starovoitov if (!access_ok(VERIFY_READ, uattr, 1)) 99399c55f7dSAlexei Starovoitov return -EFAULT; 99499c55f7dSAlexei Starovoitov 99599c55f7dSAlexei Starovoitov if (size > PAGE_SIZE) /* silly large */ 99699c55f7dSAlexei Starovoitov return -E2BIG; 99799c55f7dSAlexei Starovoitov 99899c55f7dSAlexei Starovoitov /* If we're handed a bigger struct than we know of, 99999c55f7dSAlexei Starovoitov * ensure all the unknown bits are 0 - i.e. new 100099c55f7dSAlexei Starovoitov * user-space does not rely on any kernel feature 100199c55f7dSAlexei Starovoitov * extensions we dont know about yet. 100299c55f7dSAlexei Starovoitov */ 100399c55f7dSAlexei Starovoitov if (size > sizeof(attr)) { 100499c55f7dSAlexei Starovoitov unsigned char __user *addr; 100599c55f7dSAlexei Starovoitov unsigned char __user *end; 100699c55f7dSAlexei Starovoitov unsigned char val; 100799c55f7dSAlexei Starovoitov 100899c55f7dSAlexei Starovoitov addr = (void __user *)uattr + sizeof(attr); 100999c55f7dSAlexei Starovoitov end = (void __user *)uattr + size; 101099c55f7dSAlexei Starovoitov 101199c55f7dSAlexei Starovoitov for (; addr < end; addr++) { 101299c55f7dSAlexei Starovoitov err = get_user(val, addr); 101399c55f7dSAlexei Starovoitov if (err) 101499c55f7dSAlexei Starovoitov return err; 101599c55f7dSAlexei Starovoitov if (val) 101699c55f7dSAlexei Starovoitov return -E2BIG; 101799c55f7dSAlexei Starovoitov } 101899c55f7dSAlexei Starovoitov size = sizeof(attr); 101999c55f7dSAlexei Starovoitov } 102099c55f7dSAlexei Starovoitov 102199c55f7dSAlexei Starovoitov /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 102299c55f7dSAlexei Starovoitov if (copy_from_user(&attr, uattr, size) != 0) 102399c55f7dSAlexei Starovoitov return -EFAULT; 102499c55f7dSAlexei Starovoitov 102599c55f7dSAlexei Starovoitov switch (cmd) { 102699c55f7dSAlexei Starovoitov case BPF_MAP_CREATE: 102799c55f7dSAlexei Starovoitov err = map_create(&attr); 102899c55f7dSAlexei Starovoitov break; 1029db20fd2bSAlexei Starovoitov case BPF_MAP_LOOKUP_ELEM: 1030db20fd2bSAlexei Starovoitov err = map_lookup_elem(&attr); 1031db20fd2bSAlexei Starovoitov break; 1032db20fd2bSAlexei Starovoitov case BPF_MAP_UPDATE_ELEM: 1033db20fd2bSAlexei Starovoitov err = map_update_elem(&attr); 1034db20fd2bSAlexei Starovoitov break; 1035db20fd2bSAlexei Starovoitov case BPF_MAP_DELETE_ELEM: 1036db20fd2bSAlexei Starovoitov err = map_delete_elem(&attr); 1037db20fd2bSAlexei Starovoitov break; 1038db20fd2bSAlexei Starovoitov case BPF_MAP_GET_NEXT_KEY: 1039db20fd2bSAlexei Starovoitov err = map_get_next_key(&attr); 1040db20fd2bSAlexei Starovoitov break; 104109756af4SAlexei Starovoitov case BPF_PROG_LOAD: 104209756af4SAlexei Starovoitov err = bpf_prog_load(&attr); 104309756af4SAlexei Starovoitov break; 1044b2197755SDaniel Borkmann case BPF_OBJ_PIN: 1045b2197755SDaniel Borkmann err = bpf_obj_pin(&attr); 1046b2197755SDaniel Borkmann break; 1047b2197755SDaniel Borkmann case BPF_OBJ_GET: 1048b2197755SDaniel Borkmann err = bpf_obj_get(&attr); 1049b2197755SDaniel Borkmann break; 1050f4324551SDaniel Mack #ifdef CONFIG_CGROUP_BPF 1051f4324551SDaniel Mack case BPF_PROG_ATTACH: 1052f4324551SDaniel Mack err = bpf_prog_attach(&attr); 1053f4324551SDaniel Mack break; 1054f4324551SDaniel Mack case BPF_PROG_DETACH: 1055f4324551SDaniel Mack err = bpf_prog_detach(&attr); 1056f4324551SDaniel Mack break; 1057f4324551SDaniel Mack #endif 10581cf1cae9SAlexei Starovoitov case BPF_PROG_TEST_RUN: 10591cf1cae9SAlexei Starovoitov err = bpf_prog_test_run(&attr, uattr); 10601cf1cae9SAlexei Starovoitov break; 106199c55f7dSAlexei Starovoitov default: 106299c55f7dSAlexei Starovoitov err = -EINVAL; 106399c55f7dSAlexei Starovoitov break; 106499c55f7dSAlexei Starovoitov } 106599c55f7dSAlexei Starovoitov 106699c55f7dSAlexei Starovoitov return err; 106799c55f7dSAlexei Starovoitov } 1068