xref: /linux/kernel/bpf/map_in_map.c (revision 6ce4c5dc5dd2706d1821d8ebdc53afad8182c2d5)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2017 Facebook
3  */
4 #include <linux/slab.h>
5 #include <linux/bpf.h>
6 #include <linux/btf.h>
7 
8 #include "map_in_map.h"
9 
10 struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
11 {
12 	struct bpf_map *inner_map, *inner_map_meta;
13 	u32 inner_map_meta_size;
14 	CLASS(fd, f)(inner_map_ufd);
15 
16 	inner_map = __bpf_map_get(f);
17 	if (IS_ERR(inner_map))
18 		return inner_map;
19 
20 	/* Does not support >1 level map-in-map */
21 	if (inner_map->inner_map_meta)
22 		return ERR_PTR(-EINVAL);
23 
24 	if (!inner_map->ops->map_meta_equal)
25 		return ERR_PTR(-ENOTSUPP);
26 
27 	inner_map_meta_size = sizeof(*inner_map_meta);
28 	/* In some cases verifier needs to access beyond just base map. */
29 	if (inner_map->ops == &array_map_ops || inner_map->ops == &percpu_array_map_ops)
30 		inner_map_meta_size = sizeof(struct bpf_array);
31 
32 	inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
33 	if (!inner_map_meta)
34 		return ERR_PTR(-ENOMEM);
35 
36 	inner_map_meta->map_type = inner_map->map_type;
37 	inner_map_meta->key_size = inner_map->key_size;
38 	inner_map_meta->value_size = inner_map->value_size;
39 	inner_map_meta->map_flags = inner_map->map_flags;
40 	inner_map_meta->max_entries = inner_map->max_entries;
41 
42 	inner_map_meta->record = btf_record_dup(inner_map->record);
43 	if (IS_ERR(inner_map_meta->record)) {
44 		/* btf_record_dup returns NULL or valid pointer in case of
45 		 * invalid/empty/valid, but ERR_PTR in case of errors. During
46 		 * equality NULL or IS_ERR is equivalent.
47 		 */
48 		struct bpf_map *ret = ERR_CAST(inner_map_meta->record);
49 		kfree(inner_map_meta);
50 		return ret;
51 	}
52 	/* Note: We must use the same BTF, as we also used btf_record_dup above
53 	 * which relies on BTF being same for both maps, as some members like
54 	 * record->fields.list_head have pointers like value_rec pointing into
55 	 * inner_map->btf.
56 	 */
57 	if (inner_map->btf) {
58 		btf_get(inner_map->btf);
59 		inner_map_meta->btf = inner_map->btf;
60 	}
61 
62 	/* Misc members not needed in bpf_map_meta_equal() check. */
63 	inner_map_meta->ops = inner_map->ops;
64 	if (inner_map->ops == &array_map_ops || inner_map->ops == &percpu_array_map_ops) {
65 		struct bpf_array *inner_array_meta =
66 			container_of(inner_map_meta, struct bpf_array, map);
67 		struct bpf_array *inner_array = container_of(inner_map, struct bpf_array, map);
68 
69 		inner_array_meta->index_mask = inner_array->index_mask;
70 		inner_array_meta->elem_size = inner_array->elem_size;
71 		inner_map_meta->bypass_spec_v1 = inner_map->bypass_spec_v1;
72 	}
73 	return inner_map_meta;
74 }
75 
76 void bpf_map_meta_free(struct bpf_map *map_meta)
77 {
78 	bpf_map_free_record(map_meta);
79 	btf_put(map_meta->btf);
80 	kfree(map_meta);
81 }
82 
83 bool bpf_map_meta_equal(const struct bpf_map *meta0,
84 			const struct bpf_map *meta1)
85 {
86 	/* No need to compare ops because it is covered by map_type */
87 	return meta0->map_type == meta1->map_type &&
88 		meta0->key_size == meta1->key_size &&
89 		meta0->value_size == meta1->value_size &&
90 		meta0->map_flags == meta1->map_flags &&
91 		btf_record_equal(meta0->record, meta1->record);
92 }
93 
94 void *bpf_map_fd_get_ptr(struct bpf_map *map,
95 			 struct file *map_file /* not used */,
96 			 int ufd)
97 {
98 	struct bpf_map *inner_map, *inner_map_meta;
99 	CLASS(fd, f)(ufd);
100 
101 	inner_map = __bpf_map_get(f);
102 	if (IS_ERR(inner_map))
103 		return inner_map;
104 
105 	inner_map_meta = map->inner_map_meta;
106 	if (inner_map_meta->ops->map_meta_equal(inner_map_meta, inner_map))
107 		bpf_map_inc(inner_map);
108 	else
109 		inner_map = ERR_PTR(-EINVAL);
110 
111 	return inner_map;
112 }
113 
114 void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
115 {
116 	struct bpf_map *inner_map = ptr;
117 
118 	/* Defer the freeing of inner map according to the sleepable attribute
119 	 * of bpf program which owns the outer map, so unnecessary waiting for
120 	 * RCU tasks trace grace period can be avoided.
121 	 */
122 	if (need_defer) {
123 		if (atomic64_read(&map->sleepable_refcnt))
124 			WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true);
125 		else
126 			WRITE_ONCE(inner_map->free_after_rcu_gp, true);
127 	}
128 	bpf_map_put(inner_map);
129 }
130 
131 u32 bpf_map_fd_sys_lookup_elem(void *ptr)
132 {
133 	return ((struct bpf_map *)ptr)->id;
134 }
135