/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2023 Isovalent */
#ifndef __BPF_MPROG_H
#define __BPF_MPROG_H

#include <linux/bpf.h>

/* bpf_mprog framework:
 *
 * bpf_mprog is a generic layer for multi-program attachment. In-kernel users
 * of bpf_mprog do not need to care about the dependency resolution
 * internals; they can consume it with a few API calls. The currently
 * available dependency directives are BPF_F_{BEFORE,AFTER}, which enable
 * insertion of a BPF program or BPF link relative to an existing BPF program
 * or BPF link inside the multi-program array, as well as prepend and append
 * behavior if no relative object is specified; see the corresponding
 * selftests for concrete examples (e.g. the tc_links and tc_opts test cases
 * of test_progs).
 *
 * Usage of the bpf_mprog_{attach,detach,query}() core APIs, in pseudo code:
 *
 *  Attach case:
 *
 *   struct bpf_mprog_entry *entry, *entry_new;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_attach(entry, &entry_new, [...]);
 *   if (!ret) {
 *       if (entry != entry_new) {
 *           // swap @entry to @entry_new at attach location
 *           // ensure there are no inflight users of @entry:
 *           synchronize_rcu();
 *       }
 *       bpf_mprog_commit(entry);
 *   } else {
 *       // error path, bail out, propagate @ret
 *   }
 *   // bpf_mprog user-side unlock
 *
 *  Detach case:
 *
 *   struct bpf_mprog_entry *entry, *entry_new;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_detach(entry, &entry_new, [...]);
 *   if (!ret) {
 *       // all steps marked with (*) are optional and depend on the
 *       // use-case, i.e. whether the bpf_mprog_bundle should be freed
 *       if (!bpf_mprog_total(entry_new))    (*)
 *           entry_new = NULL                (*)
 *       // swap @entry to @entry_new at attach location
 *       // ensure there are no inflight users of @entry:
 *       synchronize_rcu();
 *       bpf_mprog_commit(entry);
 *       if (!entry_new)                     (*)
 *           // free bpf_mprog_bundle        (*)
 *   } else {
 *       // error path, bail out, propagate @ret
 *   }
 *   // bpf_mprog user-side unlock
 *
 *  Query case:
 *
 *   struct bpf_mprog_entry *entry;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_query(attr, uattr, entry);
 *   // bpf_mprog user-side unlock
 *
 *  Data/fast path:
 *
 *   struct bpf_mprog_entry *entry;
 *   struct bpf_mprog_fp *fp;
 *   struct bpf_prog *prog;
 *   int ret = [...];
 *
 *   rcu_read_lock();
 *   // fetch active @entry from attach location
 *   [...]
 *   bpf_mprog_foreach_prog(entry, fp, prog) {
 *       ret = bpf_prog_run(prog, [...]);
 *       // process @ret from program
 *   }
 *   [...]
 *   rcu_read_unlock();
 *
 * bpf_mprog locking considerations:
 *
 * bpf_mprog_{attach,detach,query}() must be protected by an external lock
 * (like RTNL in case of tcx).
 *
 * The bpf_mprog_entry pointer can be an __rcu-annotated pointer (in case of
 * tcx the netdevice has tcx_ingress and tcx_egress __rcu pointers) which
 * gets updated via rcu_assign_pointer(), pointing to the active
 * bpf_mprog_entry of the bpf_mprog_bundle.
 *
 * The fast path accesses the active bpf_mprog_entry within an RCU critical
 * section (in case of tcx it runs in NAPI context, which provides RCU
 * protection there; other users might need an explicit rcu_read_lock()).
 * bpf_mprog_commit() assumes that for the old bpf_mprog_entry there are no
 * inflight users anymore.
 *
 * The READ_ONCE()/WRITE_ONCE() pairing for bpf_mprog_fp's prog access is for
 * the replacement case where we don't swap the bpf_mprog_entry.
 */

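/* As a concrete sketch of the attach case above: insert @prog before an
 * already attached program given by @relative_fd, with no expected
 * revision check (a revision of 0 skips it). @prog and @relative_fd are
 * hypothetical and assumed to be supplied by the bpf_mprog user:
 *
 *	ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, NULL,
 *			       BPF_F_BEFORE, relative_fd, 0);
 */
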
#define bpf_mprog_foreach_tuple(entry, fp, cp, t)			\
	for (fp = &entry->fp_items[0], cp = &entry->parent->cp_items[0];\
	     ({								\
		t.prog = READ_ONCE(fp->prog);				\
		t.link = cp->link;					\
		t.prog;							\
	      });							\
	     fp++, cp++)
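
/* A minimal control-path sketch using the tuple iterator: walk all
 * attached entries under the bpf_mprog user-side lock; tuple.link is
 * NULL for non-link (prog-only) attachments:
 *
 *	struct bpf_mprog_fp *fp;
 *	struct bpf_mprog_cp *cp;
 *	struct bpf_tuple tuple;
 *
 *	bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
 *		// inspect tuple.prog / tuple.link
 *	}
 */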

#define bpf_mprog_foreach_prog(entry, fp, p)			\
	for (fp = &entry->fp_items[0];				\
	     (p = READ_ONCE(fp->prog));				\
	     fp++)

#define BPF_MPROG_MAX 64

struct bpf_mprog_fp {
	struct bpf_prog *prog;
};

struct bpf_mprog_cp {
	struct bpf_link *link;
};

struct bpf_mprog_entry {
	struct bpf_mprog_fp fp_items[BPF_MPROG_MAX];
	struct bpf_mprog_bundle *parent;
};

struct bpf_mprog_bundle {
	struct bpf_mprog_entry a;
	struct bpf_mprog_entry b;
	struct bpf_mprog_cp cp_items[BPF_MPROG_MAX];
	struct bpf_prog *ref;
	atomic64_t revision;
	u32 count;
};

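/* Note: the bundle double-buffers its entries, with @a and @b taking
 * turns as the active bpf_mprog_entry; updates are staged on the
 * inactive peer and then swapped in by the bpf_mprog user. fp_items
 * lives in the entry itself to keep the fast-path data hot, while
 * cp_items (links) are control-path-only and therefore stay in the
 * bundle.
 */
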
struct bpf_tuple {
	struct bpf_prog *prog;
	struct bpf_link *link;
};

static inline struct bpf_mprog_entry *
bpf_mprog_peer(const struct bpf_mprog_entry *entry)
{
	if (entry == &entry->parent->a)
		return &entry->parent->b;
	else
		return &entry->parent->a;
}

static inline void bpf_mprog_bundle_init(struct bpf_mprog_bundle *bundle)
{
	BUILD_BUG_ON(sizeof(bundle->a.fp_items[0]) > sizeof(u64));
	BUILD_BUG_ON(ARRAY_SIZE(bundle->a.fp_items) !=
		     ARRAY_SIZE(bundle->cp_items));

	memset(bundle, 0, sizeof(*bundle));
	atomic64_set(&bundle->revision, 1);
	bundle->a.parent = bundle;
	bundle->b.parent = bundle;
}
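
/* A setup sketch for a bpf_mprog user, assuming a hypothetical @loc
 * attach location and leaving out error handling:
 *
 *	struct bpf_mprog_bundle *bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
 *
 *	bpf_mprog_bundle_init(bundle);
 *	// publish the (initially empty) entry @a as the active one
 *	rcu_assign_pointer(loc->entry, &bundle->a);
 */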

static inline void bpf_mprog_inc(struct bpf_mprog_entry *entry)
{
	entry->parent->count++;
}

static inline void bpf_mprog_dec(struct bpf_mprog_entry *entry)
{
	entry->parent->count--;
}

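/* One slot is reserved as a NULL terminator, which the
 * bpf_mprog_foreach_{tuple,prog}() iterators rely on to stop.
 */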
static inline int bpf_mprog_max(void)
{
	return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1;
}

static inline int bpf_mprog_total(struct bpf_mprog_entry *entry)
{
	int total = entry->parent->count;

	WARN_ON_ONCE(total > bpf_mprog_max());
	return total;
}

static inline bool bpf_mprog_exists(struct bpf_mprog_entry *entry,
				    struct bpf_prog *prog)
{
	const struct bpf_mprog_fp *fp;
	const struct bpf_prog *tmp;

	bpf_mprog_foreach_prog(entry, fp, tmp) {
		if (tmp == prog)
			return true;
	}
	return false;
}

static inline void bpf_mprog_mark_for_release(struct bpf_mprog_entry *entry,
					      struct bpf_tuple *tuple)
{
	WARN_ON_ONCE(entry->parent->ref);
	if (!tuple->link)
		entry->parent->ref = tuple->prog;
}

static inline void bpf_mprog_complete_release(struct bpf_mprog_entry *entry)
{
	/* In the non-link case, prog deletions can only drop the reference
	 * to the prog once the bpf_mprog_entry has been swapped and
	 * bpf_mprog has ensured that there are no inflight users anymore.
	 *
	 * Paired with bpf_mprog_mark_for_release().
	 */
	if (entry->parent->ref) {
		bpf_prog_put(entry->parent->ref);
		entry->parent->ref = NULL;
	}
}

static inline void bpf_mprog_revision_new(struct bpf_mprog_entry *entry)
{
	atomic64_inc(&entry->parent->revision);
}

static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry)
{
	bpf_mprog_complete_release(entry);
	bpf_mprog_revision_new(entry);
}

static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry)
{
	return atomic64_read(&entry->parent->revision);
}

static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst,
					struct bpf_mprog_entry *src)
{
	memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items));
}

static inline void bpf_mprog_entry_clear(struct bpf_mprog_entry *dst)
{
	memset(dst->fp_items, 0, sizeof(dst->fp_items));
}

static inline void bpf_mprog_clear_all(struct bpf_mprog_entry *entry,
				       struct bpf_mprog_entry **entry_new)
{
	struct bpf_mprog_entry *peer;

	peer = bpf_mprog_peer(entry);
	bpf_mprog_entry_clear(peer);
	peer->parent->count = 0;
	*entry_new = peer;
}

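/* Opens a gap at @idx by shifting all items from @idx onwards one slot
 * to the right; the caller then fills @idx via bpf_mprog_write().
 */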
static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx)
{
	int total = bpf_mprog_total(entry);

	memmove(entry->fp_items + idx + 1,
		entry->fp_items + idx,
		(total - idx) * sizeof(struct bpf_mprog_fp));

	memmove(entry->parent->cp_items + idx + 1,
		entry->parent->cp_items + idx,
		(total - idx) * sizeof(struct bpf_mprog_cp));
}

static inline void bpf_mprog_entry_shrink(struct bpf_mprog_entry *entry, int idx)
{
	/* Total array size is needed in this case to ensure the NULL
	 * entry is copied at the end.
	 */
	int total = ARRAY_SIZE(entry->fp_items);

	memmove(entry->fp_items + idx,
		entry->fp_items + idx + 1,
		(total - idx - 1) * sizeof(struct bpf_mprog_fp));

	memmove(entry->parent->cp_items + idx,
		entry->parent->cp_items + idx + 1,
		(total - idx - 1) * sizeof(struct bpf_mprog_cp));
}

static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 idx,
				  struct bpf_mprog_fp **fp,
				  struct bpf_mprog_cp **cp)
{
	*fp = &entry->fp_items[idx];
	*cp = &entry->parent->cp_items[idx];
}

static inline void bpf_mprog_write(struct bpf_mprog_fp *fp,
				   struct bpf_mprog_cp *cp,
				   struct bpf_tuple *tuple)
{
	WRITE_ONCE(fp->prog, tuple->prog);
	cp->link = tuple->link;
}
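
/* Sketch for the in-place replacement case mentioned in the header
 * comment, where the bpf_mprog_entry is not swapped (hypothetical @idx
 * and @nprog; the caller holds the bpf_mprog user-side lock and handles
 * releasing the old prog):
 *
 *	struct bpf_tuple tuple = { .prog = nprog };
 *	struct bpf_mprog_fp *fp;
 *	struct bpf_mprog_cp *cp;
 *
 *	bpf_mprog_read(entry, idx, &fp, &cp);
 *	bpf_mprog_write(fp, cp, &tuple);
 */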

int bpf_mprog_attach(struct bpf_mprog_entry *entry,
		     struct bpf_mprog_entry **entry_new,
		     struct bpf_prog *prog_new, struct bpf_link *link,
		     struct bpf_prog *prog_old,
		     u32 flags, u32 id_or_fd, u64 revision);

int bpf_mprog_detach(struct bpf_mprog_entry *entry,
		     struct bpf_mprog_entry **entry_new,
		     struct bpf_prog *prog, struct bpf_link *link,
		     u32 flags, u32 id_or_fd, u64 revision);

int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
		    struct bpf_mprog_entry *entry);

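/* Currently only tcx (BPF_PROG_TYPE_SCHED_CLS) is hooked up to bpf_mprog. */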
static inline bool bpf_mprog_supported(enum bpf_prog_type type)
{
	switch (type) {
	case BPF_PROG_TYPE_SCHED_CLS:
		return true;
	default:
		return false;
	}
}
#endif /* __BPF_MPROG_H */