/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2023 Isovalent */
#ifndef __BPF_MPROG_H
#define __BPF_MPROG_H

#include <linux/bpf.h>

/* bpf_mprog framework:
 *
 * bpf_mprog is a generic layer for multi-program attachment. In-kernel users
 * of bpf_mprog do not need to care about the dependency resolution
 * internals; they can just consume it with a few API calls. The currently
 * available dependency directives are BPF_F_{BEFORE,AFTER} which enable
 * insertion of a BPF program or BPF link relative to an existing BPF program
 * or BPF link inside the multi-program array, as well as prepend and append
 * behavior if no relative object was specified. See the sketch below and the
 * corresponding selftests for concrete examples (e.g. tc_links and tc_opts
 * test cases of test_progs).
 *
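 *  E.g. (a minimal sketch, names are illustrative) attaching a new program
 *  before an already attached program given by its fd, with @revision passed
 *  as 0 so that no specific revision is enforced:
 *
 *   ret = bpf_mprog_attach(entry, &entry_new, prog_new, NULL, NULL,
 *                          BPF_F_BEFORE, relative_prog_fd, 0);
 *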
 * Usage of bpf_mprog_{attach,detach,query}() core APIs with pseudo code:
 *
 *  Attach case:
 *
 *   struct bpf_mprog_entry *entry, *entry_new;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_attach(entry, &entry_new, [...]);
 *   if (!ret) {
 *       if (entry != entry_new) {
 *           // swap @entry to @entry_new at attach location
 *           // ensure there are no inflight users of @entry:
 *           synchronize_rcu();
 *       }
 *       bpf_mprog_commit(entry);
 *   } else {
 *       // error path, bail out, propagate @ret
 *   }
 *   // bpf_mprog user-side unlock
 *
 *  Detach case:
 *
 *   struct bpf_mprog_entry *entry, *entry_new;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_detach(entry, &entry_new, [...]);
 *   if (!ret) {
 *       // all lines marked with (*) are optional; whether the
 *       // bpf_mprog_bundle should be freed depends on the use-case
 *       if (!bpf_mprog_total(entry_new))     (*)
 *           entry_new = NULL                 (*)
 *       // swap @entry to @entry_new at attach location
 *       // ensure there are no inflight users of @entry:
 *       synchronize_rcu();
 *       bpf_mprog_commit(entry);
 *       if (!entry_new)                      (*)
 *           // free bpf_mprog_bundle         (*)
 *   } else {
 *       // error path, bail out, propagate @ret
 *   }
 *   // bpf_mprog user-side unlock
 *
 *  Query case:
 *
 *   struct bpf_mprog_entry *entry;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_query(attr, uattr, entry);
 *   // bpf_mprog user-side unlock
 *
 *  Data/fast path:
 *
 *   struct bpf_mprog_entry *entry;
 *   struct bpf_mprog_fp *fp;
 *   struct bpf_prog *prog;
 *   int ret = [...];
 *
 *   rcu_read_lock();
 *   // fetch active @entry from attach location
 *   [...]
 *   bpf_mprog_foreach_prog(entry, fp, prog) {
 *       ret = bpf_prog_run(prog, [...]);
 *       // process @ret from program
 *   }
 *   [...]
 *   rcu_read_unlock();
 *
 * bpf_mprog locking considerations:
 *
 * bpf_mprog_{attach,detach,query}() must be protected by an external lock
 * (like RTNL in case of tcx).
 *
 * The bpf_mprog_entry pointer can be an __rcu annotated pointer (in case of
 * tcx the netdevice has tcx_ingress and tcx_egress __rcu pointers) which gets
 * updated via rcu_assign_pointer() to point to the active bpf_mprog_entry of
 * the bpf_mprog_bundle.
 *
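 *  E.g. (a minimal sketch, the attach location @loc and its @active field
 *  are hypothetical names):
 *
 *   // update side, under the bpf_mprog user-side lock:
 *   rcu_assign_pointer(loc->active, entry_new);
 *   [...]
 *   // fast path side, under RCU protection:
 *   entry = rcu_dereference(loc->active);
 *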
 * The fast path accesses the active bpf_mprog_entry within an RCU critical
 * section (in case of tcx it runs in NAPI which provides RCU protection
 * there; other users might need an explicit rcu_read_lock()).
 * bpf_mprog_commit() assumes that for the old bpf_mprog_entry there are no
 * inflight users anymore.
 *
 * The READ_ONCE()/WRITE_ONCE() pairing for bpf_mprog_fp's prog access is for
 * the replacement case where we don't swap the bpf_mprog_entry.
 */

#define bpf_mprog_foreach_tuple(entry, fp, cp, t)			\
	for (fp = &entry->fp_items[0], cp = &entry->parent->cp_items[0];\
	     ({								\
		t.prog = READ_ONCE(fp->prog);				\
		t.link = cp->link;					\
		t.prog;							\
	      });							\
	     fp++, cp++)

#define bpf_mprog_foreach_prog(entry, fp, p)				\
	for (fp = &entry->fp_items[0];					\
	     (p = READ_ONCE(fp->prog));					\
	     fp++)

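/* Example (a minimal sketch): walking all attached programs together with
 * their links, e.g. when collecting state for a query; @entry is the active
 * bpf_mprog_entry and @tuple a struct bpf_tuple on the stack:
 *
 *   struct bpf_mprog_fp *fp;
 *   struct bpf_mprog_cp *cp;
 *   struct bpf_tuple tuple = {};
 *
 *   bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
 *       // tuple.prog is always non-NULL here, tuple.link may be NULL
 *       // for non-link (prog fd based) attachments
 *       [...]
 *   }
 */
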
#define BPF_MPROG_MAX 64

struct bpf_mprog_fp {
	struct bpf_prog *prog;
};

struct bpf_mprog_cp {
	struct bpf_link *link;
};

struct bpf_mprog_entry {
	struct bpf_mprog_fp fp_items[BPF_MPROG_MAX];
	struct bpf_mprog_bundle *parent;
};

struct bpf_mprog_bundle {
	struct bpf_mprog_entry a;
	struct bpf_mprog_entry b;
	struct bpf_mprog_cp cp_items[BPF_MPROG_MAX];
	struct bpf_prog *ref;
	atomic64_t revision;
	u32 count;
};

struct bpf_tuple {
	struct bpf_prog *prog;
	struct bpf_link *link;
};

static inline struct bpf_mprog_entry *
bpf_mprog_peer(const struct bpf_mprog_entry *entry)
{
	if (entry == &entry->parent->a)
		return &entry->parent->b;
	else
		return &entry->parent->a;
}

static inline void bpf_mprog_bundle_init(struct bpf_mprog_bundle *bundle)
{
	BUILD_BUG_ON(sizeof(bundle->a.fp_items[0]) > sizeof(u64));
	BUILD_BUG_ON(ARRAY_SIZE(bundle->a.fp_items) !=
		     ARRAY_SIZE(bundle->cp_items));

	memset(bundle, 0, sizeof(*bundle));
	atomic64_set(&bundle->revision, 1);
	bundle->a.parent = bundle;
	bundle->b.parent = bundle;
}

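/* Example (a minimal sketch, struct foo_entry and its allocation are
 * hypothetical): in-kernel users typically embed the bundle in their own
 * object, initialize it once and start out with &bundle->a as the active
 * bpf_mprog_entry:
 *
 *   struct foo_entry {
 *       struct bpf_mprog_bundle bundle;
 *       // user-specific state
 *   };
 *
 *   struct foo_entry *foo = kzalloc(sizeof(*foo), GFP_KERNEL);
 *
 *   if (!foo)
 *       return NULL;
 *   bpf_mprog_bundle_init(&foo->bundle);
 *   return &foo->bundle.a;
 */
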
static inline void bpf_mprog_inc(struct bpf_mprog_entry *entry)
{
	entry->parent->count++;
}

static inline void bpf_mprog_dec(struct bpf_mprog_entry *entry)
{
	entry->parent->count--;
}

static inline int bpf_mprog_max(void)
{
	return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1;
}

static inline int bpf_mprog_total(struct bpf_mprog_entry *entry)
{
	int total = entry->parent->count;

	WARN_ON_ONCE(total > bpf_mprog_max());
	return total;
}

static inline bool bpf_mprog_exists(struct bpf_mprog_entry *entry,
				    struct bpf_prog *prog)
{
	const struct bpf_mprog_fp *fp;
	const struct bpf_prog *tmp;

	bpf_mprog_foreach_prog(entry, fp, tmp) {
		if (tmp == prog)
			return true;
	}
	return false;
}

static inline void bpf_mprog_mark_for_release(struct bpf_mprog_entry *entry,
					      struct bpf_tuple *tuple)
{
	WARN_ON_ONCE(entry->parent->ref);
	if (!tuple->link)
		entry->parent->ref = tuple->prog;
}

static inline void bpf_mprog_complete_release(struct bpf_mprog_entry *entry)
{
	/* In the non-link case, prog deletions can only drop the reference
	 * to the prog after the bpf_mprog_entry got swapped and bpf_mprog
	 * ensured that there are no inflight users anymore.
	 *
	 * Paired with bpf_mprog_mark_for_release().
	 */
	if (entry->parent->ref) {
		bpf_prog_put(entry->parent->ref);
		entry->parent->ref = NULL;
	}
}

static inline void bpf_mprog_revision_new(struct bpf_mprog_entry *entry)
{
	atomic64_inc(&entry->parent->revision);
}

static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry)
{
	bpf_mprog_complete_release(entry);
	bpf_mprog_revision_new(entry);
}

static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry)
{
	return atomic64_read(&entry->parent->revision);
}

static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst,
					struct bpf_mprog_entry *src)
{
	memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items));
}

static inline void bpf_mprog_entry_clear(struct bpf_mprog_entry *dst)
{
	memset(dst->fp_items, 0, sizeof(dst->fp_items));
}

static inline void bpf_mprog_clear_all(struct bpf_mprog_entry *entry,
				       struct bpf_mprog_entry **entry_new)
{
	struct bpf_mprog_entry *peer;

	peer = bpf_mprog_peer(entry);
	bpf_mprog_entry_clear(peer);
	peer->parent->count = 0;
	*entry_new = peer;
}

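/* Example (a minimal sketch, details depend on the use-case): full teardown
 * of the attach location, e.g. when the underlying object goes away; the
 * bpf_mprog user-side lock is assumed to be held:
 *
 *   struct bpf_mprog_entry *entry_new;
 *   struct bpf_mprog_fp *fp;
 *   struct bpf_mprog_cp *cp;
 *   struct bpf_tuple tuple = {};
 *
 *   bpf_mprog_clear_all(entry, &entry_new);
 *   // swap @entry to @entry_new (or NULL) at attach location
 *   // ensure there are no inflight users of @entry:
 *   synchronize_rcu();
 *   bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
 *       // drop program references held for non-link attachments;
 *       // links drop theirs when the link itself gets released
 *       if (!tuple.link)
 *           bpf_prog_put(tuple.prog);
 *   }
 *   // free the bpf_mprog_bundle if desired
 */
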
static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx)
{
	int total = bpf_mprog_total(entry);

	memmove(entry->fp_items + idx + 1,
		entry->fp_items + idx,
		(total - idx) * sizeof(struct bpf_mprog_fp));

	memmove(entry->parent->cp_items + idx + 1,
		entry->parent->cp_items + idx,
		(total - idx) * sizeof(struct bpf_mprog_cp));
}

static inline void bpf_mprog_entry_shrink(struct bpf_mprog_entry *entry, int idx)
{
	/* Total array size is needed in this case to ensure the NULL
	 * entry is copied at the end.
	 */
	int total = ARRAY_SIZE(entry->fp_items);

	memmove(entry->fp_items + idx,
		entry->fp_items + idx + 1,
		(total - idx - 1) * sizeof(struct bpf_mprog_fp));

	memmove(entry->parent->cp_items + idx,
		entry->parent->cp_items + idx + 1,
		(total - idx - 1) * sizeof(struct bpf_mprog_cp));
}

static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 idx,
				  struct bpf_mprog_fp **fp,
				  struct bpf_mprog_cp **cp)
{
	*fp = &entry->fp_items[idx];
	*cp = &entry->parent->cp_items[idx];
}

static inline void bpf_mprog_write(struct bpf_mprog_fp *fp,
				   struct bpf_mprog_cp *cp,
				   struct bpf_tuple *tuple)
{
	WRITE_ONCE(fp->prog, tuple->prog);
	cp->link = tuple->link;
}

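/* Core API; an informal summary of the arguments (see kernel/bpf/mprog.c for
 * the authoritative semantics): @entry is the currently active entry and
 * @entry_new returns the entry to swap in on success, @prog_new/@prog and
 * @link denote the object to attach or detach, @prog_old is an optional
 * replacement target, @flags carries BPF_F_{BEFORE,AFTER,...} style
 * directives with @id_or_fd denoting the relative object, and @revision,
 * if non-zero, must match the entry's current revision.
 */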
int bpf_mprog_attach(struct bpf_mprog_entry *entry,
		     struct bpf_mprog_entry **entry_new,
		     struct bpf_prog *prog_new, struct bpf_link *link,
		     struct bpf_prog *prog_old,
		     u32 flags, u32 id_or_fd, u64 revision);

int bpf_mprog_detach(struct bpf_mprog_entry *entry,
		     struct bpf_mprog_entry **entry_new,
		     struct bpf_prog *prog, struct bpf_link *link,
		     u32 flags, u32 id_or_fd, u64 revision);

int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
		    struct bpf_mprog_entry *entry);

static inline bool bpf_mprog_supported(enum bpf_prog_type type)
{
	switch (type) {
	case BPF_PROG_TYPE_SCHED_CLS:
		return true;
	default:
		return false;
	}
}
#endif /* __BPF_MPROG_H */