/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2023 Isovalent */
#ifndef __BPF_MPROG_H
#define __BPF_MPROG_H

#include <linux/bpf.h>

/* bpf_mprog framework:
 *
 * bpf_mprog is a generic layer for multi-program attachment. In-kernel users
 * of bpf_mprog don't need to care about the dependency resolution internals;
 * they can just consume it with a few API calls. Currently available
 * dependency directives are BPF_F_{BEFORE,AFTER} which enable insertion of
 * a BPF program or BPF link relative to an existing BPF program or BPF link
 * inside the multi-program array, as well as prepend and append behavior if
 * no relative object was specified; see the corresponding selftests for
 * concrete examples (e.g. the tc_links and tc_opts test cases of test_progs).
 *
 * Usage of bpf_mprog_{attach,detach,query}() core APIs with pseudo code:
 *
 *  Attach case:
 *
 *   struct bpf_mprog_entry *entry, *entry_new;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_attach(entry, &entry_new, [...]);
 *   if (!ret) {
 *       if (entry != entry_new) {
 *           // swap @entry to @entry_new at attach location
 *           // ensure there are no inflight users of @entry:
 *           synchronize_rcu();
 *       }
 *       bpf_mprog_commit(entry);
 *   } else {
 *       // error path, bail out, propagate @ret
 *   }
 *   // bpf_mprog user-side unlock
 *
 *  Detach case:
 *
 *   struct bpf_mprog_entry *entry, *entry_new;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_detach(entry, &entry_new, [...]);
 *   if (!ret) {
 *       // all lines marked with (*) are optional and depend on whether
 *       // the use-case needs the bpf_mprog_bundle to be freed or not
 *       if (!bpf_mprog_total(entry_new))    (*)
 *           entry_new = NULL                (*)
 *       // swap @entry to @entry_new at attach location
 *       // ensure there are no inflight users of @entry:
 *       synchronize_rcu();
 *       bpf_mprog_commit(entry);
 *       if (!entry_new)                     (*)
 *           // free bpf_mprog_bundle        (*)
 *   } else {
 *       // error path, bail out, propagate @ret
 *   }
 *   // bpf_mprog user-side unlock
 *
 *  Query case:
 *
 *   struct bpf_mprog_entry *entry;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_query(attr, uattr, entry);
 *   // bpf_mprog user-side unlock
 *
 *  Data/fast path:
 *
 *   struct bpf_mprog_entry *entry;
 *   struct bpf_mprog_fp *fp;
 *   struct bpf_prog *prog;
 *   int ret = [...];
 *
 *   rcu_read_lock();
 *   // fetch active @entry from attach location
 *   [...]
 *   bpf_mprog_foreach_prog(entry, fp, prog) {
 *       ret = bpf_prog_run(prog, [...]);
 *       // process @ret from program
 *   }
 *   [...]
 *   rcu_read_unlock();
 *
 * bpf_mprog locking considerations:
 *
 * bpf_mprog_{attach,detach,query}() must be protected by an external lock
 * (like RTNL in case of tcx).
 *
 * The bpf_mprog_entry pointer can be an __rcu annotated pointer (in case of
 * tcx the netdevice has tcx_ingress and tcx_egress __rcu pointers) which gets
 * updated via rcu_assign_pointer(), pointing to the active bpf_mprog_entry of
 * the bpf_mprog_bundle.
 *
 * The fast path accesses the active bpf_mprog_entry within an RCU critical
 * section (in case of tcx it runs in NAPI which provides RCU protection
 * there; other users might need an explicit rcu_read_lock()). The
 * bpf_mprog_commit() assumes that for the old bpf_mprog_entry there are no
 * inflight users anymore.
 *
 * The READ_ONCE()/WRITE_ONCE() pairing for bpf_mprog_fp's prog access is for
 * the replacement case where we don't swap the bpf_mprog_entry.
 */

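/* Example (illustrative sketch only, not part of this header's API): a more
 * concrete version of the attach case above for a hypothetical hook that
 * keeps its active bpf_mprog_entry behind an __rcu pointer and serializes
 * updates with its own mutex. The names example_entry, example_lock and
 * example_entry_fetch_or_create() are assumptions made up for illustration;
 * real users (e.g. tcx) have their own equivalents and error handling.
 *
 *   static struct bpf_mprog_entry __rcu *example_entry;
 *   static DEFINE_MUTEX(example_lock);
 *
 *   static int example_attach(struct bpf_prog *prog, u32 flags,
 *                             u32 id_or_fd, u64 revision)
 *   {
 *       struct bpf_mprog_entry *entry, *entry_new;
 *       int ret;
 *
 *       mutex_lock(&example_lock);
 *       entry = example_entry_fetch_or_create();
 *       ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, NULL,
 *                              flags, id_or_fd, revision);
 *       if (!ret) {
 *           if (entry != entry_new) {
 *               // swap at the attach location, then wait for inflight users
 *               rcu_assign_pointer(example_entry, entry_new);
 *               synchronize_rcu();
 *           }
 *           bpf_mprog_commit(entry);
 *       }
 *       mutex_unlock(&example_lock);
 *       return ret;
 *   }
 */
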
#define bpf_mprog_foreach_tuple(entry, fp, cp, t)			\
	for (fp = &entry->fp_items[0], cp = &entry->parent->cp_items[0];\
	     ({								\
		t.prog = READ_ONCE(fp->prog);				\
		t.link = cp->link;					\
		t.prog;							\
	      });							\
	     fp++, cp++)

#define bpf_mprog_foreach_prog(entry, fp, p)			\
	for (fp = &entry->fp_items[0];				\
	     (p = READ_ONCE(fp->prog));				\
	     fp++)

#define BPF_MPROG_MAX 64

struct bpf_mprog_fp {
	struct bpf_prog *prog;
};

struct bpf_mprog_cp {
	struct bpf_link *link;
};

struct bpf_mprog_entry {
	struct bpf_mprog_fp fp_items[BPF_MPROG_MAX];
	struct bpf_mprog_bundle *parent;
};

struct bpf_mprog_bundle {
	struct bpf_mprog_entry a;
	struct bpf_mprog_entry b;
	struct bpf_mprog_cp cp_items[BPF_MPROG_MAX];
	struct bpf_prog *ref;
	atomic64_t revision;
	u32 count;
};

struct bpf_tuple {
	struct bpf_prog *prog;
	struct bpf_link *link;
};

static inline struct bpf_mprog_entry *
bpf_mprog_peer(const struct bpf_mprog_entry *entry)
{
	if (entry == &entry->parent->a)
		return &entry->parent->b;
	else
		return &entry->parent->a;
}

static inline void bpf_mprog_bundle_init(struct bpf_mprog_bundle *bundle)
{
	BUILD_BUG_ON(sizeof(bundle->a.fp_items[0]) > sizeof(u64));
	BUILD_BUG_ON(ARRAY_SIZE(bundle->a.fp_items) !=
		     ARRAY_SIZE(bundle->cp_items));

	memset(bundle, 0, sizeof(*bundle));
	atomic64_set(&bundle->revision, 1);
	bundle->a.parent = bundle;
	bundle->b.parent = bundle;
}

static inline void bpf_mprog_inc(struct bpf_mprog_entry *entry)
{
	entry->parent->count++;
}

static inline void bpf_mprog_dec(struct bpf_mprog_entry *entry)
{
	entry->parent->count--;
}

static inline int bpf_mprog_max(void)
{
	return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1;
}

static inline int bpf_mprog_total(struct bpf_mprog_entry *entry)
{
	int total = entry->parent->count;

	WARN_ON_ONCE(total > bpf_mprog_max());
	return total;
}

static inline bool bpf_mprog_exists(struct bpf_mprog_entry *entry,
				    struct bpf_prog *prog)
{
	const struct bpf_mprog_fp *fp;
	const struct bpf_prog *tmp;

	bpf_mprog_foreach_prog(entry, fp, tmp) {
		if (tmp == prog)
			return true;
	}
	return false;
}

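/* Example (illustrative sketch only): bpf_mprog_foreach_tuple() walks prog
 * and link pairs in parallel, similar to how bpf_mprog_exists() above walks
 * programs via bpf_mprog_foreach_prog(). A hypothetical helper looking up
 * the array index of a given prog could be structured as follows; the
 * function name is an assumption made up for illustration:
 *
 *   static int example_prog_idx(struct bpf_mprog_entry *entry,
 *                               struct bpf_prog *prog)
 *   {
 *       struct bpf_mprog_fp *fp;
 *       struct bpf_mprog_cp *cp;
 *       struct bpf_tuple tuple = {};
 *       int idx = 0;
 *
 *       bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
 *           if (tuple.prog == prog)
 *               return idx;
 *           idx++;
 *       }
 *       return -ENOENT;
 *   }
 */
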
static inline void bpf_mprog_mark_for_release(struct bpf_mprog_entry *entry,
					      struct bpf_tuple *tuple)
{
	WARN_ON_ONCE(entry->parent->ref);
	if (!tuple->link)
		entry->parent->ref = tuple->prog;
}

static inline void bpf_mprog_complete_release(struct bpf_mprog_entry *entry)
{
	/* In the non-link case prog deletions can only drop the reference
	 * to the prog after the bpf_mprog_entry got swapped and the
	 * bpf_mprog ensured that there are no inflight users anymore.
	 *
	 * Paired with bpf_mprog_mark_for_release().
	 */
	if (entry->parent->ref) {
		bpf_prog_put(entry->parent->ref);
		entry->parent->ref = NULL;
	}
}

static inline void bpf_mprog_revision_new(struct bpf_mprog_entry *entry)
{
	atomic64_inc(&entry->parent->revision);
}

static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry)
{
	bpf_mprog_complete_release(entry);
	bpf_mprog_revision_new(entry);
}

static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry)
{
	return atomic64_read(&entry->parent->revision);
}

static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst,
					struct bpf_mprog_entry *src)
{
	memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items));
}

static inline void bpf_mprog_entry_clear(struct bpf_mprog_entry *dst)
{
	memset(dst->fp_items, 0, sizeof(dst->fp_items));
}

static inline void bpf_mprog_clear_all(struct bpf_mprog_entry *entry,
				       struct bpf_mprog_entry **entry_new)
{
	struct bpf_mprog_entry *peer;

	peer = bpf_mprog_peer(entry);
	bpf_mprog_entry_clear(peer);
	peer->parent->count = 0;
	*entry_new = peer;
}

static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx)
{
	int total = bpf_mprog_total(entry);

	memmove(entry->fp_items + idx + 1,
		entry->fp_items + idx,
		(total - idx) * sizeof(struct bpf_mprog_fp));

	memmove(entry->parent->cp_items + idx + 1,
		entry->parent->cp_items + idx,
		(total - idx) * sizeof(struct bpf_mprog_cp));
}

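/* Example (illustrative, rough sketch only): how the copy/grow/read/write
 * helpers typically combine when a tuple is inserted at position @idx into
 * the inactive peer entry before it gets swapped in. This sketches the flow
 * inside the bpf_mprog core rather than something users call directly:
 *
 *   peer = bpf_mprog_peer(entry);
 *   bpf_mprog_entry_copy(peer, entry);    // start from the active state
 *   bpf_mprog_entry_grow(peer, idx);      // open a slot at @idx
 *   bpf_mprog_read(peer, idx, &fp, &cp);
 *   bpf_mprog_write(fp, cp, &tuple);      // publish prog via WRITE_ONCE()
 *   bpf_mprog_inc(peer);                  // account for the new item
 *   // the user then swaps to @peer and commits the old @entry as per the
 *   // attach pseudo code at the top of this file
 */
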
static inline void bpf_mprog_entry_shrink(struct bpf_mprog_entry *entry, int idx)
{
	/* Total array size is needed in this case to ensure the NULL
	 * entry is copied at the end.
	 */
	int total = ARRAY_SIZE(entry->fp_items);

	memmove(entry->fp_items + idx,
		entry->fp_items + idx + 1,
		(total - idx - 1) * sizeof(struct bpf_mprog_fp));

	memmove(entry->parent->cp_items + idx,
		entry->parent->cp_items + idx + 1,
		(total - idx - 1) * sizeof(struct bpf_mprog_cp));
}

static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 idx,
				  struct bpf_mprog_fp **fp,
				  struct bpf_mprog_cp **cp)
{
	*fp = &entry->fp_items[idx];
	*cp = &entry->parent->cp_items[idx];
}

static inline void bpf_mprog_write(struct bpf_mprog_fp *fp,
				   struct bpf_mprog_cp *cp,
				   struct bpf_tuple *tuple)
{
	WRITE_ONCE(fp->prog, tuple->prog);
	cp->link = tuple->link;
}

int bpf_mprog_attach(struct bpf_mprog_entry *entry,
		     struct bpf_mprog_entry **entry_new,
		     struct bpf_prog *prog_new, struct bpf_link *link,
		     struct bpf_prog *prog_old,
		     u32 flags, u32 id_or_fd, u64 revision);

int bpf_mprog_detach(struct bpf_mprog_entry *entry,
		     struct bpf_mprog_entry **entry_new,
		     struct bpf_prog *prog, struct bpf_link *link,
		     u32 flags, u32 id_or_fd, u64 revision);

int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
		    struct bpf_mprog_entry *entry);

static inline bool bpf_mprog_supported(enum bpf_prog_type type)
{
	switch (type) {
	case BPF_PROG_TYPE_SCHED_CLS:
		return true;
	default:
		return false;
	}
}
#endif /* __BPF_MPROG_H */