xref: /linux/tools/lib/bpf/btf_relocate.c (revision 7d32e779eb9add47bfdb4731c4d4ff443a7f7fa6)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2024, Oracle and/or its affiliates. */
3 
4 #ifndef _GNU_SOURCE
5 #define _GNU_SOURCE
6 #endif
7 
8 #ifdef __KERNEL__
9 #include <linux/bpf.h>
10 #include <linux/bsearch.h>
11 #include <linux/btf.h>
12 #include <linux/sort.h>
13 #include <linux/string.h>
14 #include <linux/bpf_verifier.h>
15 
16 #define btf_type_by_id				(struct btf_type *)btf_type_by_id
17 #define btf__type_cnt				btf_nr_types
18 #define btf__base_btf				btf_base_btf
19 #define btf__name_by_offset			btf_name_by_offset
20 #define btf__str_by_offset			btf_str_by_offset
21 #define btf_kflag				btf_type_kflag
22 
23 #define calloc(nmemb, sz)			kvcalloc(nmemb, sz, GFP_KERNEL | __GFP_NOWARN)
24 #define free(ptr)				kvfree(ptr)
25 #define qsort(base, num, sz, cmp)		sort(base, num, sz, cmp, NULL)
26 
27 #else
28 
29 #include "btf.h"
30 #include "bpf.h"
31 #include "libbpf.h"
32 #include "libbpf_internal.h"
33 
34 #endif /* __KERNEL__ */
35 
36 struct btf;
37 
38 struct btf_relocate {
39 	struct btf *btf;
40 	const struct btf *base_btf;
41 	const struct btf *dist_base_btf;
42 	unsigned int nr_base_types;
43 	unsigned int nr_split_types;
44 	unsigned int nr_dist_base_types;
45 	int dist_str_len;
46 	int base_str_len;
47 	__u32 *id_map;
48 	__u32 *str_map;
49 };
50 
51 /* Set temporarily in relocation id_map if distilled base struct/union is
52  * embedded in a split BTF struct/union; in such a case, size information must
53  * match between distilled base BTF and base BTF representation of type.
54  */
55 #define BTF_IS_EMBEDDED ((__u32)-1)
56 
57 /* <name, size, id> triple used in sorting/searching distilled base BTF. */
58 struct btf_name_info {
59 	const char *name;
60 	/* set when search requires a size match */
61 	bool needs_size: 1;
62 	unsigned int size: 31;
63 	__u32 id;
64 };
65 
66 static int btf_relocate_rewrite_type_id(struct btf_relocate *r, __u32 i)
67 {
68 	struct btf_type *t = btf_type_by_id(r->btf, i);
69 	struct btf_field_iter it;
70 	__u32 *id;
71 	int err;
72 
73 	err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
74 	if (err)
75 		return err;
76 
77 	while ((id = btf_field_iter_next(&it)))
78 		*id = r->id_map[*id];
79 	return 0;
80 }
81 
82 /* Simple string comparison used for sorting within BTF, since all distilled
83  * types are named.  If strings match, and size is non-zero for both elements
84  * fall back to using size for ordering.
85  */
86 static int cmp_btf_name_size(const void *n1, const void *n2)
87 {
88 	const struct btf_name_info *ni1 = n1;
89 	const struct btf_name_info *ni2 = n2;
90 	int name_diff = strcmp(ni1->name, ni2->name);
91 
92 	if (!name_diff && ni1->needs_size && ni2->needs_size)
93 		return ni2->size - ni1->size;
94 	return name_diff;
95 }
96 
97 /* Binary search with a small twist; find leftmost element that matches
98  * so that we can then iterate through all exact matches.  So for example
99  * searching { "a", "bb", "bb", "c" }  we would always match on the
100  * leftmost "bb".
101  */
102 static struct btf_name_info *search_btf_name_size(struct btf_name_info *key,
103 						  struct btf_name_info *vals,
104 						  int nelems)
105 {
106 	struct btf_name_info *ret = NULL;
107 	int high = nelems - 1;
108 	int low = 0;
109 
110 	while (low <= high) {
111 		int mid = (low + high)/2;
112 		struct btf_name_info *val = &vals[mid];
113 		int diff = cmp_btf_name_size(key, val);
114 
115 		if (diff == 0)
116 			ret = val;
117 		/* even if found, keep searching for leftmost match */
118 		if (diff <= 0)
119 			high = mid - 1;
120 		else
121 			low = mid + 1;
122 	}
123 	return ret;
124 }
125 
126 /* If a member of a split BTF struct/union refers to a base BTF
127  * struct/union, mark that struct/union id temporarily in the id_map
128  * with BTF_IS_EMBEDDED.  Members can be const/restrict/volatile/typedef
129  * reference types, but if a pointer is encountered, the type is no longer
130  * considered embedded.
131  */
132 static int btf_mark_embedded_composite_type_ids(struct btf_relocate *r, __u32 i)
133 {
134 	struct btf_type *t = btf_type_by_id(r->btf, i);
135 	struct btf_field_iter it;
136 	__u32 *id;
137 	int err;
138 
139 	if (!btf_is_composite(t))
140 		return 0;
141 
142 	err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
143 	if (err)
144 		return err;
145 
146 	while ((id = btf_field_iter_next(&it))) {
147 		__u32 next_id = *id;
148 
149 		while (next_id) {
150 			t = btf_type_by_id(r->btf, next_id);
151 			switch (btf_kind(t)) {
152 			case BTF_KIND_CONST:
153 			case BTF_KIND_RESTRICT:
154 			case BTF_KIND_VOLATILE:
155 			case BTF_KIND_TYPEDEF:
156 			case BTF_KIND_TYPE_TAG:
157 				next_id = t->type;
158 				break;
159 			case BTF_KIND_ARRAY: {
160 				struct btf_array *a = btf_array(t);
161 
162 				next_id = a->type;
163 				break;
164 			}
165 			case BTF_KIND_STRUCT:
166 			case BTF_KIND_UNION:
167 				if (next_id < r->nr_dist_base_types)
168 					r->id_map[next_id] = BTF_IS_EMBEDDED;
169 				next_id = 0;
170 				break;
171 			default:
172 				next_id = 0;
173 				break;
174 			}
175 		}
176 	}
177 
178 	return 0;
179 }
180 
181 /* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate
182  * through base BTF looking up distilled type (using binary search) equivalents.
183  */
184 static int btf_relocate_map_distilled_base(struct btf_relocate *r)
185 {
186 	struct btf_name_info *info, *info_end;
187 	struct btf_type *base_t, *dist_t;
188 	__u8 *base_name_cnt = NULL;
189 	int err = 0;
190 	__u32 id;
191 
192 	/* generate a sort index array of name/type ids sorted by name for
193 	 * distilled base BTF to speed name-based lookups.
194 	 */
195 	info = calloc(r->nr_dist_base_types, sizeof(*info));
196 	if (!info) {
197 		err = -ENOMEM;
198 		goto done;
199 	}
200 	info_end = info + r->nr_dist_base_types;
201 	for (id = 0; id < r->nr_dist_base_types; id++) {
202 		dist_t = btf_type_by_id(r->dist_base_btf, id);
203 		info[id].name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
204 		info[id].id = id;
205 		info[id].size = dist_t->size;
206 		info[id].needs_size = true;
207 	}
208 	qsort(info, r->nr_dist_base_types, sizeof(*info), cmp_btf_name_size);
209 
210 	/* Mark distilled base struct/union members of split BTF structs/unions
211 	 * in id_map with BTF_IS_EMBEDDED; this signals that these types
212 	 * need to match both name and size, otherwise embedding the base
213 	 * struct/union in the split type is invalid.
214 	 */
215 	for (id = r->nr_dist_base_types; id < r->nr_split_types; id++) {
216 		err = btf_mark_embedded_composite_type_ids(r, id);
217 		if (err)
218 			goto done;
219 	}
220 
221 	/* Collect name counts for composite types in base BTF.  If multiple
222 	 * instances of a struct/union of the same name exist, we need to use
223 	 * size to determine which to map to since name alone is ambiguous.
224 	 */
225 	base_name_cnt = calloc(r->base_str_len, sizeof(*base_name_cnt));
226 	if (!base_name_cnt) {
227 		err = -ENOMEM;
228 		goto done;
229 	}
230 	for (id = 1; id < r->nr_base_types; id++) {
231 		base_t = btf_type_by_id(r->base_btf, id);
232 		if (!btf_is_composite(base_t) || !base_t->name_off)
233 			continue;
234 		if (base_name_cnt[base_t->name_off] < 255)
235 			base_name_cnt[base_t->name_off]++;
236 	}
237 
238 	/* Now search base BTF for matching distilled base BTF types. */
239 	for (id = 1; id < r->nr_base_types; id++) {
240 		struct btf_name_info *dist_info, base_info = {};
241 		int dist_kind, base_kind;
242 
243 		base_t = btf_type_by_id(r->base_btf, id);
244 		/* distilled base consists of named types only. */
245 		if (!base_t->name_off)
246 			continue;
247 		base_kind = btf_kind(base_t);
248 		base_info.id = id;
249 		base_info.name = btf__name_by_offset(r->base_btf, base_t->name_off);
250 		switch (base_kind) {
251 		case BTF_KIND_INT:
252 		case BTF_KIND_FLOAT:
253 		case BTF_KIND_ENUM:
254 		case BTF_KIND_ENUM64:
255 			/* These types should match both name and size */
256 			base_info.needs_size = true;
257 			base_info.size = base_t->size;
258 			break;
259 		case BTF_KIND_FWD:
260 			/* No size considerations for fwds. */
261 			break;
262 		case BTF_KIND_STRUCT:
263 		case BTF_KIND_UNION:
264 			/* Size only needs to be used for struct/union if there
265 			 * are multiple types in base BTF with the same name.
266 			 * If there are multiple _distilled_ types with the same
267 			 * name (a very unlikely scenario), that doesn't matter
268 			 * unless corresponding _base_ types to match them are
269 			 * missing.
270 			 */
271 			base_info.needs_size = base_name_cnt[base_t->name_off] > 1;
272 			base_info.size = base_t->size;
273 			break;
274 		default:
275 			continue;
276 		}
277 		/* iterate over all matching distilled base types */
278 		for (dist_info = search_btf_name_size(&base_info, info, r->nr_dist_base_types);
279 		     dist_info != NULL && dist_info < info_end &&
280 		     cmp_btf_name_size(&base_info, dist_info) == 0;
281 		     dist_info++) {
282 			if (!dist_info->id || dist_info->id >= r->nr_dist_base_types) {
283 				pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n",
284 					id, dist_info->id);
285 				err = -EINVAL;
286 				goto done;
287 			}
288 			dist_t = btf_type_by_id(r->dist_base_btf, dist_info->id);
289 			dist_kind = btf_kind(dist_t);
290 
291 			/* Validate that the found distilled type is compatible.
292 			 * Do not error out on mismatch as another match may
293 			 * occur for an identically-named type.
294 			 */
295 			switch (dist_kind) {
296 			case BTF_KIND_FWD:
297 				switch (base_kind) {
298 				case BTF_KIND_FWD:
299 					if (btf_kflag(dist_t) != btf_kflag(base_t))
300 						continue;
301 					break;
302 				case BTF_KIND_STRUCT:
303 					if (btf_kflag(base_t))
304 						continue;
305 					break;
306 				case BTF_KIND_UNION:
307 					if (!btf_kflag(base_t))
308 						continue;
309 					break;
310 				default:
311 					continue;
312 				}
313 				break;
314 			case BTF_KIND_INT:
315 				if (dist_kind != base_kind ||
316 				    btf_int_encoding(base_t) != btf_int_encoding(dist_t))
317 					continue;
318 				break;
319 			case BTF_KIND_FLOAT:
320 				if (dist_kind != base_kind)
321 					continue;
322 				break;
323 			case BTF_KIND_ENUM:
324 				/* ENUM and ENUM64 are encoded as sized ENUM in
325 				 * distilled base BTF.
326 				 */
327 				if (base_kind != dist_kind && base_kind != BTF_KIND_ENUM64)
328 					continue;
329 				break;
330 			case BTF_KIND_STRUCT:
331 			case BTF_KIND_UNION:
332 				/* size verification is required for embedded
333 				 * struct/unions.
334 				 */
335 				if (r->id_map[dist_info->id] == BTF_IS_EMBEDDED &&
336 				    base_t->size != dist_t->size)
337 					continue;
338 				break;
339 			default:
340 				continue;
341 			}
342 			if (r->id_map[dist_info->id] &&
343 			    r->id_map[dist_info->id] != BTF_IS_EMBEDDED) {
344 				/* we already have a match; this tells us that
345 				 * multiple base types of the same name
346 				 * have the same size, since for cases where
347 				 * multiple types have the same name we match
348 				 * on name and size.  In this case, we have
349 				 * no way of determining which to relocate
350 				 * to in base BTF, so error out.
351 				 */
352 				pr_warn("distilled base BTF type '%s' [%u], size %u has multiple candidates of the same size (ids [%u, %u]) in base BTF\n",
353 					base_info.name, dist_info->id,
354 					base_t->size, id, r->id_map[dist_info->id]);
355 				err = -EINVAL;
356 				goto done;
357 			}
358 			/* map id and name */
359 			r->id_map[dist_info->id] = id;
360 			r->str_map[dist_t->name_off] = base_t->name_off;
361 		}
362 	}
363 	/* ensure all distilled BTF ids now have a mapping... */
364 	for (id = 1; id < r->nr_dist_base_types; id++) {
365 		const char *name;
366 
367 		if (r->id_map[id] && r->id_map[id] != BTF_IS_EMBEDDED)
368 			continue;
369 		dist_t = btf_type_by_id(r->dist_base_btf, id);
370 		name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
371 		pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n",
372 			name, id);
373 		err = -EINVAL;
374 		break;
375 	}
376 done:
377 	free(base_name_cnt);
378 	free(info);
379 	return err;
380 }
381 
382 /* distilled base should only have named int/float/enum/fwd/struct/union types. */
383 static int btf_relocate_validate_distilled_base(struct btf_relocate *r)
384 {
385 	unsigned int i;
386 
387 	for (i = 1; i < r->nr_dist_base_types; i++) {
388 		struct btf_type *t = btf_type_by_id(r->dist_base_btf, i);
389 		int kind = btf_kind(t);
390 
391 		switch (kind) {
392 		case BTF_KIND_INT:
393 		case BTF_KIND_FLOAT:
394 		case BTF_KIND_ENUM:
395 		case BTF_KIND_STRUCT:
396 		case BTF_KIND_UNION:
397 		case BTF_KIND_FWD:
398 			if (t->name_off)
399 				break;
400 			pr_warn("type [%d], kind [%d] is invalid for distilled base BTF; it is anonymous\n",
401 				i, kind);
402 			return -EINVAL;
403 		default:
404 			pr_warn("type [%d] in distilled based BTF has unexpected kind [%d]\n",
405 				i, kind);
406 			return -EINVAL;
407 		}
408 	}
409 	return 0;
410 }
411 
412 static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i)
413 {
414 	struct btf_type *t = btf_type_by_id(r->btf, i);
415 	struct btf_field_iter it;
416 	__u32 *str_off;
417 	int off, err;
418 
419 	err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS);
420 	if (err)
421 		return err;
422 
423 	while ((str_off = btf_field_iter_next(&it))) {
424 		if (!*str_off)
425 			continue;
426 		if (*str_off >= r->dist_str_len) {
427 			*str_off += r->base_str_len - r->dist_str_len;
428 		} else {
429 			off = r->str_map[*str_off];
430 			if (!off) {
431 				pr_warn("string '%s' [offset %u] is not mapped to base BTF",
432 					btf__str_by_offset(r->btf, off), *str_off);
433 				return -ENOENT;
434 			}
435 			*str_off = off;
436 		}
437 	}
438 	return 0;
439 }
440 
441 /* If successful, output of relocation is updated BTF with base BTF pointing
442  * at base_btf, and type ids, strings adjusted accordingly.
443  */
444 int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map)
445 {
446 	unsigned int nr_types = btf__type_cnt(btf);
447 	const struct btf_header *dist_base_hdr;
448 	const struct btf_header *base_hdr;
449 	struct btf_relocate r = {};
450 	int err = 0;
451 	__u32 id, i;
452 
453 	r.dist_base_btf = btf__base_btf(btf);
454 	if (!base_btf || r.dist_base_btf == base_btf)
455 		return -EINVAL;
456 
457 	r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf);
458 	r.nr_base_types = btf__type_cnt(base_btf);
459 	r.nr_split_types = nr_types - r.nr_dist_base_types;
460 	r.btf = btf;
461 	r.base_btf = base_btf;
462 
463 	r.id_map = calloc(nr_types, sizeof(*r.id_map));
464 	r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map));
465 	dist_base_hdr = btf_header(r.dist_base_btf);
466 	base_hdr = btf_header(r.base_btf);
467 	r.dist_str_len = dist_base_hdr->str_len;
468 	r.base_str_len = base_hdr->str_len;
469 	if (!r.id_map || !r.str_map) {
470 		err = -ENOMEM;
471 		goto err_out;
472 	}
473 
474 	err = btf_relocate_validate_distilled_base(&r);
475 	if (err)
476 		goto err_out;
477 
478 	/* Split BTF ids need to be adjusted as base and distilled base
479 	 * have different numbers of types, changing the start id of split
480 	 * BTF.
481 	 */
482 	for (id = r.nr_dist_base_types; id < nr_types; id++)
483 		r.id_map[id] = id + r.nr_base_types - r.nr_dist_base_types;
484 
485 	/* Build a map from distilled base ids to actual base BTF ids; it is used
486 	 * to update split BTF id references.  Also build a str_map mapping from
487 	 * distilled base BTF names to base BTF names.
488 	 */
489 	err = btf_relocate_map_distilled_base(&r);
490 	if (err)
491 		goto err_out;
492 
493 	/* Next, rewrite type ids in split BTF, replacing split ids with updated
494 	 * ids based on number of types in base BTF, and base ids with
495 	 * relocated ids from base_btf.
496 	 */
497 	for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) {
498 		err = btf_relocate_rewrite_type_id(&r, id);
499 		if (err)
500 			goto err_out;
501 	}
502 	/* String offsets now need to be updated using the str_map. */
503 	for (i = 0; i < r.nr_split_types; i++) {
504 		err = btf_relocate_rewrite_strs(&r, i + r.nr_dist_base_types);
505 		if (err)
506 			goto err_out;
507 	}
508 	/* Finally reset base BTF to be base_btf */
509 	btf_set_base_btf(btf, base_btf);
510 
511 	if (id_map) {
512 		*id_map = r.id_map;
513 		r.id_map = NULL;
514 	}
515 err_out:
516 	free(r.id_map);
517 	free(r.str_map);
518 	return err;
519 }
520