xref: /linux/tools/lib/bpf/btf_relocate.c (revision 2a52ca7c98960aafb0eca9ef96b2d0c932171357)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2024, Oracle and/or its affiliates. */
3 
4 #ifndef _GNU_SOURCE
5 #define _GNU_SOURCE
6 #endif
7 
8 #include "btf.h"
9 #include "bpf.h"
10 #include "libbpf.h"
11 #include "libbpf_internal.h"
12 
13 struct btf;
14 
15 struct btf_relocate {
16 	struct btf *btf;
17 	const struct btf *base_btf;
18 	const struct btf *dist_base_btf;
19 	unsigned int nr_base_types;
20 	unsigned int nr_split_types;
21 	unsigned int nr_dist_base_types;
22 	int dist_str_len;
23 	int base_str_len;
24 	__u32 *id_map;
25 	__u32 *str_map;
26 };
27 
28 /* Set temporarily in relocation id_map if distilled base struct/union is
29  * embedded in a split BTF struct/union; in such a case, size information must
30  * match between distilled base BTF and base BTF representation of type.
31  */
32 #define BTF_IS_EMBEDDED ((__u32)-1)
33 
34 /* <name, size, id> triple used in sorting/searching distilled base BTF. */
35 struct btf_name_info {
36 	const char *name;
37 	/* set when search requires a size match */
38 	int needs_size:1,
39 	    size:31;
40 	__u32 id;
41 };
42 
43 static int btf_relocate_rewrite_type_id(struct btf_relocate *r, __u32 i)
44 {
45 	struct btf_type *t = btf_type_by_id(r->btf, i);
46 	struct btf_field_iter it;
47 	__u32 *id;
48 	int err;
49 
50 	err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
51 	if (err)
52 		return err;
53 
54 	while ((id = btf_field_iter_next(&it)))
55 		*id = r->id_map[*id];
56 	return 0;
57 }
58 
59 /* Simple string comparison used for sorting within BTF, since all distilled
60  * types are named.  If strings match, and size is non-zero for both elements
61  * fall back to using size for ordering.
62  */
63 static int cmp_btf_name_size(const void *n1, const void *n2)
64 {
65 	const struct btf_name_info *ni1 = n1;
66 	const struct btf_name_info *ni2 = n2;
67 	int name_diff = strcmp(ni1->name, ni2->name);
68 
69 	if (!name_diff && ni1->needs_size && ni2->needs_size)
70 		return ni2->size - ni1->size;
71 	return name_diff;
72 }
73 
74 /* Binary search with a small twist; find leftmost element that matches
75  * so that we can then iterate through all exact matches.  So for example
76  * searching { "a", "bb", "bb", "c" }  we would always match on the
77  * leftmost "bb".
78  */
79 static struct btf_name_info *search_btf_name_size(struct btf_name_info *key,
80 						  struct btf_name_info *vals,
81 						  int nelems)
82 {
83 	struct btf_name_info *ret = NULL;
84 	int high = nelems - 1;
85 	int low = 0;
86 
87 	while (low <= high) {
88 		int mid = (low + high)/2;
89 		struct btf_name_info *val = &vals[mid];
90 		int diff = cmp_btf_name_size(key, val);
91 
92 		if (diff == 0)
93 			ret = val;
94 		/* even if found, keep searching for leftmost match */
95 		if (diff <= 0)
96 			high = mid - 1;
97 		else
98 			low = mid + 1;
99 	}
100 	return ret;
101 }
102 
103 /* If a member of a split BTF struct/union refers to a base BTF
104  * struct/union, mark that struct/union id temporarily in the id_map
105  * with BTF_IS_EMBEDDED.  Members can be const/restrict/volatile/typedef
106  * reference types, but if a pointer is encountered, the type is no longer
107  * considered embedded.
108  */
109 static int btf_mark_embedded_composite_type_ids(struct btf_relocate *r, __u32 i)
110 {
111 	struct btf_type *t = btf_type_by_id(r->btf, i);
112 	struct btf_field_iter it;
113 	__u32 *id;
114 	int err;
115 
116 	if (!btf_is_composite(t))
117 		return 0;
118 
119 	err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
120 	if (err)
121 		return err;
122 
123 	while ((id = btf_field_iter_next(&it))) {
124 		__u32 next_id = *id;
125 
126 		while (next_id) {
127 			t = btf_type_by_id(r->btf, next_id);
128 			switch (btf_kind(t)) {
129 			case BTF_KIND_CONST:
130 			case BTF_KIND_RESTRICT:
131 			case BTF_KIND_VOLATILE:
132 			case BTF_KIND_TYPEDEF:
133 			case BTF_KIND_TYPE_TAG:
134 				next_id = t->type;
135 				break;
136 			case BTF_KIND_ARRAY: {
137 				struct btf_array *a = btf_array(t);
138 
139 				next_id = a->type;
140 				break;
141 			}
142 			case BTF_KIND_STRUCT:
143 			case BTF_KIND_UNION:
144 				if (next_id < r->nr_dist_base_types)
145 					r->id_map[next_id] = BTF_IS_EMBEDDED;
146 				next_id = 0;
147 				break;
148 			default:
149 				next_id = 0;
150 				break;
151 			}
152 		}
153 	}
154 
155 	return 0;
156 }
157 
158 /* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate
159  * through base BTF looking up distilled type (using binary search) equivalents.
160  */
161 static int btf_relocate_map_distilled_base(struct btf_relocate *r)
162 {
163 	struct btf_name_info *dist_base_info_sorted, *dist_base_info_sorted_end;
164 	struct btf_type *base_t, *dist_t;
165 	__u8 *base_name_cnt = NULL;
166 	int err = 0;
167 	__u32 id;
168 
169 	/* generate a sort index array of name/type ids sorted by name for
170 	 * distilled base BTF to speed name-based lookups.
171 	 */
172 	dist_base_info_sorted = calloc(r->nr_dist_base_types, sizeof(*dist_base_info_sorted));
173 	if (!dist_base_info_sorted) {
174 		err = -ENOMEM;
175 		goto done;
176 	}
177 	dist_base_info_sorted_end = dist_base_info_sorted + r->nr_dist_base_types;
178 	for (id = 0; id < r->nr_dist_base_types; id++) {
179 		dist_t = btf_type_by_id(r->dist_base_btf, id);
180 		dist_base_info_sorted[id].name = btf__name_by_offset(r->dist_base_btf,
181 								     dist_t->name_off);
182 		dist_base_info_sorted[id].id = id;
183 		dist_base_info_sorted[id].size = dist_t->size;
184 		dist_base_info_sorted[id].needs_size = true;
185 	}
186 	qsort(dist_base_info_sorted, r->nr_dist_base_types, sizeof(*dist_base_info_sorted),
187 	      cmp_btf_name_size);
188 
189 	/* Mark distilled base struct/union members of split BTF structs/unions
190 	 * in id_map with BTF_IS_EMBEDDED; this signals that these types
191 	 * need to match both name and size, otherwise embeddding the base
192 	 * struct/union in the split type is invalid.
193 	 */
194 	for (id = r->nr_dist_base_types; id < r->nr_split_types; id++) {
195 		err = btf_mark_embedded_composite_type_ids(r, id);
196 		if (err)
197 			goto done;
198 	}
199 
200 	/* Collect name counts for composite types in base BTF.  If multiple
201 	 * instances of a struct/union of the same name exist, we need to use
202 	 * size to determine which to map to since name alone is ambiguous.
203 	 */
204 	base_name_cnt = calloc(r->base_str_len, sizeof(*base_name_cnt));
205 	if (!base_name_cnt) {
206 		err = -ENOMEM;
207 		goto done;
208 	}
209 	for (id = 1; id < r->nr_base_types; id++) {
210 		base_t = btf_type_by_id(r->base_btf, id);
211 		if (!btf_is_composite(base_t) || !base_t->name_off)
212 			continue;
213 		if (base_name_cnt[base_t->name_off] < 255)
214 			base_name_cnt[base_t->name_off]++;
215 	}
216 
217 	/* Now search base BTF for matching distilled base BTF types. */
218 	for (id = 1; id < r->nr_base_types; id++) {
219 		struct btf_name_info *dist_name_info, *dist_name_info_next = NULL;
220 		struct btf_name_info base_name_info = {};
221 		int dist_kind, base_kind;
222 
223 		base_t = btf_type_by_id(r->base_btf, id);
224 		/* distilled base consists of named types only. */
225 		if (!base_t->name_off)
226 			continue;
227 		base_kind = btf_kind(base_t);
228 		base_name_info.id = id;
229 		base_name_info.name = btf__name_by_offset(r->base_btf, base_t->name_off);
230 		switch (base_kind) {
231 		case BTF_KIND_INT:
232 		case BTF_KIND_FLOAT:
233 		case BTF_KIND_ENUM:
234 		case BTF_KIND_ENUM64:
235 			/* These types should match both name and size */
236 			base_name_info.needs_size = true;
237 			base_name_info.size = base_t->size;
238 			break;
239 		case BTF_KIND_FWD:
240 			/* No size considerations for fwds. */
241 			break;
242 		case BTF_KIND_STRUCT:
243 		case BTF_KIND_UNION:
244 			/* Size only needs to be used for struct/union if there
245 			 * are multiple types in base BTF with the same name.
246 			 * If there are multiple _distilled_ types with the same
247 			 * name (a very unlikely scenario), that doesn't matter
248 			 * unless corresponding _base_ types to match them are
249 			 * missing.
250 			 */
251 			base_name_info.needs_size = base_name_cnt[base_t->name_off] > 1;
252 			base_name_info.size = base_t->size;
253 			break;
254 		default:
255 			continue;
256 		}
257 		/* iterate over all matching distilled base types */
258 		for (dist_name_info = search_btf_name_size(&base_name_info, dist_base_info_sorted,
259 							   r->nr_dist_base_types);
260 		     dist_name_info != NULL; dist_name_info = dist_name_info_next) {
261 			/* Are there more distilled matches to process after
262 			 * this one?
263 			 */
264 			dist_name_info_next = dist_name_info + 1;
265 			if (dist_name_info_next >= dist_base_info_sorted_end ||
266 			    cmp_btf_name_size(&base_name_info, dist_name_info_next))
267 				dist_name_info_next = NULL;
268 
269 			if (!dist_name_info->id || dist_name_info->id > r->nr_dist_base_types) {
270 				pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n",
271 					id, dist_name_info->id);
272 				err = -EINVAL;
273 				goto done;
274 			}
275 			dist_t = btf_type_by_id(r->dist_base_btf, dist_name_info->id);
276 			dist_kind = btf_kind(dist_t);
277 
278 			/* Validate that the found distilled type is compatible.
279 			 * Do not error out on mismatch as another match may
280 			 * occur for an identically-named type.
281 			 */
282 			switch (dist_kind) {
283 			case BTF_KIND_FWD:
284 				switch (base_kind) {
285 				case BTF_KIND_FWD:
286 					if (btf_kflag(dist_t) != btf_kflag(base_t))
287 						continue;
288 					break;
289 				case BTF_KIND_STRUCT:
290 					if (btf_kflag(base_t))
291 						continue;
292 					break;
293 				case BTF_KIND_UNION:
294 					if (!btf_kflag(base_t))
295 						continue;
296 					break;
297 				default:
298 					continue;
299 				}
300 				break;
301 			case BTF_KIND_INT:
302 				if (dist_kind != base_kind ||
303 				    btf_int_encoding(base_t) != btf_int_encoding(dist_t))
304 					continue;
305 				break;
306 			case BTF_KIND_FLOAT:
307 				if (dist_kind != base_kind)
308 					continue;
309 				break;
310 			case BTF_KIND_ENUM:
311 				/* ENUM and ENUM64 are encoded as sized ENUM in
312 				 * distilled base BTF.
313 				 */
314 				if (base_kind != dist_kind && base_kind != BTF_KIND_ENUM64)
315 					continue;
316 				break;
317 			case BTF_KIND_STRUCT:
318 			case BTF_KIND_UNION:
319 				/* size verification is required for embedded
320 				 * struct/unions.
321 				 */
322 				if (r->id_map[dist_name_info->id] == BTF_IS_EMBEDDED &&
323 				    base_t->size != dist_t->size)
324 					continue;
325 				break;
326 			default:
327 				continue;
328 			}
329 			if (r->id_map[dist_name_info->id] &&
330 			    r->id_map[dist_name_info->id] != BTF_IS_EMBEDDED) {
331 				/* we already have a match; this tells us that
332 				 * multiple base types of the same name
333 				 * have the same size, since for cases where
334 				 * multiple types have the same name we match
335 				 * on name and size.  In this case, we have
336 				 * no way of determining which to relocate
337 				 * to in base BTF, so error out.
338 				 */
339 				pr_warn("distilled base BTF type '%s' [%u], size %u has multiple candidates of the same size (ids [%u, %u]) in base BTF\n",
340 					base_name_info.name, dist_name_info->id,
341 					base_t->size, id, r->id_map[dist_name_info->id]);
342 				err = -EINVAL;
343 				goto done;
344 			}
345 			/* map id and name */
346 			r->id_map[dist_name_info->id] = id;
347 			r->str_map[dist_t->name_off] = base_t->name_off;
348 		}
349 	}
350 	/* ensure all distilled BTF ids now have a mapping... */
351 	for (id = 1; id < r->nr_dist_base_types; id++) {
352 		const char *name;
353 
354 		if (r->id_map[id] && r->id_map[id] != BTF_IS_EMBEDDED)
355 			continue;
356 		dist_t = btf_type_by_id(r->dist_base_btf, id);
357 		name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
358 		pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n",
359 			name, id);
360 		err = -EINVAL;
361 		break;
362 	}
363 done:
364 	free(base_name_cnt);
365 	free(dist_base_info_sorted);
366 	return err;
367 }
368 
369 /* distilled base should only have named int/float/enum/fwd/struct/union types. */
370 static int btf_relocate_validate_distilled_base(struct btf_relocate *r)
371 {
372 	unsigned int i;
373 
374 	for (i = 1; i < r->nr_dist_base_types; i++) {
375 		struct btf_type *t = btf_type_by_id(r->dist_base_btf, i);
376 		int kind = btf_kind(t);
377 
378 		switch (kind) {
379 		case BTF_KIND_INT:
380 		case BTF_KIND_FLOAT:
381 		case BTF_KIND_ENUM:
382 		case BTF_KIND_STRUCT:
383 		case BTF_KIND_UNION:
384 		case BTF_KIND_FWD:
385 			if (t->name_off)
386 				break;
387 			pr_warn("type [%d], kind [%d] is invalid for distilled base BTF; it is anonymous\n",
388 				i, kind);
389 			return -EINVAL;
390 		default:
391 			pr_warn("type [%d] in distilled based BTF has unexpected kind [%d]\n",
392 				i, kind);
393 			return -EINVAL;
394 		}
395 	}
396 	return 0;
397 }
398 
399 static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i)
400 {
401 	struct btf_type *t = btf_type_by_id(r->btf, i);
402 	struct btf_field_iter it;
403 	__u32 *str_off;
404 	int off, err;
405 
406 	err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS);
407 	if (err)
408 		return err;
409 
410 	while ((str_off = btf_field_iter_next(&it))) {
411 		if (!*str_off)
412 			continue;
413 		if (*str_off >= r->dist_str_len) {
414 			*str_off += r->base_str_len - r->dist_str_len;
415 		} else {
416 			off = r->str_map[*str_off];
417 			if (!off) {
418 				pr_warn("string '%s' [offset %u] is not mapped to base BTF",
419 					btf__str_by_offset(r->btf, off), *str_off);
420 				return -ENOENT;
421 			}
422 			*str_off = off;
423 		}
424 	}
425 	return 0;
426 }
427 
428 /* If successful, output of relocation is updated BTF with base BTF pointing
429  * at base_btf, and type ids, strings adjusted accordingly.
430  */
431 int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map)
432 {
433 	unsigned int nr_types = btf__type_cnt(btf);
434 	const struct btf_header *dist_base_hdr;
435 	const struct btf_header *base_hdr;
436 	struct btf_relocate r = {};
437 	int err = 0;
438 	__u32 id, i;
439 
440 	r.dist_base_btf = btf__base_btf(btf);
441 	if (!base_btf || r.dist_base_btf == base_btf)
442 		return -EINVAL;
443 
444 	r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf);
445 	r.nr_base_types = btf__type_cnt(base_btf);
446 	r.nr_split_types = nr_types - r.nr_dist_base_types;
447 	r.btf = btf;
448 	r.base_btf = base_btf;
449 
450 	r.id_map = calloc(nr_types, sizeof(*r.id_map));
451 	r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map));
452 	dist_base_hdr = btf_header(r.dist_base_btf);
453 	base_hdr = btf_header(r.base_btf);
454 	r.dist_str_len = dist_base_hdr->str_len;
455 	r.base_str_len = base_hdr->str_len;
456 	if (!r.id_map || !r.str_map) {
457 		err = -ENOMEM;
458 		goto err_out;
459 	}
460 
461 	err = btf_relocate_validate_distilled_base(&r);
462 	if (err)
463 		goto err_out;
464 
465 	/* Split BTF ids need to be adjusted as base and distilled base
466 	 * have different numbers of types, changing the start id of split
467 	 * BTF.
468 	 */
469 	for (id = r.nr_dist_base_types; id < nr_types; id++)
470 		r.id_map[id] = id + r.nr_base_types - r.nr_dist_base_types;
471 
472 	/* Build a map from distilled base ids to actual base BTF ids; it is used
473 	 * to update split BTF id references.  Also build a str_map mapping from
474 	 * distilled base BTF names to base BTF names.
475 	 */
476 	err = btf_relocate_map_distilled_base(&r);
477 	if (err)
478 		goto err_out;
479 
480 	/* Next, rewrite type ids in split BTF, replacing split ids with updated
481 	 * ids based on number of types in base BTF, and base ids with
482 	 * relocated ids from base_btf.
483 	 */
484 	for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) {
485 		err = btf_relocate_rewrite_type_id(&r, id);
486 		if (err)
487 			goto err_out;
488 	}
489 	/* String offsets now need to be updated using the str_map. */
490 	for (i = 0; i < r.nr_split_types; i++) {
491 		err = btf_relocate_rewrite_strs(&r, i + r.nr_dist_base_types);
492 		if (err)
493 			goto err_out;
494 	}
495 	/* Finally reset base BTF to be base_btf */
496 	btf_set_base_btf(btf, base_btf);
497 
498 	if (id_map) {
499 		*id_map = r.id_map;
500 		r.id_map = NULL;
501 	}
502 err_out:
503 	free(r.id_map);
504 	free(r.str_map);
505 	return err;
506 }
507