xref: /linux/lib/alloc_tag.c (revision 1f70367f7b6720ca0d3280b202317aa9d0167066)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/alloc_tag.h>
3 #include <linux/execmem.h>
4 #include <linux/fs.h>
5 #include <linux/gfp.h>
6 #include <linux/kallsyms.h>
7 #include <linux/module.h>
8 #include <linux/page_ext.h>
9 #include <linux/proc_fs.h>
10 #include <linux/seq_buf.h>
11 #include <linux/seq_file.h>
12 #include <linux/vmalloc.h>
13 #include <linux/kmemleak.h>
14 
15 #define ALLOCINFO_FILE_NAME		"allocinfo"
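/*
 * Virtual address space reserved up front (via execmem_vmap()) for module
 * allocation tags; enough for 100000 tags. Backing pages are only allocated
 * on demand as modules are loaded, see vm_module_tags_populate().
 */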
16 #define MODULE_ALLOC_TAG_VMAP_SIZE	(100000UL * sizeof(struct alloc_tag))
17 #define SECTION_START(NAME)		(CODETAG_SECTION_START_PREFIX NAME)
18 #define SECTION_STOP(NAME)		(CODETAG_SECTION_STOP_PREFIX NAME)
19 
20 #ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
21 static bool mem_profiling_support = true;
22 #else
23 static bool mem_profiling_support;
24 #endif
25 
26 static struct codetag_type *alloc_tag_cttype;
27 
28 #ifdef CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU
29 DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
30 EXPORT_SYMBOL(_shared_alloc_tag);
31 #endif
32 
33 DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
34 			mem_alloc_profiling_key);
35 EXPORT_SYMBOL(mem_alloc_profiling_key);
36 
37 DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed);
38 
39 struct alloc_tag_kernel_section kernel_tags = { NULL, 0 };
40 unsigned long alloc_tag_ref_mask;
41 int alloc_tag_ref_offs;
42 
43 struct allocinfo_private {
44 	struct codetag_iterator iter;
45 	bool print_header;
46 };
47 
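/*
 * seq_file iterator for /proc/allocinfo. The codetag module list is kept
 * locked from ->start() until ->stop() so tags cannot disappear while a
 * batch of output is being generated.
 */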
48 static void *allocinfo_start(struct seq_file *m, loff_t *pos)
49 {
50 	struct allocinfo_private *priv;
51 	loff_t node = *pos;
52 
53 	priv = (struct allocinfo_private *)m->private;
54 	codetag_lock_module_list(alloc_tag_cttype, true);
55 	if (node == 0) {
56 		priv->print_header = true;
57 		priv->iter = codetag_get_ct_iter(alloc_tag_cttype);
58 		codetag_next_ct(&priv->iter);
59 	}
60 	return priv->iter.ct ? priv : NULL;
61 }
62 
63 static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
64 {
65 	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
66 	struct codetag *ct = codetag_next_ct(&priv->iter);
67 
68 	(*pos)++;
69 	if (!ct)
70 		return NULL;
71 
72 	return priv;
73 }
74 
75 static void allocinfo_stop(struct seq_file *m, void *arg)
76 {
77 	codetag_lock_module_list(alloc_tag_cttype, false);
78 }
79 
80 static void print_allocinfo_header(struct seq_buf *buf)
81 {
82 	/* Output format version, so we can change it. */
83 	seq_buf_printf(buf, "allocinfo - version: 2.0\n");
84 	seq_buf_printf(buf, "#     <size>  <calls> <tag info>\n");
85 }
86 
87 static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
88 {
89 	struct alloc_tag *tag = ct_to_alloc_tag(ct);
90 	struct alloc_tag_counters counter = alloc_tag_read(tag);
91 	s64 bytes = counter.bytes;
92 
93 	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
94 	codetag_to_text(out, ct);
95 	if (unlikely(alloc_tag_is_inaccurate(tag)))
96 		seq_buf_printf(out, " accurate:no");
97 	seq_buf_putc(out, ' ');
98 	seq_buf_putc(out, '\n');
99 }
100 
101 static int allocinfo_show(struct seq_file *m, void *arg)
102 {
103 	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
104 	char *bufp;
105 	size_t n = seq_get_buf(m, &bufp);
106 	struct seq_buf buf;
107 
108 	seq_buf_init(&buf, bufp, n);
109 	if (priv->print_header) {
110 		print_allocinfo_header(&buf);
111 		priv->print_header = false;
112 	}
113 	alloc_tag_to_text(&buf, priv->iter.ct);
114 	seq_commit(m, seq_buf_used(&buf));
115 	return 0;
116 }
117 
118 static const struct seq_operations allocinfo_seq_op = {
119 	.start	= allocinfo_start,
120 	.next	= allocinfo_next,
121 	.stop	= allocinfo_stop,
122 	.show	= allocinfo_show,
123 };
124 
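/*
 * Fill @tags with up to @count tags that currently account for the most
 * allocated bytes, in descending order. Returns the number of entries
 * written; returns 0 when the codetag type is not initialized or when
 * @can_sleep is false and the module list lock cannot be taken.
 */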
125 size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep)
126 {
127 	struct codetag_iterator iter;
128 	struct codetag *ct;
129 	struct codetag_bytes n;
130 	unsigned int i, nr = 0;
131 
132 	if (IS_ERR_OR_NULL(alloc_tag_cttype))
133 		return 0;
134 
135 	if (can_sleep)
136 		codetag_lock_module_list(alloc_tag_cttype, true);
137 	else if (!codetag_trylock_module_list(alloc_tag_cttype))
138 		return 0;
139 
140 	iter = codetag_get_ct_iter(alloc_tag_cttype);
141 	while ((ct = codetag_next_ct(&iter))) {
142 		struct alloc_tag_counters counter = alloc_tag_read(ct_to_alloc_tag(ct));
143 
144 		n.ct	= ct;
145 		n.bytes = counter.bytes;
146 
147 		for (i = 0; i < nr; i++)
148 			if (n.bytes > tags[i].bytes)
149 				break;
150 
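		/*
		 * Insert the new entry at position i, shifting smaller ones
		 * down; when the array is already full the smallest entry is
		 * dropped (nr -= nr == count).
		 */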
151 		if (i < count) {
152 			nr -= nr == count;
153 			memmove(&tags[i + 1],
154 				&tags[i],
155 				sizeof(tags[0]) * (nr - i));
156 			nr++;
157 			tags[i] = n;
158 		}
159 	}
160 
161 	codetag_lock_module_list(alloc_tag_cttype, false);
162 
163 	return nr;
164 }
165 
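/*
 * When a folio of @old_order is split into chunks of @new_order, copy the
 * head page's tag reference to the head page of every new chunk so the
 * whole allocation stays attributed to the original call site.
 */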
166 void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
167 {
168 	int i;
169 	struct alloc_tag *tag;
170 	unsigned int nr_pages = 1 << new_order;
171 
172 	if (!mem_alloc_profiling_enabled())
173 		return;
174 
175 	tag = __pgalloc_tag_get(&folio->page);
176 	if (!tag)
177 		return;
178 
179 	for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
180 		union pgtag_ref_handle handle;
181 		union codetag_ref ref;
182 
183 		if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) {
184 			/* Set new reference to point to the original tag */
185 			alloc_tag_ref_set(&ref, tag);
186 			update_page_tag_ref(handle, &ref);
187 			put_page_tag_ref(handle);
188 		}
189 	}
190 }
191 
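/*
 * Exchange the tag references of @new and @old so that byte and call
 * counters keep following the data when one folio replaces the other.
 */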
192 void pgalloc_tag_swap(struct folio *new, struct folio *old)
193 {
194 	union pgtag_ref_handle handle_old, handle_new;
195 	union codetag_ref ref_old, ref_new;
196 	struct alloc_tag *tag_old, *tag_new;
197 
198 	if (!mem_alloc_profiling_enabled())
199 		return;
200 
201 	tag_old = __pgalloc_tag_get(&old->page);
202 	if (!tag_old)
203 		return;
204 	tag_new = __pgalloc_tag_get(&new->page);
205 	if (!tag_new)
206 		return;
207 
208 	if (!get_page_tag_ref(&old->page, &ref_old, &handle_old))
209 		return;
210 	if (!get_page_tag_ref(&new->page, &ref_new, &handle_new)) {
211 		put_page_tag_ref(handle_old);
212 		return;
213 	}
214 
215 	/*
216 	 * Clear tag references to avoid debug warning when using
217 	 * __alloc_tag_ref_set() with non-empty reference.
218 	 */
219 	set_codetag_empty(&ref_old);
220 	set_codetag_empty(&ref_new);
221 
222 	/* swap tags */
223 	__alloc_tag_ref_set(&ref_old, tag_new);
224 	update_page_tag_ref(handle_old, &ref_old);
225 	__alloc_tag_ref_set(&ref_new, tag_old);
226 	update_page_tag_ref(handle_new, &ref_new);
227 
228 	put_page_tag_ref(handle_old);
229 	put_page_tag_ref(handle_new);
230 }
231 
232 static void shutdown_mem_profiling(bool remove_file)
233 {
234 	if (mem_alloc_profiling_enabled())
235 		static_branch_disable(&mem_alloc_profiling_key);
236 
237 	if (!mem_profiling_support)
238 		return;
239 
240 	if (remove_file)
241 		remove_proc_entry(ALLOCINFO_FILE_NAME, NULL);
242 	mem_profiling_support = false;
243 }
244 
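/*
 * With compressed tag references, kernel tags are addressed by their index
 * within the alloc_tag section (located here via kallsyms) and that index is
 * stored directly in otherwise unused page flag bits. If there are more tags
 * than the available bits can represent, profiling is shut down.
 */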
245 void __init alloc_tag_sec_init(void)
246 {
247 	struct alloc_tag *last_codetag;
248 
249 	if (!mem_profiling_support)
250 		return;
251 
252 	if (!static_key_enabled(&mem_profiling_compressed))
253 		return;
254 
255 	kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name(
256 					SECTION_START(ALLOC_TAG_SECTION_NAME));
257 	last_codetag = (struct alloc_tag *)kallsyms_lookup_name(
258 					SECTION_STOP(ALLOC_TAG_SECTION_NAME));
259 	kernel_tags.count = last_codetag - kernel_tags.first_tag;
260 
261 	/* Check if kernel tags fit into page flags */
262 	if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) {
263 		shutdown_mem_profiling(false); /* allocinfo file does not exist yet */
264 		pr_err("%lu allocation tags cannot be referenced using %d available page flag bits. Memory allocation profiling is disabled!\n",
265 			kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS);
266 		return;
267 	}
268 
269 	alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS);
270 	alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1);
271 	pr_debug("Memory allocation profiling compression is using %d page flag bits!\n",
272 		 NR_UNUSED_PAGEFLAG_BITS);
273 }
274 
275 #ifdef CONFIG_MODULES
276 
277 static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE);
278 static struct vm_struct *vm_module_tags;
279 /* A dummy object used to indicate an unloaded module */
280 static struct module unloaded_mod;
281 /* A dummy object used to indicate a module prepended area */
282 static struct module prepend_mod;
283 
284 struct alloc_tag_module_section module_tags;
285 
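/*
 * With compressed references, module tags are indexed relative to
 * module_tags.start_addr, so reserved ranges must stay multiples of
 * sizeof(struct alloc_tag). The helpers below round values up and verify
 * that the requested section alignment is compatible with that.
 */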
286 static inline unsigned long alloc_tag_align(unsigned long val)
287 {
288 	if (!static_key_enabled(&mem_profiling_compressed)) {
289 		/* No alignment requirements when we are not indexing the tags */
290 		return val;
291 	}
292 
293 	if (val % sizeof(struct alloc_tag) == 0)
294 		return val;
295 	return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag);
296 }
297 
298 static bool ensure_alignment(unsigned long align, unsigned int *prepend)
299 {
300 	if (!static_key_enabled(&mem_profiling_compressed)) {
301 		/* No alignment requirements when we are not indexing the tags */
302 		return true;
303 	}
304 
305 	/*
306 	 * If alloc_tag size is not a multiple of required alignment, tag
307 	 * indexing does not work.
308 	 */
309 	if (!IS_ALIGNED(sizeof(struct alloc_tag), align))
310 		return false;
311 
312 	/* Ensure prepend consumes a multiple of alloc_tag-sized blocks */
313 	if (*prepend)
314 		*prepend = alloc_tag_align(*prepend);
315 
316 	return true;
317 }
318 
319 static inline bool tags_addressable(void)
320 {
321 	unsigned long tag_idx_count;
322 
323 	if (!static_key_enabled(&mem_profiling_compressed))
324 		return true; /* with page_ext tags are always addressable */
325 
326 	tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count +
327 			module_tags.size / sizeof(struct alloc_tag);
328 
329 	return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS);
330 }
331 
332 static bool needs_section_mem(struct module *mod, unsigned long size)
333 {
334 	if (!mem_profiling_support)
335 		return false;
336 
337 	return size >= sizeof(struct alloc_tag);
338 }
339 
340 static bool clean_unused_counters(struct alloc_tag *start_tag,
341 				  struct alloc_tag *end_tag)
342 {
343 	struct alloc_tag *tag;
344 	bool ret = true;
345 
346 	for (tag = start_tag; tag <= end_tag; tag++) {
347 		struct alloc_tag_counters counter;
348 
349 		if (!tag->counters)
350 			continue;
351 
352 		counter = alloc_tag_read(tag);
353 		if (!counter.bytes) {
354 			free_percpu(tag->counters);
355 			tag->counters = NULL;
356 		} else {
357 			ret = false;
358 		}
359 	}
360 
361 	return ret;
362 }
363 
364 /* Called with mod_area_mt locked */
365 static void clean_unused_module_areas_locked(void)
366 {
367 	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
368 	struct module *val;
369 
370 	mas_for_each(&mas, val, module_tags.size) {
371 		struct alloc_tag *start_tag;
372 		struct alloc_tag *end_tag;
373 
374 		if (val != &unloaded_mod)
375 			continue;
376 
377 		/* Release area if all tags are unused */
378 		start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
379 		end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
380 		if (clean_unused_counters(start_tag, end_tag))
381 			mas_erase(&mas);
382 	}
383 }
384 
385 /* Called with mod_area_mt locked */
386 static bool find_aligned_area(struct ma_state *mas, unsigned long section_size,
387 			      unsigned long size, unsigned int prepend, unsigned long align)
388 {
389 	bool cleanup_done = false;
390 
391 repeat:
392 	/* Try finding an area of the exact size and hope its start is aligned */
393 	if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) {
394 		if (IS_ALIGNED(mas->index + prepend, align))
395 			return true;
396 
397 		/* Try finding larger area to align later */
398 		mas_reset(mas);
399 		if (!mas_empty_area(mas, 0, section_size - 1,
400 				    size + prepend + align - 1))
401 			return true;
402 	}
403 
404 	/* No free area, try to clean up stale data and repeat the search once */
405 	if (!cleanup_done) {
406 		clean_unused_module_areas_locked();
407 		cleanup_done = true;
408 		mas_reset(mas);
409 		goto repeat;
410 	}
411 
412 	return false;
413 }
414 
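/*
 * Back the currently used part of the module tag area with real pages:
 * bulk-allocate whatever is missing beyond the already populated end,
 * map it with vmap_pages_range() and extend the KASAN module shadow to
 * cover the new range.
 */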
415 static int vm_module_tags_populate(void)
416 {
417 	unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) +
418 				 (vm_module_tags->nr_pages << PAGE_SHIFT);
419 	unsigned long new_end = module_tags.start_addr + module_tags.size;
420 
421 	if (phys_end < new_end) {
422 		struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
423 		unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN);
424 		unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN);
425 		unsigned long more_pages;
426 		unsigned long nr = 0;
427 
428 		more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT;
429 		while (nr < more_pages) {
430 			unsigned long allocated;
431 
432 			allocated = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN,
433 				NUMA_NO_NODE, more_pages - nr, next_page + nr);
434 
435 			if (!allocated)
436 				break;
437 			nr += allocated;
438 		}
439 
440 		if (nr < more_pages ||
441 		    vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL,
442 				     next_page, PAGE_SHIFT) < 0) {
443 			release_pages_arg arg = { .pages = next_page };
444 
445 			/* Clean up and error out */
446 			release_pages(arg, nr);
447 			return -ENOMEM;
448 		}
449 
450 		vm_module_tags->nr_pages += nr;
451 
452 		/*
453 		 * KASAN allocates 1 byte of shadow for every 8 bytes of data.
454 		 * kasan_alloc_module_shadow() allocates shadow memory in whole
455 		 * pages, so the range passed to it is aligned to MODULE_ALIGN
456 		 * here.
457 		 */
458 		if (old_shadow_end < new_shadow_end)
459 			kasan_alloc_module_shadow((void *)old_shadow_end,
460 						  new_shadow_end - old_shadow_end,
461 						  GFP_KERNEL);
462 	}
463 
464 	/*
465 	 * Mark the pages as accessible, now that they are mapped.
466 	 * With hardware tag-based KASAN, marking is skipped for
467 	 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
468 	 */
469 	kasan_unpoison_vmalloc((void *)module_tags.start_addr,
470 				new_end - module_tags.start_addr,
471 				KASAN_VMALLOC_PROT_NORMAL);
472 
473 	return 0;
474 }
475 
476 static void *reserve_module_tags(struct module *mod, unsigned long size,
477 				 unsigned int prepend, unsigned long align)
478 {
479 	unsigned long section_size = module_tags.end_addr - module_tags.start_addr;
480 	MA_STATE(mas, &mod_area_mt, 0, section_size - 1);
481 	unsigned long offset;
482 	void *ret = NULL;
483 
484 	/* If the requested size cannot hold even one tag, return an error */
485 	if (size < sizeof(struct alloc_tag))
486 		return ERR_PTR(-EINVAL);
487 
488 	/*
489 	 * align is always a power of 2, so we can use IS_ALIGNED and ALIGN.
490 	 * align of 0 or 1 means no alignment; to keep things simple, set it to 1.
491 	 */
492 	if (!align)
493 		align = 1;
494 
495 	if (!ensure_alignment(align, &prepend)) {
496 		shutdown_mem_profiling(true);
497 		pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n",
498 			mod->name, align);
499 		return ERR_PTR(-EINVAL);
500 	}
501 
502 	mas_lock(&mas);
503 	if (!find_aligned_area(&mas, section_size, size, prepend, align)) {
504 		ret = ERR_PTR(-ENOMEM);
505 		goto unlock;
506 	}
507 
508 	/* Mark found area as reserved */
509 	offset = mas.index;
510 	offset += prepend;
511 	offset = ALIGN(offset, align);
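	/*
	 * If prepend and alignment pushed the start past mas.index, record
	 * the skipped gap as &prepend_mod so it can be released together
	 * with the module's own range.
	 */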
512 	if (offset != mas.index) {
513 		unsigned long pad_start = mas.index;
514 
515 		mas.last = offset - 1;
516 		mas_store(&mas, &prepend_mod);
517 		if (mas_is_err(&mas)) {
518 			ret = ERR_PTR(xa_err(mas.node));
519 			goto unlock;
520 		}
521 		mas.index = offset;
522 		mas.last = offset + size - 1;
523 		mas_store(&mas, mod);
524 		if (mas_is_err(&mas)) {
525 			mas.index = pad_start;
526 			mas_erase(&mas);
527 			ret = ERR_PTR(xa_err(mas.node));
528 		}
529 	} else {
530 		mas.last = offset + size - 1;
531 		mas_store(&mas, mod);
532 		if (mas_is_err(&mas))
533 			ret = ERR_PTR(xa_err(mas.node));
534 	}
535 unlock:
536 	mas_unlock(&mas);
537 
538 	if (IS_ERR(ret))
539 		return ret;
540 
541 	if (module_tags.size < offset + size) {
542 		int grow_res;
543 
544 		module_tags.size = offset + size;
545 		if (mem_alloc_profiling_enabled() && !tags_addressable()) {
546 			shutdown_mem_profiling(true);
547 			pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n",
548 				mod->name, NR_UNUSED_PAGEFLAG_BITS);
549 		}
550 
551 		grow_res = vm_module_tags_populate();
552 		if (grow_res) {
553 			shutdown_mem_profiling(true);
554 			pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n",
555 			       mod->name);
556 			return ERR_PTR(grow_res);
557 		}
558 	}
559 
560 	return (struct alloc_tag *)(module_tags.start_addr + offset);
561 }
562 
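/*
 * Called on module unload. If @used and some of the module's tags still
 * account allocated bytes, keep the range and mark it &unloaded_mod so the
 * counters survive the unload; otherwise free the percpu counters and clear
 * the range. A preceding &prepend_mod padding entry is cleared as well.
 */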
563 static void release_module_tags(struct module *mod, bool used)
564 {
565 	MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size);
566 	struct alloc_tag *start_tag;
567 	struct alloc_tag *end_tag;
568 	struct module *val;
569 
570 	mas_lock(&mas);
571 	mas_for_each_rev(&mas, val, 0)
572 		if (val == mod)
573 			break;
574 
575 	if (!val) /* module not found */
576 		goto out;
577 
578 	if (!used)
579 		goto release_area;
580 
581 	start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
582 	end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
583 	if (!clean_unused_counters(start_tag, end_tag)) {
584 		struct alloc_tag *tag;
585 
586 		for (tag = start_tag; tag <= end_tag; tag++) {
587 			struct alloc_tag_counters counter;
588 
589 			if (!tag->counters)
590 				continue;
591 
592 			counter = alloc_tag_read(tag);
593 			pr_info("%s:%u module %s func:%s has %llu bytes allocated at module unload\n",
594 				tag->ct.filename, tag->ct.lineno, tag->ct.modname,
595 				tag->ct.function, counter.bytes);
596 		}
597 	} else {
598 		used = false;
599 	}
600 release_area:
601 	mas_store(&mas, used ? &unloaded_mod : NULL);
602 	val = mas_prev_range(&mas, 0);
603 	if (val == &prepend_mod)
604 		mas_store(&mas, NULL);
605 out:
606 	mas_unlock(&mas);
607 }
608 
609 static int load_module(struct module *mod, struct codetag *start, struct codetag *stop)
610 {
611 	/* Allocate module alloc_tag percpu counters */
612 	struct alloc_tag *start_tag;
613 	struct alloc_tag *stop_tag;
614 	struct alloc_tag *tag;
615 
616 	/* percpu counters for core allocations are already statically allocated */
617 	if (!mod)
618 		return 0;
619 
620 	start_tag = ct_to_alloc_tag(start);
621 	stop_tag = ct_to_alloc_tag(stop);
622 	for (tag = start_tag; tag < stop_tag; tag++) {
623 		WARN_ON(tag->counters);
624 		tag->counters = alloc_percpu(struct alloc_tag_counters);
625 		if (!tag->counters) {
626 			while (--tag >= start_tag) {
627 				free_percpu(tag->counters);
628 				tag->counters = NULL;
629 			}
630 			pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s\n",
631 			       mod->name);
632 			return -ENOMEM;
633 		}
634 
635 		/*
636 		 * Avoid a kmemleak false positive. The pointer to the counters is stored
637 		 * in the alloc_tag section of the module and cannot be directly accessed.
638 		 */
639 		kmemleak_ignore_percpu(tag->counters);
640 	}
641 	return 0;
642 }
643 
644 static void replace_module(struct module *mod, struct module *new_mod)
645 {
646 	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
647 	struct module *val;
648 
649 	mas_lock(&mas);
650 	mas_for_each(&mas, val, module_tags.size) {
651 		if (val != mod)
652 			continue;
653 
654 		mas_store_gfp(&mas, new_mod, GFP_KERNEL);
655 		break;
656 	}
657 	mas_unlock(&mas);
658 }
659 
660 static int __init alloc_mod_tags_mem(void)
661 {
662 	/* Reserve address space to copy module allocation tags into */
663 	vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE);
664 	if (!vm_module_tags) {
665 		pr_err("Failed to map %lu bytes for module allocation tags\n",
666 			MODULE_ALLOC_TAG_VMAP_SIZE);
667 		module_tags.start_addr = 0;
668 		return -ENOMEM;
669 	}
670 
671 	vm_module_tags->pages = kmalloc_array(get_vm_area_size(vm_module_tags) >> PAGE_SHIFT,
672 					sizeof(struct page *), GFP_KERNEL | __GFP_ZERO);
673 	if (!vm_module_tags->pages) {
674 		free_vm_area(vm_module_tags);
675 		return -ENOMEM;
676 	}
677 
678 	module_tags.start_addr = (unsigned long)vm_module_tags->addr;
679 	module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE;
680 	/* Ensure the base is alloc_tag aligned when required for indexing */
681 	module_tags.start_addr = alloc_tag_align(module_tags.start_addr);
682 
683 	return 0;
684 }
685 
686 static void __init free_mod_tags_mem(void)
687 {
688 	release_pages_arg arg = { .pages = vm_module_tags->pages };
689 
690 	module_tags.start_addr = 0;
691 	release_pages(arg, vm_module_tags->nr_pages);
692 	kfree(vm_module_tags->pages);
693 	free_vm_area(vm_module_tags);
694 }
695 
696 #else /* CONFIG_MODULES */
697 
698 static inline int alloc_mod_tags_mem(void) { return 0; }
699 static inline void free_mod_tags_mem(void) {}
700 
701 #endif /* CONFIG_MODULES */
702 
703 /* See: Documentation/mm/allocation-profiling.rst */
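/*
 * Accepted values: "never" (profiling not supported at all), or
 * <bool>[,compressed] where the bool selects the boot-time on/off state and
 * "compressed" stores tag references in page flags instead of page_ext.
 */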
704 static int __init setup_early_mem_profiling(char *str)
705 {
706 	bool compressed = false;
707 	bool enable;
708 
709 	if (!str || !str[0])
710 		return -EINVAL;
711 
712 	if (!strncmp(str, "never", 5)) {
713 		enable = false;
714 		mem_profiling_support = false;
715 		pr_info("Memory allocation profiling is disabled!\n");
716 	} else {
717 		char *token = strsep(&str, ",");
718 
719 		if (kstrtobool(token, &enable))
720 			return -EINVAL;
721 
722 		if (str) {
723 
724 			if (strcmp(str, "compressed"))
725 				return -EINVAL;
726 
727 			compressed = true;
728 		}
729 		mem_profiling_support = true;
730 		pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n",
731 			compressed ? "with" : "without", enable ? "on" : "off");
732 	}
733 
734 	if (enable != mem_alloc_profiling_enabled()) {
735 		if (enable)
736 			static_branch_enable(&mem_alloc_profiling_key);
737 		else
738 			static_branch_disable(&mem_alloc_profiling_key);
739 	}
740 	if (compressed != static_key_enabled(&mem_profiling_compressed)) {
741 		if (compressed)
742 			static_branch_enable(&mem_profiling_compressed);
743 		else
744 			static_branch_disable(&mem_profiling_compressed);
745 	}
746 
747 	return 0;
748 }
749 early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling);
750 
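/*
 * A page_ext-based tag reference is only needed when compressed references
 * (stored directly in page flags) are not in use.
 */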
751 static __init bool need_page_alloc_tagging(void)
752 {
753 	if (static_key_enabled(&mem_profiling_compressed))
754 		return false;
755 
756 	return mem_profiling_support;
757 }
758 
759 static __init void init_page_alloc_tagging(void)
760 {
761 }
762 
763 struct page_ext_operations page_alloc_tagging_ops = {
764 	.size = sizeof(union codetag_ref),
765 	.need = need_page_alloc_tagging,
766 	.init = init_page_alloc_tagging,
767 };
768 EXPORT_SYMBOL(page_alloc_tagging_ops);
769 
770 #ifdef CONFIG_SYSCTL
771 /*
772  * Not using proc_do_static_key() directly to prevent enabling profiling
773  * after it was shut down.
774  */
775 static int proc_mem_profiling_handler(const struct ctl_table *table, int write,
776 				      void *buffer, size_t *lenp, loff_t *ppos)
777 {
778 	if (!mem_profiling_support && write)
779 		return -EINVAL;
780 
781 	return proc_do_static_key(table, write, buffer, lenp, ppos);
782 }
783 
784 
785 static struct ctl_table memory_allocation_profiling_sysctls[] = {
786 	{
787 		.procname	= "mem_profiling",
788 		.data		= &mem_alloc_profiling_key,
789 #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
790 		.mode		= 0444,
791 #else
792 		.mode		= 0644,
793 #endif
794 		.proc_handler	= proc_mem_profiling_handler,
795 	},
796 };
797 
798 static void __init sysctl_init(void)
799 {
800 	if (!mem_profiling_support)
801 		memory_allocation_profiling_sysctls[0].mode = 0444;
802 
803 	register_sysctl_init("vm", memory_allocation_profiling_sysctls);
804 }
805 #else /* CONFIG_SYSCTL */
806 static inline void sysctl_init(void) {}
807 #endif /* CONFIG_SYSCTL */
808 
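/*
 * Late initialization: register the sysctl, create /proc/allocinfo, reserve
 * the module tag area and register the codetag type. Every failure path
 * shuts memory allocation profiling back down.
 */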
809 static int __init alloc_tag_init(void)
810 {
811 	const struct codetag_type_desc desc = {
812 		.section		= ALLOC_TAG_SECTION_NAME,
813 		.tag_size		= sizeof(struct alloc_tag),
814 #ifdef CONFIG_MODULES
815 		.needs_section_mem	= needs_section_mem,
816 		.alloc_section_mem	= reserve_module_tags,
817 		.free_section_mem	= release_module_tags,
818 		.module_load		= load_module,
819 		.module_replaced	= replace_module,
820 #endif
821 	};
822 	int res;
823 
824 	sysctl_init();
825 
826 	if (!mem_profiling_support) {
827 		pr_info("Memory allocation profiling is not supported!\n");
828 		return 0;
829 	}
830 
831 	if (!proc_create_seq_private(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op,
832 				     sizeof(struct allocinfo_private), NULL)) {
833 		pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME);
834 		shutdown_mem_profiling(false);
835 		return -ENOMEM;
836 	}
837 
838 	res = alloc_mod_tags_mem();
839 	if (res) {
840 		pr_err("Failed to reserve address space for module tags, errno = %d\n", res);
841 		shutdown_mem_profiling(true);
842 		return res;
843 	}
844 
845 	alloc_tag_cttype = codetag_register_type(&desc);
846 	if (IS_ERR(alloc_tag_cttype)) {
847 		pr_err("Allocation tags registration failed, errno = %ld\n", PTR_ERR(alloc_tag_cttype));
848 		free_mod_tags_mem();
849 		shutdown_mem_profiling(true);
850 		return PTR_ERR(alloc_tag_cttype);
851 	}
852 
853 	return 0;
854 }
855 module_init(alloc_tag_init);
856