xref: /linux/lib/alloc_tag.c (revision e406d57be7bd2a4e73ea512c1ae36a40a44e499e)
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/alloc_tag.h>
#include <linux/execmem.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/page_ext.h>
#include <linux/proc_fs.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/string_choices.h>
#include <linux/vmalloc.h>
#include <linux/kmemleak.h>

#define ALLOCINFO_FILE_NAME		"allocinfo"
#define MODULE_ALLOC_TAG_VMAP_SIZE	(100000UL * sizeof(struct alloc_tag))
#define SECTION_START(NAME)		(CODETAG_SECTION_START_PREFIX NAME)
#define SECTION_STOP(NAME)		(CODETAG_SECTION_STOP_PREFIX NAME)

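/*
 * Editor's note (illustrative, not in the original file): SECTION_START()
 * and SECTION_STOP() rely on C string-literal concatenation. Assuming the
 * codetag prefixes "__start_" / "__stop_" and ALLOC_TAG_SECTION_NAME
 * "alloc_tags" from <linux/codetag.h> and <linux/alloc_tag.h>:
 *
 *	SECTION_START(ALLOC_TAG_SECTION_NAME)
 *		-> ("__start_" "alloc_tags") -> "__start_alloc_tags"
 *
 * i.e. the linker-generated symbol bounding the alloc_tags section, which
 * alloc_tag_sec_init() below resolves with kallsyms_lookup_name().
 */
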
#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
static bool mem_profiling_support = true;
#else
static bool mem_profiling_support;
#endif

static struct codetag_type *alloc_tag_cttype;

#ifdef CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU
DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
EXPORT_SYMBOL(_shared_alloc_tag);
#endif

DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
			mem_alloc_profiling_key);
EXPORT_SYMBOL(mem_alloc_profiling_key);

DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed);

struct alloc_tag_kernel_section kernel_tags = { NULL, 0 };
unsigned long alloc_tag_ref_mask;
int alloc_tag_ref_offs;

struct allocinfo_private {
	struct codetag_iterator iter;
	bool print_header;
};

static void *allocinfo_start(struct seq_file *m, loff_t *pos)
{
	struct allocinfo_private *priv;
	loff_t node = *pos;

	priv = (struct allocinfo_private *)m->private;
	codetag_lock_module_list(alloc_tag_cttype, true);
	if (node == 0) {
		priv->print_header = true;
		priv->iter = codetag_get_ct_iter(alloc_tag_cttype);
		codetag_next_ct(&priv->iter);
	}
	return priv->iter.ct ? priv : NULL;
}

static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
	struct codetag *ct = codetag_next_ct(&priv->iter);

	(*pos)++;
	if (!ct)
		return NULL;

	return priv;
}

static void allocinfo_stop(struct seq_file *m, void *arg)
{
	codetag_lock_module_list(alloc_tag_cttype, false);
}

static void print_allocinfo_header(struct seq_buf *buf)
{
	/* Output format version, so we can change it. */
	seq_buf_printf(buf, "allocinfo - version: 2.0\n");
	seq_buf_printf(buf, "#     <size>  <calls> <tag info>\n");
}

static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
{
	struct alloc_tag *tag = ct_to_alloc_tag(ct);
	struct alloc_tag_counters counter = alloc_tag_read(tag);
	s64 bytes = counter.bytes;

	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
	codetag_to_text(out, ct);
	if (unlikely(alloc_tag_is_inaccurate(tag)))
		seq_buf_printf(out, " accurate:no");
	seq_buf_putc(out, ' ');
	seq_buf_putc(out, '\n');
}

static int allocinfo_show(struct seq_file *m, void *arg)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
	char *bufp;
	size_t n = seq_get_buf(m, &bufp);
	struct seq_buf buf;

	seq_buf_init(&buf, bufp, n);
	if (priv->print_header) {
		print_allocinfo_header(&buf);
		priv->print_header = false;
	}
	alloc_tag_to_text(&buf, priv->iter.ct);
	seq_commit(m, seq_buf_used(&buf));
	return 0;
}

static const struct seq_operations allocinfo_seq_op = {
	.start	= allocinfo_start,
	.next	= allocinfo_next,
	.stop	= allocinfo_stop,
	.show	= allocinfo_show,
};
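
/*
 * Editor's illustration (values are hypothetical): with the seq_operations
 * above, reading /proc/allocinfo yields one line per call site, e.g.:
 *
 *	allocinfo - version: 2.0
 *	#     <size>  <calls> <tag info>
 *	      532480      130 mm/filemap.c:1979 func:__filemap_get_folio
 *	        4096        1 lib/stackdepot.c:627 func:depot_alloc_stack
 *
 * <size> is currently-allocated bytes and <calls> the count of live
 * allocations; the <tag info> column is produced by codetag_to_text().
 */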

size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep)
{
	struct codetag_iterator iter;
	struct codetag *ct;
	struct codetag_bytes n;
	unsigned int i, nr = 0;

	if (IS_ERR_OR_NULL(alloc_tag_cttype))
		return 0;

	if (can_sleep)
		codetag_lock_module_list(alloc_tag_cttype, true);
	else if (!codetag_trylock_module_list(alloc_tag_cttype))
		return 0;

	iter = codetag_get_ct_iter(alloc_tag_cttype);
	while ((ct = codetag_next_ct(&iter))) {
		struct alloc_tag_counters counter = alloc_tag_read(ct_to_alloc_tag(ct));

		n.ct	= ct;
		n.bytes = counter.bytes;

		/* Find the slot that keeps tags[] sorted by bytes, descending */
		for (i = 0; i < nr; i++)
			if (n.bytes > tags[i].bytes)
				break;

		if (i < count) {
			/* Drop the smallest entry once the array is full */
			nr -= nr == count;
			memmove(&tags[i + 1],
				&tags[i],
				sizeof(tags[0]) * (nr - i));
			nr++;
			tags[i] = n;
		}
	}

	codetag_lock_module_list(alloc_tag_cttype, false);

	return nr;
}

void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
{
	int i;
	struct alloc_tag *tag;
	unsigned int nr_pages = 1 << new_order;

	if (!mem_alloc_profiling_enabled())
		return;

	tag = __pgalloc_tag_get(&folio->page);
	if (!tag)
		return;

	for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
		union pgtag_ref_handle handle;
		union codetag_ref ref;

		if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) {
			/* Set new reference to point to the original tag */
			alloc_tag_ref_set(&ref, tag);
			update_page_tag_ref(handle, &ref);
			put_page_tag_ref(handle);
		}
	}
}

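/*
 * Editor's worked example: splitting an order-3 folio (8 pages) into
 * order-1 pieces gives nr_pages = 2, so the loop above visits i = 2, 4, 6
 * and points each new head page's tag reference at the original tag.
 * All four resulting folios are then accounted to the same call site.
 */
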
void pgalloc_tag_swap(struct folio *new, struct folio *old)
{
	union pgtag_ref_handle handle_old, handle_new;
	union codetag_ref ref_old, ref_new;
	struct alloc_tag *tag_old, *tag_new;

	if (!mem_alloc_profiling_enabled())
		return;

	tag_old = __pgalloc_tag_get(&old->page);
	if (!tag_old)
		return;
	tag_new = __pgalloc_tag_get(&new->page);
	if (!tag_new)
		return;

	if (!get_page_tag_ref(&old->page, &ref_old, &handle_old))
		return;
	if (!get_page_tag_ref(&new->page, &ref_new, &handle_new)) {
		put_page_tag_ref(handle_old);
		return;
	}

	/*
	 * Clear the tag references to avoid a debug warning when using
	 * __alloc_tag_ref_set() with a non-empty reference.
	 */
	set_codetag_empty(&ref_old);
	set_codetag_empty(&ref_new);

	/* swap tags */
	__alloc_tag_ref_set(&ref_old, tag_new);
	update_page_tag_ref(handle_old, &ref_old);
	__alloc_tag_ref_set(&ref_new, tag_old);
	update_page_tag_ref(handle_new, &ref_new);

	put_page_tag_ref(handle_old);
	put_page_tag_ref(handle_new);
}

static void shutdown_mem_profiling(bool remove_file)
{
	if (mem_alloc_profiling_enabled())
		static_branch_disable(&mem_alloc_profiling_key);

	if (!mem_profiling_support)
		return;

	if (remove_file)
		remove_proc_entry(ALLOCINFO_FILE_NAME, NULL);
	mem_profiling_support = false;
}

void __init alloc_tag_sec_init(void)
{
	struct alloc_tag *last_codetag;

	if (!mem_profiling_support)
		return;

	if (!static_key_enabled(&mem_profiling_compressed))
		return;

	kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name(
					SECTION_START(ALLOC_TAG_SECTION_NAME));
	last_codetag = (struct alloc_tag *)kallsyms_lookup_name(
					SECTION_STOP(ALLOC_TAG_SECTION_NAME));
	kernel_tags.count = last_codetag - kernel_tags.first_tag;

	/* Check if kernel tags fit into page flags */
	if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) {
		shutdown_mem_profiling(false); /* allocinfo file does not exist yet */
		pr_err("%lu allocation tags cannot be referenced using %d available page flag bits. Memory allocation profiling is disabled!\n",
			kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS);
		return;
	}

	alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS);
	alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1);
	pr_debug("Memory allocation profiling compression is using %d page flag bits!\n",
		 NR_UNUSED_PAGEFLAG_BITS);
}

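/*
 * Editor's worked example (NR_UNUSED_PAGEFLAG_BITS is arch/config
 * dependent; 16 is a hypothetical value): with 16 spare page flag bits,
 * alloc_tag_ref_mask is 0xffff and at most 65536 tag indices can be
 * packed into page->flags, which is why the check above disables
 * profiling instead of letting indices spill into neighboring bits.
 */
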
#ifdef CONFIG_MODULES

static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE);
static struct vm_struct *vm_module_tags;
/* A dummy object used to indicate an unloaded module */
static struct module unloaded_mod;
/* A dummy object used to indicate alignment padding prepended to a module's area */
static struct module prepend_mod;

struct alloc_tag_module_section module_tags;

static inline unsigned long alloc_tag_align(unsigned long val)
{
	if (!static_key_enabled(&mem_profiling_compressed)) {
		/* No alignment requirements when we are not indexing the tags */
		return val;
	}

	if (val % sizeof(struct alloc_tag) == 0)
		return val;
	return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag);
}

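/*
 * Editor's worked example (sizeof(struct alloc_tag) varies by config;
 * 40 bytes is hypothetical): alloc_tag_align(100) returns
 * (100 / 40 + 1) * 40 == 120, while alloc_tag_align(120) returns 120
 * unchanged. Keeping addresses a whole number of tags apart is what
 * allows compressed mode to refer to a tag by its index.
 */
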
static bool ensure_alignment(unsigned long align, unsigned int *prepend)
{
	if (!static_key_enabled(&mem_profiling_compressed)) {
		/* No alignment requirements when we are not indexing the tags */
		return true;
	}

	/*
	 * If the alloc_tag size is not a multiple of the required alignment,
	 * tag indexing does not work.
	 */
	if (!IS_ALIGNED(sizeof(struct alloc_tag), align))
		return false;

	/* Ensure prepend consumes a multiple of alloc_tag-sized blocks */
	if (*prepend)
		*prepend = alloc_tag_align(*prepend);

	return true;
}

static inline bool tags_addressable(void)
{
	unsigned long tag_idx_count;

	if (!static_key_enabled(&mem_profiling_compressed))
		return true; /* with page_ext, tags are always addressable */

	tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count +
			module_tags.size / sizeof(struct alloc_tag);

	return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS);
}

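/*
 * Editor's worked example (hypothetical numbers): with 16 unused page
 * flag bits the sum CODETAG_ID_FIRST + kernel_tags.count + module slots
 * must stay below 65536. With, say, 5000 kernel tags, roughly 60000
 * module tag slots remain before reserve_module_tags() trips this check
 * and shuts profiling down.
 */
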
static bool needs_section_mem(struct module *mod, unsigned long size)
{
	if (!mem_profiling_support)
		return false;

	return size >= sizeof(struct alloc_tag);
}

static bool clean_unused_counters(struct alloc_tag *start_tag,
				  struct alloc_tag *end_tag)
{
	struct alloc_tag *tag;
	bool ret = true;

	for (tag = start_tag; tag <= end_tag; tag++) {
		struct alloc_tag_counters counter;

		if (!tag->counters)
			continue;

		counter = alloc_tag_read(tag);
		if (!counter.bytes) {
			free_percpu(tag->counters);
			tag->counters = NULL;
		} else {
			ret = false;
		}
	}

	return ret;
}

/* Called with mod_area_mt locked */
static void clean_unused_module_areas_locked(void)
{
	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
	struct module *val;

	mas_for_each(&mas, val, module_tags.size) {
		struct alloc_tag *start_tag;
		struct alloc_tag *end_tag;

		if (val != &unloaded_mod)
			continue;

		/* Release area if all tags are unused */
		start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
		end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
		if (clean_unused_counters(start_tag, end_tag))
			mas_erase(&mas);
	}
}

/* Called with mod_area_mt locked */
static bool find_aligned_area(struct ma_state *mas, unsigned long section_size,
			      unsigned long size, unsigned int prepend, unsigned long align)
{
	bool cleanup_done = false;

repeat:
	/* Try finding exact size and hope the start is aligned */
	if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) {
		if (IS_ALIGNED(mas->index + prepend, align))
			return true;

		/* Try finding a larger area to align later */
		mas_reset(mas);
		if (!mas_empty_area(mas, 0, section_size - 1,
				    size + prepend + align - 1))
			return true;
	}

	/* No free area; try cleaning up stale data and repeat the search once */
	if (!cleanup_done) {
		clean_unused_module_areas_locked();
		cleanup_done = true;
		mas_reset(mas);
		goto repeat;
	}

	return false;
}

static int vm_module_tags_populate(void)
{
	unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) +
				 (vm_module_tags->nr_pages << PAGE_SHIFT);
	unsigned long new_end = module_tags.start_addr + module_tags.size;

	if (phys_end < new_end) {
		struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
		unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN);
		unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN);
		unsigned long more_pages;
		unsigned long nr = 0;

		more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT;
		while (nr < more_pages) {
			unsigned long allocated;

			allocated = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN,
				NUMA_NO_NODE, more_pages - nr, next_page + nr);

			if (!allocated)
				break;
			nr += allocated;
		}

		if (nr < more_pages ||
		    vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL,
				     next_page, PAGE_SHIFT) < 0) {
			release_pages_arg arg = { .pages = next_page };

			/* Clean up and error out */
			release_pages(arg, nr);
			return -ENOMEM;
		}

		vm_module_tags->nr_pages += nr;

		/*
		 * KASAN allocates 1 byte of shadow for every 8 bytes of data.
		 * Because kasan_alloc_module_shadow() allocates shadow memory
		 * in page-sized units, align the range to MODULE_ALIGN here.
		 */
		if (old_shadow_end < new_shadow_end)
			kasan_alloc_module_shadow((void *)old_shadow_end,
						  new_shadow_end - old_shadow_end,
						  GFP_KERNEL);
	}

	/*
	 * Mark the pages as accessible, now that they are mapped.
	 * With hardware tag-based KASAN, marking is skipped for
	 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
	 */
	kasan_unpoison_vmalloc((void *)module_tags.start_addr,
				new_end - module_tags.start_addr,
				KASAN_VMALLOC_PROT_NORMAL);

	return 0;
}

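/*
 * Editor's worked example (hypothetical sizes): if module_tags.size grows
 * by one 4 KiB page past the mapped range, one backing page is bulk
 * allocated and vmapped at phys_end; with KASAN's 8:1 data-to-shadow
 * ratio the shadow is only extended when the MODULE_ALIGN-rounded end
 * moves, not on every page.
 */
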
static void *reserve_module_tags(struct module *mod, unsigned long size,
				 unsigned int prepend, unsigned long align)
{
	unsigned long section_size = module_tags.end_addr - module_tags.start_addr;
	MA_STATE(mas, &mod_area_mt, 0, section_size - 1);
	unsigned long offset;
	void *ret = NULL;

	/* If the area cannot hold even one tag, return an error */
	if (size < sizeof(struct alloc_tag))
		return ERR_PTR(-EINVAL);

	/*
	 * align is always a power of 2, so we can use IS_ALIGNED() and ALIGN().
	 * align of 0 or 1 means no alignment; normalize it to 1 to simplify
	 * the code below.
	 */
	if (!align)
		align = 1;

	if (!ensure_alignment(align, &prepend)) {
		shutdown_mem_profiling(true);
		pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n",
			mod->name, align);
		return ERR_PTR(-EINVAL);
	}

	mas_lock(&mas);
	if (!find_aligned_area(&mas, section_size, size, prepend, align)) {
		ret = ERR_PTR(-ENOMEM);
		goto unlock;
	}

	/* Mark found area as reserved */
	offset = mas.index;
	offset += prepend;
	offset = ALIGN(offset, align);
	if (offset != mas.index) {
		unsigned long pad_start = mas.index;

		mas.last = offset - 1;
		mas_store(&mas, &prepend_mod);
		if (mas_is_err(&mas)) {
			ret = ERR_PTR(xa_err(mas.node));
			goto unlock;
		}
		mas.index = offset;
		mas.last = offset + size - 1;
		mas_store(&mas, mod);
		if (mas_is_err(&mas)) {
			mas.index = pad_start;
			mas_erase(&mas);
			ret = ERR_PTR(xa_err(mas.node));
		}
	} else {
		mas.last = offset + size - 1;
		mas_store(&mas, mod);
		if (mas_is_err(&mas))
			ret = ERR_PTR(xa_err(mas.node));
	}
unlock:
	mas_unlock(&mas);

	if (IS_ERR(ret))
		return ret;

	if (module_tags.size < offset + size) {
		int grow_res;

		module_tags.size = offset + size;
		if (mem_alloc_profiling_enabled() && !tags_addressable()) {
			shutdown_mem_profiling(true);
			pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n",
				mod->name, NR_UNUSED_PAGEFLAG_BITS);
		}

		grow_res = vm_module_tags_populate();
		if (grow_res) {
			shutdown_mem_profiling(true);
			pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n",
			       mod->name);
			return ERR_PTR(grow_res);
		}
	}

	return (struct alloc_tag *)(module_tags.start_addr + offset);
}

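/*
 * Editor's sketch of the mod_area_mt layout produced above when alignment
 * padding was needed (offsets are illustrative):
 *
 *	[area start .. offset - 1]        -> &prepend_mod   (pad)
 *	[offset     .. offset + size - 1] -> mod             (the tags)
 *
 * release_module_tags() later finds the &prepend_mod range right before
 * the module's range and erases both together.
 */
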
static void release_module_tags(struct module *mod, bool used)
{
	MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size);
	struct alloc_tag *start_tag;
	struct alloc_tag *end_tag;
	struct module *val;

	mas_lock(&mas);
	mas_for_each_rev(&mas, val, 0)
		if (val == mod)
			break;

	if (!val) /* module not found */
		goto out;

	if (!used)
		goto release_area;

	start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
	end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
	if (!clean_unused_counters(start_tag, end_tag)) {
		struct alloc_tag *tag;

		for (tag = start_tag; tag <= end_tag; tag++) {
			struct alloc_tag_counters counter;

			if (!tag->counters)
				continue;

			counter = alloc_tag_read(tag);
			pr_info("%s:%u module %s func:%s has %llu bytes allocated at module unload\n",
				tag->ct.filename, tag->ct.lineno, tag->ct.modname,
				tag->ct.function, counter.bytes);
		}
	} else {
		used = false;
	}
release_area:
	mas_store(&mas, used ? &unloaded_mod : NULL);
	val = mas_prev_range(&mas, 0);
	if (val == &prepend_mod)
		mas_store(&mas, NULL);
out:
	mas_unlock(&mas);
}

static int load_module(struct module *mod, struct codetag *start, struct codetag *stop)
{
	/* Allocate module alloc_tag percpu counters */
	struct alloc_tag *start_tag;
	struct alloc_tag *stop_tag;
	struct alloc_tag *tag;

	/* percpu counters for core allocations are already statically allocated */
	if (!mod)
		return 0;

	start_tag = ct_to_alloc_tag(start);
	stop_tag = ct_to_alloc_tag(stop);
	for (tag = start_tag; tag < stop_tag; tag++) {
		WARN_ON(tag->counters);
		tag->counters = alloc_percpu(struct alloc_tag_counters);
		if (!tag->counters) {
			while (--tag >= start_tag) {
				free_percpu(tag->counters);
				tag->counters = NULL;
			}
			pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s\n",
			       mod->name);
			return -ENOMEM;
		}

		/*
		 * Avoid a kmemleak false positive. The pointer to the counters is stored
		 * in the alloc_tag section of the module and cannot be directly accessed.
		 */
		kmemleak_ignore_percpu(tag->counters);
	}
	return 0;
}

static void replace_module(struct module *mod, struct module *new_mod)
{
	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
	struct module *val;

	mas_lock(&mas);
	mas_for_each(&mas, val, module_tags.size) {
		if (val != mod)
			continue;

		mas_store_gfp(&mas, new_mod, GFP_KERNEL);
		break;
	}
	mas_unlock(&mas);
}

static int __init alloc_mod_tags_mem(void)
{
	/* Map space to copy allocation tags */
	vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE);
	if (!vm_module_tags) {
		pr_err("Failed to map %lu bytes for module allocation tags\n",
			MODULE_ALLOC_TAG_VMAP_SIZE);
		module_tags.start_addr = 0;
		return -ENOMEM;
	}

	vm_module_tags->pages = kmalloc_array(get_vm_area_size(vm_module_tags) >> PAGE_SHIFT,
					sizeof(struct page *), GFP_KERNEL | __GFP_ZERO);
	if (!vm_module_tags->pages) {
		free_vm_area(vm_module_tags);
		return -ENOMEM;
	}

	module_tags.start_addr = (unsigned long)vm_module_tags->addr;
	module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE;
	/* Ensure the base is alloc_tag aligned when required for indexing */
	module_tags.start_addr = alloc_tag_align(module_tags.start_addr);

	return 0;
}

static void __init free_mod_tags_mem(void)
{
	release_pages_arg arg = { .pages = vm_module_tags->pages };

	module_tags.start_addr = 0;
	release_pages(arg, vm_module_tags->nr_pages);
	kfree(vm_module_tags->pages);
	free_vm_area(vm_module_tags);
}

#else /* CONFIG_MODULES */

static inline int alloc_mod_tags_mem(void) { return 0; }
static inline void free_mod_tags_mem(void) {}

#endif /* CONFIG_MODULES */

/* See: Documentation/mm/allocation-profiling.rst */
static int __init setup_early_mem_profiling(char *str)
{
	bool compressed = false;
	bool enable;

	if (!str || !str[0])
		return -EINVAL;

	if (!strncmp(str, "never", 5)) {
		enable = false;
		mem_profiling_support = false;
		pr_info("Memory allocation profiling is disabled!\n");
	} else {
		char *token = strsep(&str, ",");

		if (kstrtobool(token, &enable))
			return -EINVAL;

		if (str) {
			if (strcmp(str, "compressed"))
				return -EINVAL;

			compressed = true;
		}
		mem_profiling_support = true;
		pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n",
			compressed ? "with" : "without", str_on_off(enable));
	}

	if (enable != mem_alloc_profiling_enabled()) {
		if (enable)
			static_branch_enable(&mem_alloc_profiling_key);
		else
			static_branch_disable(&mem_alloc_profiling_key);
	}
	if (compressed != static_key_enabled(&mem_profiling_compressed)) {
		if (compressed)
			static_branch_enable(&mem_profiling_compressed);
		else
			static_branch_disable(&mem_profiling_compressed);
	}

	return 0;
}
early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling);
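
/*
 * Editor's usage note, derived from the parsing above:
 *
 *	sysctl.vm.mem_profiling=never		- unsupported; the sysctl
 *						  below becomes read-only
 *	sysctl.vm.mem_profiling=0		- supported, starts disabled
 *	sysctl.vm.mem_profiling=1		- supported, starts enabled
 *	sysctl.vm.mem_profiling=1,compressed	- enabled, tag refs packed
 *						  into unused page flags
 */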

static __init bool need_page_alloc_tagging(void)
{
	if (static_key_enabled(&mem_profiling_compressed))
		return false;

	return mem_profiling_support;
}

static __init void init_page_alloc_tagging(void)
{
}

struct page_ext_operations page_alloc_tagging_ops = {
	.size = sizeof(union codetag_ref),
	.need = need_page_alloc_tagging,
	.init = init_page_alloc_tagging,
};
EXPORT_SYMBOL(page_alloc_tagging_ops);

#ifdef CONFIG_SYSCTL
/*
 * Not using proc_do_static_key() directly to prevent enabling profiling
 * after it was shut down.
 */
static int proc_mem_profiling_handler(const struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!mem_profiling_support && write)
		return -EINVAL;

	return proc_do_static_key(table, write, buffer, lenp, ppos);
}

static struct ctl_table memory_allocation_profiling_sysctls[] = {
	{
		.procname	= "mem_profiling",
		.data		= &mem_alloc_profiling_key,
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
		.mode		= 0444,
#else
		.mode		= 0644,
#endif
		.proc_handler	= proc_mem_profiling_handler,
	},
};

static void __init sysctl_init(void)
{
	if (!mem_profiling_support)
		memory_allocation_profiling_sysctls[0].mode = 0444;

	register_sysctl_init("vm", memory_allocation_profiling_sysctls);
}
#else /* CONFIG_SYSCTL */
static inline void sysctl_init(void) {}
#endif /* CONFIG_SYSCTL */
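
/*
 * Editor's usage note: once booted, profiling can be toggled with e.g.
 * "sysctl vm.mem_profiling=1". Writes are rejected with -EINVAL after
 * shutdown_mem_profiling() clears mem_profiling_support, and the file is
 * permanently read-only under CONFIG_MEM_ALLOC_PROFILING_DEBUG.
 */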

static int __init alloc_tag_init(void)
{
	const struct codetag_type_desc desc = {
		.section		= ALLOC_TAG_SECTION_NAME,
		.tag_size		= sizeof(struct alloc_tag),
#ifdef CONFIG_MODULES
		.needs_section_mem	= needs_section_mem,
		.alloc_section_mem	= reserve_module_tags,
		.free_section_mem	= release_module_tags,
		.module_load		= load_module,
		.module_replaced	= replace_module,
#endif
	};
	int res;

	sysctl_init();

	if (!mem_profiling_support) {
		pr_info("Memory allocation profiling is not supported!\n");
		return 0;
	}

	if (!proc_create_seq_private(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op,
				     sizeof(struct allocinfo_private), NULL)) {
		pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME);
		shutdown_mem_profiling(false);
		return -ENOMEM;
	}

	res = alloc_mod_tags_mem();
	if (res) {
		pr_err("Failed to reserve address space for module tags, errno = %d\n", res);
		shutdown_mem_profiling(true);
		return res;
	}

	alloc_tag_cttype = codetag_register_type(&desc);
	if (IS_ERR(alloc_tag_cttype)) {
		pr_err("Allocation tags registration failed, errno = %ld\n", PTR_ERR(alloc_tag_cttype));
		free_mod_tags_mem();
		shutdown_mem_profiling(true);
		return PTR_ERR(alloc_tag_cttype);
	}

	return 0;
}
module_init(alloc_tag_init);