1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * KMSAN initialization routines. 4 * 5 * Copyright (C) 2017-2021 Google LLC 6 * Author: Alexander Potapenko <glider@google.com> 7 * 8 */ 9 10 #include "kmsan.h" 11 12 #include <asm/sections.h> 13 #include <linux/mm.h> 14 #include <linux/memblock.h> 15 16 #include "../internal.h" 17 18 #define NUM_FUTURE_RANGES 128 19 struct start_end_pair { 20 u64 start, end; 21 }; 22 23 static struct start_end_pair start_end_pairs[NUM_FUTURE_RANGES] __initdata; 24 static int future_index __initdata; 25 26 /* 27 * Record a range of memory for which the metadata pages will be created once 28 * the page allocator becomes available. 29 */ 30 static void __init kmsan_record_future_shadow_range(void *start, void *end) 31 { 32 u64 nstart = (u64)start, nend = (u64)end, cstart, cend; 33 bool merged = false; 34 35 KMSAN_WARN_ON(future_index == NUM_FUTURE_RANGES); 36 KMSAN_WARN_ON((nstart >= nend) || 37 /* Virtual address 0 is valid on s390. */ 38 (!IS_ENABLED(CONFIG_S390) && !nstart) || 39 !nend); 40 nstart = ALIGN_DOWN(nstart, PAGE_SIZE); 41 nend = ALIGN(nend, PAGE_SIZE); 42 43 /* 44 * Scan the existing ranges to see if any of them overlaps with 45 * [start, end). In that case, merge the two ranges instead of 46 * creating a new one. 47 * The number of ranges is less than 20, so there is no need to organize 48 * them into a more intelligent data structure. 49 */ 50 for (int i = 0; i < future_index; i++) { 51 cstart = start_end_pairs[i].start; 52 cend = start_end_pairs[i].end; 53 if ((cstart < nstart && cend < nstart) || 54 (cstart > nend && cend > nend)) 55 /* ranges are disjoint - do not merge */ 56 continue; 57 start_end_pairs[i].start = min(nstart, cstart); 58 start_end_pairs[i].end = max(nend, cend); 59 merged = true; 60 break; 61 } 62 if (merged) 63 return; 64 start_end_pairs[future_index].start = nstart; 65 start_end_pairs[future_index].end = nend; 66 future_index++; 67 } 68 69 /* 70 * Initialize the shadow for existing mappings during kernel initialization. 71 * These include kernel text/data sections, NODE_DATA and future ranges 72 * registered while creating other data (e.g. percpu). 73 * 74 * Allocations via memblock can be only done before slab is initialized. 75 */ 76 void __init kmsan_init_shadow(void) 77 { 78 const size_t nd_size = sizeof(pg_data_t); 79 phys_addr_t p_start, p_end; 80 u64 loop; 81 int nid; 82 83 for_each_reserved_mem_range(loop, &p_start, &p_end) 84 kmsan_record_future_shadow_range(phys_to_virt(p_start), 85 phys_to_virt(p_end)); 86 /* Allocate shadow for .data */ 87 kmsan_record_future_shadow_range(_sdata, _edata); 88 89 for_each_online_node(nid) 90 kmsan_record_future_shadow_range( 91 NODE_DATA(nid), (char *)NODE_DATA(nid) + nd_size); 92 93 for (int i = 0; i < future_index; i++) 94 kmsan_init_alloc_meta_for_range( 95 (void *)start_end_pairs[i].start, 96 (void *)start_end_pairs[i].end); 97 } 98 99 struct metadata_page_pair { 100 struct page *shadow, *origin; 101 }; 102 static struct metadata_page_pair held_back[NR_PAGE_ORDERS] __initdata; 103 104 /* 105 * Eager metadata allocation. When the memblock allocator is freeing pages to 106 * pagealloc, we use 2/3 of them as metadata for the remaining 1/3. 107 * We store the pointers to the returned blocks of pages in held_back[] grouped 108 * by their order: when kmsan_memblock_free_pages() is called for the first 109 * time with a certain order, it is reserved as a shadow block, for the second 110 * time - as an origin block. On the third time the incoming block receives its 111 * shadow and origin ranges from the previously saved shadow and origin blocks, 112 * after which held_back[order] can be used again. 113 * 114 * At the very end there may be leftover blocks in held_back[]. They are 115 * collected later by kmsan_memblock_discard(). 116 */ 117 bool kmsan_memblock_free_pages(struct page *page, unsigned int order) 118 { 119 struct page *shadow, *origin; 120 121 if (!held_back[order].shadow) { 122 held_back[order].shadow = page; 123 return false; 124 } 125 if (!held_back[order].origin) { 126 held_back[order].origin = page; 127 return false; 128 } 129 shadow = held_back[order].shadow; 130 origin = held_back[order].origin; 131 kmsan_setup_meta(page, shadow, origin, order); 132 133 held_back[order].shadow = NULL; 134 held_back[order].origin = NULL; 135 return true; 136 } 137 138 #define MAX_BLOCKS 8 139 struct smallstack { 140 struct page *items[MAX_BLOCKS]; 141 int index; 142 int order; 143 }; 144 145 static struct smallstack collect = { 146 .index = 0, 147 .order = MAX_PAGE_ORDER, 148 }; 149 150 static void smallstack_push(struct smallstack *stack, struct page *pages) 151 { 152 KMSAN_WARN_ON(stack->index == MAX_BLOCKS); 153 stack->items[stack->index] = pages; 154 stack->index++; 155 } 156 #undef MAX_BLOCKS 157 158 static struct page *smallstack_pop(struct smallstack *stack) 159 { 160 struct page *ret; 161 162 KMSAN_WARN_ON(stack->index == 0); 163 stack->index--; 164 ret = stack->items[stack->index]; 165 stack->items[stack->index] = NULL; 166 return ret; 167 } 168 169 static void do_collection(void) 170 { 171 struct page *page, *shadow, *origin; 172 173 while (collect.index >= 3) { 174 page = smallstack_pop(&collect); 175 shadow = smallstack_pop(&collect); 176 origin = smallstack_pop(&collect); 177 kmsan_setup_meta(page, shadow, origin, collect.order); 178 __free_pages_core(page, collect.order, MEMINIT_EARLY); 179 } 180 } 181 182 static void collect_split(void) 183 { 184 struct smallstack tmp = { 185 .order = collect.order - 1, 186 .index = 0, 187 }; 188 struct page *page; 189 190 if (!collect.order) 191 return; 192 while (collect.index) { 193 page = smallstack_pop(&collect); 194 smallstack_push(&tmp, &page[0]); 195 smallstack_push(&tmp, &page[1 << tmp.order]); 196 } 197 __memcpy(&collect, &tmp, sizeof(tmp)); 198 } 199 200 /* 201 * Memblock is about to go away. Split the page blocks left over in held_back[] 202 * and return 1/3 of that memory to the system. 203 */ 204 static void kmsan_memblock_discard(void) 205 { 206 /* 207 * For each order=N: 208 * - push held_back[N].shadow and .origin to @collect; 209 * - while there are >= 3 elements in @collect, do garbage collection: 210 * - pop 3 ranges from @collect; 211 * - use two of them as shadow and origin for the third one; 212 * - repeat; 213 * - split each remaining element from @collect into 2 ranges of 214 * order=N-1, 215 * - repeat. 216 */ 217 collect.order = MAX_PAGE_ORDER; 218 for (int i = MAX_PAGE_ORDER; i >= 0; i--) { 219 if (held_back[i].shadow) 220 smallstack_push(&collect, held_back[i].shadow); 221 if (held_back[i].origin) 222 smallstack_push(&collect, held_back[i].origin); 223 held_back[i].shadow = NULL; 224 held_back[i].origin = NULL; 225 do_collection(); 226 collect_split(); 227 } 228 } 229 230 void __init kmsan_init_runtime(void) 231 { 232 /* Assuming current is init_task */ 233 kmsan_internal_task_create(current); 234 kmsan_memblock_discard(); 235 pr_info("Starting KernelMemorySanitizer\n"); 236 pr_info("ATTENTION: KMSAN is a debugging tool! Do not use it on production machines!\n"); 237 kmsan_enabled = true; 238 } 239