// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 */

static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
				    unsigned int cpu, int page_idx)
{
	/* must not be used on pre-mapped chunk */
	WARN_ON(chunk->immutable);

	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}

/**
 * pcpu_get_pages - get temp pages array
 *
 * Returns pointer to array of pointers to struct page which can be indexed
 * with pcpu_page_idx().  Note that there is only one array and accesses
 * should be serialized by pcpu_alloc_mutex.
 *
 * RETURNS:
 * Pointer to temp pages array on success.
 */
static struct page **pcpu_get_pages(void)
{
	static struct page **pages;
	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);

	lockdep_assert_held(&pcpu_alloc_mutex);

	if (!pages)
		pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
	return pages;
}

/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start, @page_end) in @pages for all units.
 * The pages were allocated for @chunk.
 */
static void pcpu_free_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page = pages[pcpu_page_idx(cpu, i)];

			if (page)
				__free_page(page);
		}
	}
}

/**
 * pcpu_alloc_pages - allocates pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 * @gfp: allocation flags passed to the underlying allocator
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk.  Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end,
			    gfp_t gfp)
{
	unsigned int cpu, tcpu;
	int i;

	gfp |= __GFP_HIGHMEM;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (!*pagep)
				goto err;
		}
	}
	return 0;

err:
	/* undo the partial range for @cpu, then full ranges for earlier cpus */
	while (--i >= page_start)
		__free_page(pages[pcpu_page_idx(cpu, i)]);

	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		for (i = page_start; i < page_end; i++)
			__free_page(pages[pcpu_page_idx(tcpu, i)]);
	}
	return -ENOMEM;
}

/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start,@page_end) of @chunk are about to be
 * unmapped.  Flush cache.  As each flushing trial can be very
 * expensive, issue flush on the whole region at once rather than
 * doing it for each cpu.  This could be overkill but is more
 * scalable.
 */
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	flush_cache_vunmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
	unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
}

/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array which can be used to pass information to free
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
 * Corresponding elements in @pages were cleared by the caller and can
 * be used to carry information to pcpu_free_pages() which will be
 * called after all unmaps are finished.  The caller should call
 * proper pre/post flush functions.
 */
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
			     struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page;

			page = pcpu_chunk_page(chunk, cpu, i);
			WARN_ON(!page);
			pages[pcpu_page_idx(cpu, i)] = page;
		}
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				   page_end - page_start);
	}
}

/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been unmapped.  Flush
 * TLB for the regions.  This can be skipped if the area is to be
 * returned to vmalloc as vmalloc will handle TLB flushing lazily.
 *
 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once
 * for the whole region.
 */
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
{
	flush_tlb_kernel_range(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static int __pcpu_map_pages(unsigned long addr, struct page **pages,
			    int nr_pages)
{
	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
					PAGE_KERNEL, pages);
}

/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start,@page_end) into @chunk.  The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting up whatever is necessary for
 * reverse lookup (addr -> chunk).
 */
static int pcpu_map_pages(struct pcpu_chunk *chunk,
			  struct page **pages, int page_start, int page_end)
{
	unsigned int cpu, tcpu;
	int i, err;

	for_each_possible_cpu(cpu) {
		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				       &pages[pcpu_page_idx(cpu, page_start)],
				       page_end - page_start);
		if (err < 0)
			goto err;

		for (i = page_start; i < page_end; i++)
			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
					    chunk);
	}
	return 0;
err:
	/* unmap the ranges already mapped for earlier cpus and flush the TLB */
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
				   page_end - page_start);
	}
	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
	return err;
}

/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been mapped.  Flush
 * cache.
 *
 * As with pcpu_pre_unmap_flush(), the cache flush is done at once
 * for the whole region.
 */
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
{
	flush_cache_vmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @page_start: the start page
 * @page_end: the end page
 * @gfp: allocation flags passed to the underlying memory allocator
 *
 * For each cpu, populate and map pages [@page_start,@page_end) into
 * @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
			       int page_start, int page_end, gfp_t gfp)
{
	struct page **pages;

	pages = pcpu_get_pages();
	if (!pages)
		return -ENOMEM;

	if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
		return -ENOMEM;

	if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
		pcpu_free_pages(chunk, pages, page_start, page_end);
		return -ENOMEM;
	}
	pcpu_post_map_flush(chunk, page_start, page_end);

	return 0;
}

/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, depopulate and unmap pages [@page_start,@page_end)
 * from @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
				  int page_start, int page_end)
{
	struct page **pages;

	/*
	 * If control reaches here, there must have been at least one
	 * successful population attempt so the temp pages array must
	 * be available now.
	 */
	pages = pcpu_get_pages();
	BUG_ON(!pages);

	/* unmap and free */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);

	pcpu_unmap_pages(chunk, pages, page_start, page_end);

	/* no need to flush tlb, vmalloc will handle it lazily */

	pcpu_free_pages(chunk, pages, page_start, page_end);
}

static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
					    gfp_t gfp)
{
	struct pcpu_chunk *chunk;
	struct vm_struct **vms;

	chunk = pcpu_alloc_chunk(type, gfp);
	if (!chunk)
		return NULL;

	vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				pcpu_nr_groups, pcpu_atom_size);
	if (!vms) {
		pcpu_free_chunk(chunk);
		return NULL;
	}

	chunk->data = vms;
	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];

	pcpu_stats_chunk_alloc();
	trace_percpu_create_chunk(chunk->base_addr);

	return chunk;
}

static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{
	if (!chunk)
		return;

	pcpu_stats_chunk_dealloc();
	trace_percpu_destroy_chunk(chunk->base_addr);

	if (chunk->data)
		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
	pcpu_free_chunk(chunk);
}

static struct page *pcpu_addr_to_page(void *addr)
{
	return vmalloc_to_page(addr);
}

static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
	/* no extra restriction */
	return 0;
}
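
/*
 * How this backend is selected: everything above is static and this file is
 * not compiled standalone.  In kernels of this era, mm/percpu.c includes
 * exactly one chunk backend at build time, roughly as sketched below; the
 * vmalloc-based allocator in this file is the default, while percpu-km.c is
 * the kernel-memory based alternative used when vmalloc-area mapping is not
 * suitable (e.g. nommu).  The exact config guard may differ between kernel
 * versions.
 *
 *	#ifdef CONFIG_NEED_PER_CPU_KM
 *	#include "percpu-km.c"
 *	#else
 *	#include "percpu-vm.c"
 *	#endif
 */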