1 /* 2 * mm/percpu-vm.c - vmalloc area based chunk allocation 3 * 4 * Copyright (C) 2010 SUSE Linux Products GmbH 5 * Copyright (C) 2010 Tejun Heo <tj@kernel.org> 6 * 7 * This file is released under the GPLv2. 8 * 9 * Chunks are mapped into vmalloc areas and populated page by page. 10 * This is the default chunk allocator. 11 */ 12 13 static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, 14 unsigned int cpu, int page_idx) 15 { 16 /* must not be used on pre-mapped chunk */ 17 WARN_ON(chunk->immutable); 18 19 return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); 20 } 21 22 /** 23 * pcpu_get_pages_and_bitmap - get temp pages array and bitmap 24 * @chunk: chunk of interest 25 * @bitmapp: output parameter for bitmap 26 * @may_alloc: may allocate the array 27 * 28 * Returns pointer to array of pointers to struct page and bitmap, 29 * both of which can be indexed with pcpu_page_idx(). The returned 30 * array is cleared to zero and *@bitmapp is copied from 31 * @chunk->populated. Note that there is only one array and bitmap 32 * and access exclusion is the caller's responsibility. 33 * 34 * CONTEXT: 35 * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. 36 * Otherwise, don't care. 37 * 38 * RETURNS: 39 * Pointer to temp pages array on success, NULL on failure. 40 */ 41 static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, 42 unsigned long **bitmapp, 43 bool may_alloc) 44 { 45 static struct page **pages; 46 static unsigned long *bitmap; 47 size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); 48 size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * 49 sizeof(unsigned long); 50 51 if (!pages || !bitmap) { 52 if (may_alloc && !pages) 53 pages = pcpu_mem_zalloc(pages_size); 54 if (may_alloc && !bitmap) 55 bitmap = pcpu_mem_zalloc(bitmap_size); 56 if (!pages || !bitmap) 57 return NULL; 58 } 59 60 bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); 61 62 *bitmapp = bitmap; 63 return pages; 64 } 65 66 /** 67 * pcpu_free_pages - free pages which were allocated for @chunk 68 * @chunk: chunk pages were allocated for 69 * @pages: array of pages to be freed, indexed by pcpu_page_idx() 70 * @populated: populated bitmap 71 * @page_start: page index of the first page to be freed 72 * @page_end: page index of the last page to be freed + 1 73 * 74 * Free pages [@page_start and @page_end) in @pages for all units. 75 * The pages were allocated for @chunk. 76 */ 77 static void pcpu_free_pages(struct pcpu_chunk *chunk, 78 struct page **pages, unsigned long *populated, 79 int page_start, int page_end) 80 { 81 unsigned int cpu; 82 int i; 83 84 for_each_possible_cpu(cpu) { 85 for (i = page_start; i < page_end; i++) { 86 struct page *page = pages[pcpu_page_idx(cpu, i)]; 87 88 if (page) 89 __free_page(page); 90 } 91 } 92 } 93 94 /** 95 * pcpu_alloc_pages - allocates pages for @chunk 96 * @chunk: target chunk 97 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() 98 * @populated: populated bitmap 99 * @page_start: page index of the first page to be allocated 100 * @page_end: page index of the last page to be allocated + 1 101 * 102 * Allocate pages [@page_start,@page_end) into @pages for all units. 103 * The allocation is for @chunk. Percpu core doesn't care about the 104 * content of @pages and will pass it verbatim to pcpu_map_pages(). 105 */ 106 static int pcpu_alloc_pages(struct pcpu_chunk *chunk, 107 struct page **pages, unsigned long *populated, 108 int page_start, int page_end) 109 { 110 const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; 111 unsigned int cpu; 112 int i; 113 114 for_each_possible_cpu(cpu) { 115 for (i = page_start; i < page_end; i++) { 116 struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; 117 118 *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); 119 if (!*pagep) { 120 pcpu_free_pages(chunk, pages, populated, 121 page_start, page_end); 122 return -ENOMEM; 123 } 124 } 125 } 126 return 0; 127 } 128 129 /** 130 * pcpu_pre_unmap_flush - flush cache prior to unmapping 131 * @chunk: chunk the regions to be flushed belongs to 132 * @page_start: page index of the first page to be flushed 133 * @page_end: page index of the last page to be flushed + 1 134 * 135 * Pages in [@page_start,@page_end) of @chunk are about to be 136 * unmapped. Flush cache. As each flushing trial can be very 137 * expensive, issue flush on the whole region at once rather than 138 * doing it for each cpu. This could be an overkill but is more 139 * scalable. 140 */ 141 static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, 142 int page_start, int page_end) 143 { 144 flush_cache_vunmap( 145 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), 146 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); 147 } 148 149 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) 150 { 151 unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); 152 } 153 154 /** 155 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk 156 * @chunk: chunk of interest 157 * @pages: pages array which can be used to pass information to free 158 * @populated: populated bitmap 159 * @page_start: page index of the first page to unmap 160 * @page_end: page index of the last page to unmap + 1 161 * 162 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. 163 * Corresponding elements in @pages were cleared by the caller and can 164 * be used to carry information to pcpu_free_pages() which will be 165 * called after all unmaps are finished. The caller should call 166 * proper pre/post flush functions. 167 */ 168 static void pcpu_unmap_pages(struct pcpu_chunk *chunk, 169 struct page **pages, unsigned long *populated, 170 int page_start, int page_end) 171 { 172 unsigned int cpu; 173 int i; 174 175 for_each_possible_cpu(cpu) { 176 for (i = page_start; i < page_end; i++) { 177 struct page *page; 178 179 page = pcpu_chunk_page(chunk, cpu, i); 180 WARN_ON(!page); 181 pages[pcpu_page_idx(cpu, i)] = page; 182 } 183 __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), 184 page_end - page_start); 185 } 186 187 for (i = page_start; i < page_end; i++) 188 __clear_bit(i, populated); 189 } 190 191 /** 192 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping 193 * @chunk: pcpu_chunk the regions to be flushed belong to 194 * @page_start: page index of the first page to be flushed 195 * @page_end: page index of the last page to be flushed + 1 196 * 197 * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush 198 * TLB for the regions. This can be skipped if the area is to be 199 * returned to vmalloc as vmalloc will handle TLB flushing lazily. 200 * 201 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once 202 * for the whole region. 203 */ 204 static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, 205 int page_start, int page_end) 206 { 207 flush_tlb_kernel_range( 208 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), 209 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); 210 } 211 212 static int __pcpu_map_pages(unsigned long addr, struct page **pages, 213 int nr_pages) 214 { 215 return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT, 216 PAGE_KERNEL, pages); 217 } 218 219 /** 220 * pcpu_map_pages - map pages into a pcpu_chunk 221 * @chunk: chunk of interest 222 * @pages: pages array containing pages to be mapped 223 * @populated: populated bitmap 224 * @page_start: page index of the first page to map 225 * @page_end: page index of the last page to map + 1 226 * 227 * For each cpu, map pages [@page_start,@page_end) into @chunk. The 228 * caller is responsible for calling pcpu_post_map_flush() after all 229 * mappings are complete. 230 * 231 * This function is responsible for setting corresponding bits in 232 * @chunk->populated bitmap and whatever is necessary for reverse 233 * lookup (addr -> chunk). 234 */ 235 static int pcpu_map_pages(struct pcpu_chunk *chunk, 236 struct page **pages, unsigned long *populated, 237 int page_start, int page_end) 238 { 239 unsigned int cpu, tcpu; 240 int i, err; 241 242 for_each_possible_cpu(cpu) { 243 err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), 244 &pages[pcpu_page_idx(cpu, page_start)], 245 page_end - page_start); 246 if (err < 0) 247 goto err; 248 } 249 250 /* mapping successful, link chunk and mark populated */ 251 for (i = page_start; i < page_end; i++) { 252 for_each_possible_cpu(cpu) 253 pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], 254 chunk); 255 __set_bit(i, populated); 256 } 257 258 return 0; 259 260 err: 261 for_each_possible_cpu(tcpu) { 262 if (tcpu == cpu) 263 break; 264 __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), 265 page_end - page_start); 266 } 267 return err; 268 } 269 270 /** 271 * pcpu_post_map_flush - flush cache after mapping 272 * @chunk: pcpu_chunk the regions to be flushed belong to 273 * @page_start: page index of the first page to be flushed 274 * @page_end: page index of the last page to be flushed + 1 275 * 276 * Pages [@page_start,@page_end) of @chunk have been mapped. Flush 277 * cache. 278 * 279 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once 280 * for the whole region. 281 */ 282 static void pcpu_post_map_flush(struct pcpu_chunk *chunk, 283 int page_start, int page_end) 284 { 285 flush_cache_vmap( 286 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), 287 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); 288 } 289 290 /** 291 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk 292 * @chunk: chunk of interest 293 * @off: offset to the area to populate 294 * @size: size of the area to populate in bytes 295 * 296 * For each cpu, populate and map pages [@page_start,@page_end) into 297 * @chunk. The area is cleared on return. 298 * 299 * CONTEXT: 300 * pcpu_alloc_mutex, does GFP_KERNEL allocation. 301 */ 302 static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) 303 { 304 int page_start = PFN_DOWN(off); 305 int page_end = PFN_UP(off + size); 306 int free_end = page_start, unmap_end = page_start; 307 struct page **pages; 308 unsigned long *populated; 309 unsigned int cpu; 310 int rs, re, rc; 311 312 /* quick path, check whether all pages are already there */ 313 rs = page_start; 314 pcpu_next_pop(chunk, &rs, &re, page_end); 315 if (rs == page_start && re == page_end) 316 goto clear; 317 318 /* need to allocate and map pages, this chunk can't be immutable */ 319 WARN_ON(chunk->immutable); 320 321 pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); 322 if (!pages) 323 return -ENOMEM; 324 325 /* alloc and map */ 326 pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { 327 rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); 328 if (rc) 329 goto err_free; 330 free_end = re; 331 } 332 333 pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { 334 rc = pcpu_map_pages(chunk, pages, populated, rs, re); 335 if (rc) 336 goto err_unmap; 337 unmap_end = re; 338 } 339 pcpu_post_map_flush(chunk, page_start, page_end); 340 341 /* commit new bitmap */ 342 bitmap_copy(chunk->populated, populated, pcpu_unit_pages); 343 clear: 344 for_each_possible_cpu(cpu) 345 memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); 346 return 0; 347 348 err_unmap: 349 pcpu_pre_unmap_flush(chunk, page_start, unmap_end); 350 pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) 351 pcpu_unmap_pages(chunk, pages, populated, rs, re); 352 pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); 353 err_free: 354 pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) 355 pcpu_free_pages(chunk, pages, populated, rs, re); 356 return rc; 357 } 358 359 /** 360 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk 361 * @chunk: chunk to depopulate 362 * @off: offset to the area to depopulate 363 * @size: size of the area to depopulate in bytes 364 * @flush: whether to flush cache and tlb or not 365 * 366 * For each cpu, depopulate and unmap pages [@page_start,@page_end) 367 * from @chunk. If @flush is true, vcache is flushed before unmapping 368 * and tlb after. 369 * 370 * CONTEXT: 371 * pcpu_alloc_mutex. 372 */ 373 static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) 374 { 375 int page_start = PFN_DOWN(off); 376 int page_end = PFN_UP(off + size); 377 struct page **pages; 378 unsigned long *populated; 379 int rs, re; 380 381 /* quick path, check whether it's empty already */ 382 rs = page_start; 383 pcpu_next_unpop(chunk, &rs, &re, page_end); 384 if (rs == page_start && re == page_end) 385 return; 386 387 /* immutable chunks can't be depopulated */ 388 WARN_ON(chunk->immutable); 389 390 /* 391 * If control reaches here, there must have been at least one 392 * successful population attempt so the temp pages array must 393 * be available now. 394 */ 395 pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); 396 BUG_ON(!pages); 397 398 /* unmap and free */ 399 pcpu_pre_unmap_flush(chunk, page_start, page_end); 400 401 pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) 402 pcpu_unmap_pages(chunk, pages, populated, rs, re); 403 404 /* no need to flush tlb, vmalloc will handle it lazily */ 405 406 pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) 407 pcpu_free_pages(chunk, pages, populated, rs, re); 408 409 /* commit new bitmap */ 410 bitmap_copy(chunk->populated, populated, pcpu_unit_pages); 411 } 412 413 static struct pcpu_chunk *pcpu_create_chunk(void) 414 { 415 struct pcpu_chunk *chunk; 416 struct vm_struct **vms; 417 418 chunk = pcpu_alloc_chunk(); 419 if (!chunk) 420 return NULL; 421 422 vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes, 423 pcpu_nr_groups, pcpu_atom_size); 424 if (!vms) { 425 pcpu_free_chunk(chunk); 426 return NULL; 427 } 428 429 chunk->data = vms; 430 chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; 431 return chunk; 432 } 433 434 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) 435 { 436 if (chunk && chunk->data) 437 pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); 438 pcpu_free_chunk(chunk); 439 } 440 441 static struct page *pcpu_addr_to_page(void *addr) 442 { 443 return vmalloc_to_page(addr); 444 } 445 446 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai) 447 { 448 /* no extra restriction */ 449 return 0; 450 } 451