1 /* 2 * mm/percpu-vm.c - vmalloc area based chunk allocation 3 * 4 * Copyright (C) 2010 SUSE Linux Products GmbH 5 * Copyright (C) 2010 Tejun Heo <tj@kernel.org> 6 * 7 * This file is released under the GPLv2. 8 * 9 * Chunks are mapped into vmalloc areas and populated page by page. 10 * This is the default chunk allocator. 11 */ 12 13 static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, 14 unsigned int cpu, int page_idx) 15 { 16 /* must not be used on pre-mapped chunk */ 17 WARN_ON(chunk->immutable); 18 19 return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); 20 } 21 22 /** 23 * pcpu_get_pages - get temp pages array 24 * 25 * Returns pointer to array of pointers to struct page which can be indexed 26 * with pcpu_page_idx(). Note that there is only one array and accesses 27 * should be serialized by pcpu_alloc_mutex. 28 * 29 * RETURNS: 30 * Pointer to temp pages array on success. 31 */ 32 static struct page **pcpu_get_pages(void) 33 { 34 static struct page **pages; 35 size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); 36 37 lockdep_assert_held(&pcpu_alloc_mutex); 38 39 if (!pages) 40 pages = pcpu_mem_zalloc(pages_size); 41 return pages; 42 } 43 44 /** 45 * pcpu_free_pages - free pages which were allocated for @chunk 46 * @chunk: chunk pages were allocated for 47 * @pages: array of pages to be freed, indexed by pcpu_page_idx() 48 * @page_start: page index of the first page to be freed 49 * @page_end: page index of the last page to be freed + 1 50 * 51 * Free pages [@page_start and @page_end) in @pages for all units. 52 * The pages were allocated for @chunk. 53 */ 54 static void pcpu_free_pages(struct pcpu_chunk *chunk, 55 struct page **pages, int page_start, int page_end) 56 { 57 unsigned int cpu; 58 int i; 59 60 for_each_possible_cpu(cpu) { 61 for (i = page_start; i < page_end; i++) { 62 struct page *page = pages[pcpu_page_idx(cpu, i)]; 63 64 if (page) 65 __free_page(page); 66 } 67 } 68 } 69 70 /** 71 * pcpu_alloc_pages - allocates pages for @chunk 72 * @chunk: target chunk 73 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() 74 * @page_start: page index of the first page to be allocated 75 * @page_end: page index of the last page to be allocated + 1 76 * 77 * Allocate pages [@page_start,@page_end) into @pages for all units. 78 * The allocation is for @chunk. Percpu core doesn't care about the 79 * content of @pages and will pass it verbatim to pcpu_map_pages(). 80 */ 81 static int pcpu_alloc_pages(struct pcpu_chunk *chunk, 82 struct page **pages, int page_start, int page_end) 83 { 84 const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; 85 unsigned int cpu, tcpu; 86 int i; 87 88 for_each_possible_cpu(cpu) { 89 for (i = page_start; i < page_end; i++) { 90 struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; 91 92 *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); 93 if (!*pagep) 94 goto err; 95 } 96 } 97 return 0; 98 99 err: 100 while (--i >= page_start) 101 __free_page(pages[pcpu_page_idx(cpu, i)]); 102 103 for_each_possible_cpu(tcpu) { 104 if (tcpu == cpu) 105 break; 106 for (i = page_start; i < page_end; i++) 107 __free_page(pages[pcpu_page_idx(tcpu, i)]); 108 } 109 return -ENOMEM; 110 } 111 112 /** 113 * pcpu_pre_unmap_flush - flush cache prior to unmapping 114 * @chunk: chunk the regions to be flushed belongs to 115 * @page_start: page index of the first page to be flushed 116 * @page_end: page index of the last page to be flushed + 1 117 * 118 * Pages in [@page_start,@page_end) of @chunk are about to be 119 * unmapped. Flush cache. As each flushing trial can be very 120 * expensive, issue flush on the whole region at once rather than 121 * doing it for each cpu. This could be an overkill but is more 122 * scalable. 123 */ 124 static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, 125 int page_start, int page_end) 126 { 127 flush_cache_vunmap( 128 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), 129 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); 130 } 131 132 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) 133 { 134 unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); 135 } 136 137 /** 138 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk 139 * @chunk: chunk of interest 140 * @pages: pages array which can be used to pass information to free 141 * @page_start: page index of the first page to unmap 142 * @page_end: page index of the last page to unmap + 1 143 * 144 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. 145 * Corresponding elements in @pages were cleared by the caller and can 146 * be used to carry information to pcpu_free_pages() which will be 147 * called after all unmaps are finished. The caller should call 148 * proper pre/post flush functions. 149 */ 150 static void pcpu_unmap_pages(struct pcpu_chunk *chunk, 151 struct page **pages, int page_start, int page_end) 152 { 153 unsigned int cpu; 154 int i; 155 156 for_each_possible_cpu(cpu) { 157 for (i = page_start; i < page_end; i++) { 158 struct page *page; 159 160 page = pcpu_chunk_page(chunk, cpu, i); 161 WARN_ON(!page); 162 pages[pcpu_page_idx(cpu, i)] = page; 163 } 164 __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), 165 page_end - page_start); 166 } 167 } 168 169 /** 170 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping 171 * @chunk: pcpu_chunk the regions to be flushed belong to 172 * @page_start: page index of the first page to be flushed 173 * @page_end: page index of the last page to be flushed + 1 174 * 175 * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush 176 * TLB for the regions. This can be skipped if the area is to be 177 * returned to vmalloc as vmalloc will handle TLB flushing lazily. 178 * 179 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once 180 * for the whole region. 181 */ 182 static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, 183 int page_start, int page_end) 184 { 185 flush_tlb_kernel_range( 186 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), 187 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); 188 } 189 190 static int __pcpu_map_pages(unsigned long addr, struct page **pages, 191 int nr_pages) 192 { 193 return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT, 194 PAGE_KERNEL, pages); 195 } 196 197 /** 198 * pcpu_map_pages - map pages into a pcpu_chunk 199 * @chunk: chunk of interest 200 * @pages: pages array containing pages to be mapped 201 * @page_start: page index of the first page to map 202 * @page_end: page index of the last page to map + 1 203 * 204 * For each cpu, map pages [@page_start,@page_end) into @chunk. The 205 * caller is responsible for calling pcpu_post_map_flush() after all 206 * mappings are complete. 207 * 208 * This function is responsible for setting up whatever is necessary for 209 * reverse lookup (addr -> chunk). 210 */ 211 static int pcpu_map_pages(struct pcpu_chunk *chunk, 212 struct page **pages, int page_start, int page_end) 213 { 214 unsigned int cpu, tcpu; 215 int i, err; 216 217 for_each_possible_cpu(cpu) { 218 err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), 219 &pages[pcpu_page_idx(cpu, page_start)], 220 page_end - page_start); 221 if (err < 0) 222 goto err; 223 224 for (i = page_start; i < page_end; i++) 225 pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], 226 chunk); 227 } 228 return 0; 229 err: 230 for_each_possible_cpu(tcpu) { 231 if (tcpu == cpu) 232 break; 233 __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), 234 page_end - page_start); 235 } 236 pcpu_post_unmap_tlb_flush(chunk, page_start, page_end); 237 return err; 238 } 239 240 /** 241 * pcpu_post_map_flush - flush cache after mapping 242 * @chunk: pcpu_chunk the regions to be flushed belong to 243 * @page_start: page index of the first page to be flushed 244 * @page_end: page index of the last page to be flushed + 1 245 * 246 * Pages [@page_start,@page_end) of @chunk have been mapped. Flush 247 * cache. 248 * 249 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once 250 * for the whole region. 251 */ 252 static void pcpu_post_map_flush(struct pcpu_chunk *chunk, 253 int page_start, int page_end) 254 { 255 flush_cache_vmap( 256 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), 257 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); 258 } 259 260 /** 261 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk 262 * @chunk: chunk of interest 263 * @page_start: the start page 264 * @page_end: the end page 265 * 266 * For each cpu, populate and map pages [@page_start,@page_end) into 267 * @chunk. 268 * 269 * CONTEXT: 270 * pcpu_alloc_mutex, does GFP_KERNEL allocation. 271 */ 272 static int pcpu_populate_chunk(struct pcpu_chunk *chunk, 273 int page_start, int page_end) 274 { 275 struct page **pages; 276 277 pages = pcpu_get_pages(); 278 if (!pages) 279 return -ENOMEM; 280 281 if (pcpu_alloc_pages(chunk, pages, page_start, page_end)) 282 return -ENOMEM; 283 284 if (pcpu_map_pages(chunk, pages, page_start, page_end)) { 285 pcpu_free_pages(chunk, pages, page_start, page_end); 286 return -ENOMEM; 287 } 288 pcpu_post_map_flush(chunk, page_start, page_end); 289 290 return 0; 291 } 292 293 /** 294 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk 295 * @chunk: chunk to depopulate 296 * @page_start: the start page 297 * @page_end: the end page 298 * 299 * For each cpu, depopulate and unmap pages [@page_start,@page_end) 300 * from @chunk. 301 * 302 * CONTEXT: 303 * pcpu_alloc_mutex. 304 */ 305 static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, 306 int page_start, int page_end) 307 { 308 struct page **pages; 309 310 /* 311 * If control reaches here, there must have been at least one 312 * successful population attempt so the temp pages array must 313 * be available now. 314 */ 315 pages = pcpu_get_pages(); 316 BUG_ON(!pages); 317 318 /* unmap and free */ 319 pcpu_pre_unmap_flush(chunk, page_start, page_end); 320 321 pcpu_unmap_pages(chunk, pages, page_start, page_end); 322 323 /* no need to flush tlb, vmalloc will handle it lazily */ 324 325 pcpu_free_pages(chunk, pages, page_start, page_end); 326 } 327 328 static struct pcpu_chunk *pcpu_create_chunk(void) 329 { 330 struct pcpu_chunk *chunk; 331 struct vm_struct **vms; 332 333 chunk = pcpu_alloc_chunk(); 334 if (!chunk) 335 return NULL; 336 337 vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes, 338 pcpu_nr_groups, pcpu_atom_size); 339 if (!vms) { 340 pcpu_free_chunk(chunk); 341 return NULL; 342 } 343 344 chunk->data = vms; 345 chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; 346 347 pcpu_stats_chunk_alloc(); 348 trace_percpu_create_chunk(chunk->base_addr); 349 350 return chunk; 351 } 352 353 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) 354 { 355 if (!chunk) 356 return; 357 358 pcpu_stats_chunk_dealloc(); 359 trace_percpu_destroy_chunk(chunk->base_addr); 360 361 if (chunk->data) 362 pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); 363 pcpu_free_chunk(chunk); 364 } 365 366 static struct page *pcpu_addr_to_page(void *addr) 367 { 368 return vmalloc_to_page(addr); 369 } 370 371 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai) 372 { 373 /* no extra restriction */ 374 return 0; 375 } 376