setup_percpu.c (89c9215165ca609096e845926d9a18f1306176a4) | setup_percpu.c (8ac837571491e239e64bd87863c1679d8002e8a2) |
---|---|
1#include <linux/kernel.h> 2#include <linux/module.h> 3#include <linux/init.h> 4#include <linux/bootmem.h> 5#include <linux/percpu.h> 6#include <linux/kexec.h> 7#include <linux/crash_dump.h> 8#include <linux/smp.h> --- 97 unchanged lines hidden (view full) --- 106 } 107 return ptr; 108#else 109 return __alloc_bootmem_nopanic(size, align, goal); 110#endif 111} 112 113/* | 1#include <linux/kernel.h> 2#include <linux/module.h> 3#include <linux/init.h> 4#include <linux/bootmem.h> 5#include <linux/percpu.h> 6#include <linux/kexec.h> 7#include <linux/crash_dump.h> 8#include <linux/smp.h> --- 97 unchanged lines hidden (view full) --- 106 } 107 return ptr; 108#else 109 return __alloc_bootmem_nopanic(size, align, goal); 110#endif 111} 112 113/* |
114 * Remap allocator 115 * 116 * This allocator uses PMD page as unit. A PMD page is allocated for 117 * each cpu and each is remapped into vmalloc area using PMD mapping. 118 * As PMD page is quite large, only part of it is used for the first 119 * chunk. Unused part is returned to the bootmem allocator. 120 * 121 * So, the PMD pages are mapped twice - once to the physical mapping 122 * and to the vmalloc area for the first percpu chunk. The double 123 * mapping does add one more PMD TLB entry pressure but still is much 124 * better than only using 4k mappings while still being NUMA friendly. 125 */ 126#ifdef CONFIG_NEED_MULTIPLE_NODES 127static size_t pcpur_size __initdata; 128static void **pcpur_ptrs __initdata; 129 130static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) 131{ 132 size_t off = (size_t)pageno << PAGE_SHIFT; 133 134 if (off >= pcpur_size) 135 return NULL; 136 137 return virt_to_page(pcpur_ptrs[cpu] + off); 138} 139 140static ssize_t __init setup_pcpu_remap(size_t static_size) 141{ 142 static struct vm_struct vm; 143 pg_data_t *last; 144 size_t ptrs_size; 145 unsigned int cpu; 146 ssize_t ret; 147 148 /* 149 * If large page isn't supported, there's no benefit in doing 150 * this. Also, on non-NUMA, embedding is better. 151 */ 152 if (!cpu_has_pse || pcpu_need_numa()) 153 return -EINVAL; 154 155 last = NULL; 156 for_each_possible_cpu(cpu) { 157 int node = early_cpu_to_node(cpu); 158 159 if (node_online(node) && NODE_DATA(node) && 160 last && last != NODE_DATA(node)) 161 goto proceed; 162 163 last = NODE_DATA(node); 164 } 165 return -EINVAL; 166 167proceed: 168 /* 169 * Currently supports only single page. Supporting multiple 170 * pages won't be too difficult if it ever becomes necessary. 171 */ 172 pcpur_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); 173 if (pcpur_size > PMD_SIZE) { 174 pr_warning("PERCPU: static data is larger than large page, " 175 "can't use large page\n"); 176 return -EINVAL; 177 } 178 179 /* allocate pointer array and alloc large pages */ 180 ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); 181 pcpur_ptrs = alloc_bootmem(ptrs_size); 182 183 for_each_possible_cpu(cpu) { 184 pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE); 185 if (!pcpur_ptrs[cpu]) 186 goto enomem; 187 188 /* 189 * Only use pcpur_size bytes and give back the rest. 190 * 191 * Ingo: The 2MB up-rounding bootmem is needed to make 192 * sure the partial 2MB page is still fully RAM - it's 193 * not well-specified to have a PAT-incompatible area 194 * (unmapped RAM, device memory, etc.) in that hole. 
195 */ 196 free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), 197 PMD_SIZE - pcpur_size); 198 199 memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); 200 } 201 202 /* allocate address and map */ 203 vm.flags = VM_ALLOC; 204 vm.size = num_possible_cpus() * PMD_SIZE; 205 vm_area_register_early(&vm, PMD_SIZE); 206 207 for_each_possible_cpu(cpu) { 208 pmd_t *pmd; 209 210 pmd = populate_extra_pmd((unsigned long)vm.addr 211 + cpu * PMD_SIZE); 212 set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])), 213 PAGE_KERNEL_LARGE)); 214 } 215 216 /* we're ready, commit */ 217 pr_info("PERCPU: Remapped at %p with large pages, static data " 218 "%zu bytes\n", vm.addr, static_size); 219 220 ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, PMD_SIZE, 221 pcpur_size - static_size, vm.addr, NULL); 222 goto out_free_ar; 223 224enomem: 225 for_each_possible_cpu(cpu) 226 if (pcpur_ptrs[cpu]) 227 free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE); 228 ret = -ENOMEM; 229out_free_ar: 230 free_bootmem(__pa(pcpur_ptrs), ptrs_size); 231 return ret; 232} 233#else 234static ssize_t __init setup_pcpu_remap(size_t static_size) 235{ 236 return -EINVAL; 237} 238#endif 239 240/* |
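The size handling in the new setup_pcpu_remap() above is the heart of this hunk: each possible CPU gets a full PMD page, only PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE) bytes of it are kept for the first chunk, and the tail is handed back to bootmem. Below is a minimal standalone sketch of that arithmetic; the constants (4 KiB pages, 2 MiB PMD pages) and the example sizes are illustrative assumptions, not values taken from the kernel headers.

#include <stdio.h>
#include <stddef.h>

/* Illustrative assumptions: 4 KiB pages, 2 MiB PMD (large) pages. */
#define PAGE_SIZE	((size_t)4096)
#define PMD_SIZE	((size_t)2 * 1024 * 1024)
#define PFN_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	/* Hypothetical sizes; the real ones come from the percpu linker
	 * image and PERCPU_DYNAMIC_RESERVE. */
	size_t static_size = 45000;		/* static percpu image */
	size_t dyn_reserve = 20 * 1024;		/* first-chunk dynamic space */
	size_t pcpur_size  = PFN_ALIGN(static_size + dyn_reserve);

	if (pcpur_size > PMD_SIZE) {
		/* mirrors the "static data is larger than large page" bailout */
		fprintf(stderr, "can't use large page\n");
		return 1;
	}

	/* Per CPU: one whole PMD page is allocated, pcpur_size bytes are
	 * kept for the first chunk, the rest goes back to bootmem. */
	printf("kept per cpu:     %zu bytes\n", pcpur_size);
	printf("returned per cpu: %zu bytes\n", PMD_SIZE - pcpur_size);
	return 0;
}

Since only part of each 2 MiB page is actually needed for the first chunk, returning the unused tail is what keeps the large-page approach from wasting most of the allocation, which is why the free_bootmem() call in the hunk above exists.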
114 * Embedding allocator 115 * 116 * The first chunk is sized to just contain the static area plus 117 * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using 118 * bootmem allocator and used as-is without being mapped into vmalloc 119 * area. This enables the first chunk to piggy back on the linear 120 * physical PMD mapping and doesn't add any additional pressure to 121 * TLB. --- 132 unchanged lines hidden (view full) --- 254 unsigned int cpu; 255 unsigned long delta; 256 size_t pcpu_unit_size; 257 ssize_t ret; 258 259 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", 260 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); 261 | 241 * Embedding allocator 242 * 243 * The first chunk is sized to just contain the static area plus 244 * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using 245 * bootmem allocator and used as-is without being mapped into vmalloc 246 * area. This enables the first chunk to piggy back on the linear 247 * physical PMD mapping and doesn't add any additional pressure to 248 * TLB. --- 132 unchanged lines hidden (view full) --- 381 unsigned int cpu; 382 unsigned long delta; 383 size_t pcpu_unit_size; 384 ssize_t ret; 385 386 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", 387 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); 388 |
262 /* allocate percpu area */ 263 ret = setup_pcpu_embed(static_size); | 389 /* 390 * Allocate percpu area. If PSE is supported, try to make use 391 * of large page mappings. Please read comments on top of 392 * each allocator for details. 393 */ 394 ret = setup_pcpu_remap(static_size); |
264 if (ret < 0) | 395 if (ret < 0) |
396 ret = setup_pcpu_embed(static_size); 397 if (ret < 0) |
265 ret = setup_pcpu_4k(static_size); 266 if (ret < 0) 267 panic("cannot allocate static percpu area (%zu bytes, err=%zd)", 268 static_size, ret); 269 270 pcpu_unit_size = ret; 271 272 /* alrighty, percpu areas up and running */ --- 54 unchanged lines hidden --- | 398 ret = setup_pcpu_4k(static_size); 399 if (ret < 0) 400 panic("cannot allocate static percpu area (%zu bytes, err=%zd)", 401 static_size, ret); 402 403 pcpu_unit_size = ret; 404 405 /* alrighty, percpu areas up and running */ --- 54 unchanged lines hidden --- |
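Whichever allocator succeeds in the cascade above returns the per-cpu unit size (pcpu_unit_size = ret), and the remainder of setup_per_cpu_areas(), hidden in this view, presumably combines it with the first chunk's base address to derive each CPU's offset (note the unsigned long delta declared earlier). The sketch below is a userspace model of the layout the embedding allocator's comment describes: one contiguous area, one unit per possible CPU, each unit seeded with a copy of the static percpu image. NR_CPUS_MODEL, UNIT_SIZE and per_cpu_ptr_model() are hypothetical names used only for illustration, not the kernel's API.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical model parameters, not kernel values. */
#define NR_CPUS_MODEL	4
#define UNIT_SIZE	(64 * 1024)

static char *first_chunk;	/* models the contiguous bootmem area */

/* Model of per-cpu addressing: a variable is an offset into the static
 * image, and cpu N's copy lives at base + N * UNIT_SIZE + offset. */
static void *per_cpu_ptr_model(size_t offset, int cpu)
{
	return first_chunk + (size_t)cpu * UNIT_SIZE + offset;
}

int main(void)
{
	static const char static_image[] = "per-cpu static data";
	int cpu;

	/* One contiguous allocation, used as-is (no vmalloc remapping),
	 * mirroring the embedding allocator's description. */
	first_chunk = malloc((size_t)NR_CPUS_MODEL * UNIT_SIZE);
	if (!first_chunk)
		return 1;

	/* Every unit starts out as a copy of the static image, like the
	 * memcpy(..., __per_cpu_load, static_size) in the allocators. */
	for (cpu = 0; cpu < NR_CPUS_MODEL; cpu++)
		memcpy(per_cpu_ptr_model(0, cpu), static_image,
		       sizeof(static_image));

	for (cpu = 0; cpu < NR_CPUS_MODEL; cpu++)
		printf("cpu%d copy at offset %zu: %s\n", cpu,
		       (size_t)cpu * UNIT_SIZE,
		       (char *)per_cpu_ptr_model(0, cpu));

	free(first_chunk);
	return 0;
}

In the real code the chosen allocator establishes the first chunk's base address and setup_per_cpu_areas() records its unit size in pcpu_unit_size; those play the roles of first_chunk and UNIT_SIZE in this model.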