setup_percpu.c: old version 66a101dda6b26ee566aa9cadcbea904a41d2b268 vs. new version 458a3e644c3327be529393982e24277eda8f1ac7
Both versions open with the same includes:

```c
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/percpu.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <asm/sections.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/highmem.h>
```

The new version adds four more:

```c
#include <asm/proto.h>
#include <asm/cpumask.h>
#include <asm/cpu.h>
#include <asm/stackprotector.h>
```

Old (66a101dda6b2), top-level declarations:

```c
#ifdef CONFIG_X86_LOCAL_APIC
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
EXPORT_SYMBOL(boot_cpu_physical_apicid);
unsigned int max_physical_apicid;

/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif

/* map cpu index to physical APIC ID */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);

#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA 1

/* map cpu index to node index */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

/* which logical CPUs are on which nodes */
cpumask_t *node_to_cpumask_map;
EXPORT_SYMBOL(node_to_cpumask_map);

/* setup node_to_cpumask_map */
static void __init setup_node_to_cpumask_map(void);

#else
static inline void setup_node_to_cpumask_map(void) { }
#endif
```

New (458a3e644c33), top-level declarations:

```c
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
# define DBG(x...) printk(KERN_DEBUG x)
#else
# define DBG(x...)
#endif

DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);

#ifdef CONFIG_X86_64
#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else
#define BOOT_PERCPU_OFFSET 0
#endif
```
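The BOOT_PERCPU_OFFSET idea above is easiest to see outside the kernel: a per-CPU access is just "the variable's address in the original image plus that CPU's offset", and until the real areas exist every offset points back at the boot copy. The following stand-alone user-space sketch (toy names and sizes, not kernel APIs) illustrates that addressing model:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_CPUS 4

static char percpu_template[64];        /* stands in for .data.percpu  */
static long per_cpu_offset[NR_CPUS];    /* all zero: the "boot" offset */

/* resolve a per-CPU variable for a given cpu: image address + offset */
static void *toy_per_cpu_ptr(void *var, int cpu)
{
        return (char *)var + per_cpu_offset[cpu];
}

int main(void)
{
        int *boot_counter = (int *)percpu_template;
        char *area;
        int cpu;

        /* early boot: every CPU resolves to the shared template copy */
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                (*(int *)toy_per_cpu_ptr(boot_counter, cpu))++;
        printf("shared copy after 'early boot': %d\n", *boot_counter);

        /* "setup_per_cpu_areas": give each CPU its own copy and offset */
        area = malloc(NR_CPUS * sizeof(percpu_template));
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                char *dst = area + cpu * sizeof(percpu_template);

                memcpy(dst, percpu_template, sizeof(percpu_template));
                /* same pointer trick as the kernel's ptr - __per_cpu_start */
                per_cpu_offset[cpu] = dst - percpu_template;
        }

        /* the same accessor now lands in distinct per-CPU areas */
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                *(int *)toy_per_cpu_ptr(boot_counter, cpu) += cpu;
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu%d copy: %d\n", cpu,
                       *(int *)toy_per_cpu_ptr(boot_counter, cpu));
        free(area);
        return 0;
}
```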
Old (66a101dda6b2), helpers ahead of setup_per_cpu_areas():

```c
#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
/*
 * Copy data used in early init routines from the initial arrays to the
 * per cpu data areas.  These arrays then become expendable and the
 * *_early_ptr's are zeroed indicating that the static arrays are gone.
 */
static void __init setup_per_cpu_maps(void)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                per_cpu(x86_cpu_to_apicid, cpu) =
                                early_per_cpu_map(x86_cpu_to_apicid, cpu);
                per_cpu(x86_bios_cpu_apicid, cpu) =
                                early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#ifdef X86_64_NUMA
                per_cpu(x86_cpu_to_node_map, cpu) =
                                early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
        }

        /* indicate the early static arrays will soon be gone */
        early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
        early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#ifdef X86_64_NUMA
        early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
}

#ifdef CONFIG_X86_32
/*
 * Great future not-so-futuristic plan: make i386 and x86_64 do it
 * the same way
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
static inline void setup_cpu_pda_map(void) { }

#elif !defined(CONFIG_SMP)
static inline void setup_cpu_pda_map(void) { }

#else /* CONFIG_SMP && CONFIG_X86_64 */

/*
 * Allocate cpu_pda pointer table and array via alloc_bootmem.
 */
static void __init setup_cpu_pda_map(void)
{
        char *pda;
        struct x8664_pda **new_cpu_pda;
        unsigned long size;
        int cpu;

        size = roundup(sizeof(struct x8664_pda), cache_line_size());

        /* allocate cpu_pda array and pointer table */
        {
                unsigned long tsize = nr_cpu_ids * sizeof(void *);
                unsigned long asize = size * (nr_cpu_ids - 1);

                tsize = roundup(tsize, cache_line_size());
                new_cpu_pda = alloc_bootmem(tsize + asize);
                pda = (char *)new_cpu_pda + tsize;
        }

        /* initialize pointer table to static pda's */
        for_each_possible_cpu(cpu) {
                if (cpu == 0) {
                        /* leave boot cpu pda in place */
                        new_cpu_pda[0] = cpu_pda(0);
                        continue;
                }
                new_cpu_pda[cpu] = (struct x8664_pda *)pda;
                new_cpu_pda[cpu]->in_bootmem = 1;
                pda += size;
        }

        /* point to new pointer table */
        _cpu_pda = new_cpu_pda;
}

#endif /* CONFIG_SMP && CONFIG_X86_64 */

#ifdef CONFIG_X86_64

/* correctly size the local cpu masks */
static void __init setup_cpu_local_masks(void)
{
        alloc_bootmem_cpumask_var(&cpu_initialized_mask);
        alloc_bootmem_cpumask_var(&cpu_callin_mask);
        alloc_bootmem_cpumask_var(&cpu_callout_mask);
        alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}

#else /* CONFIG_X86_32 */

static inline void setup_cpu_local_masks(void)
{
}

#endif /* CONFIG_X86_32 */
```

New (458a3e644c33), helpers ahead of setup_per_cpu_areas():

```c
DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
EXPORT_PER_CPU_SYMBOL(this_cpu_off);

unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
        [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
};
EXPORT_SYMBOL(__per_cpu_offset);

static void __init pcpu4k_populate_pte(unsigned long addr)
{
        populate_extra_pte(addr);
}

static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
        struct desc_struct gdt;

        pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
                        0x2 | DESCTYPE_S, 0x8);
        gdt.s = 1;
        write_gdt_entry(get_cpu_gdt_table(cpu),
                        GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
#endif
}
```
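The old setup_cpu_pda_map() above carves a single bootmem allocation into a cache-line-aligned pointer table followed by one PDA slot per non-boot CPU. Here is a stand-alone user-space sketch of that layout arithmetic (toy struct and sizes, malloc() in place of alloc_bootmem(), in_bootmem bookkeeping omitted):

```c
#include <stdio.h>
#include <stdlib.h>

#define roundup(x, y) ((((x) + (y) - 1) / (y)) * (y))

struct toy_pda { long data[7]; };       /* stand-in for struct x8664_pda */

int main(void)
{
        unsigned long cache_line = 64, nr_cpu_ids = 8;
        unsigned long size  = roundup(sizeof(struct toy_pda), cache_line);
        unsigned long tsize = roundup(nr_cpu_ids * sizeof(void *), cache_line);
        unsigned long asize = size * (nr_cpu_ids - 1);
        struct toy_pda **tbl = malloc(tsize + asize);
        char *pda = (char *)tbl + tsize;        /* slots start after the table */
        unsigned long cpu;

        for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
                if (cpu == 0) {
                        tbl[0] = NULL;          /* boot CPU keeps its static PDA */
                        continue;
                }
                tbl[cpu] = (struct toy_pda *)pda;
                pda += size;
        }
        printf("table %lu bytes + %lu slots of %lu bytes = %lu bytes total\n",
               tsize, nr_cpu_ids - 1, size, tsize + asize);
        free(tbl);
        return 0;
}
```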
Both versions then define setup_per_cpu_areas(); the old one also defines setup_node_to_cpumask_map() right after it.

Old (66a101dda6b2):

```c
/*
 * Great future plan:
 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
 * Always point %gs to its beginning
 */
void __init setup_per_cpu_areas(void)
{
        ssize_t size, old_size;
        char *ptr;
        int cpu;
        unsigned long align = 1;

        /* Setup cpu_pda map */
        setup_cpu_pda_map();

        /* Copy section for each CPU (we discard the original) */
        old_size = PERCPU_ENOUGH_ROOM;
        align = max_t(unsigned long, PAGE_SIZE, align);
        size = roundup(old_size, align);

        pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
                NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

        pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);

        for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
                ptr = __alloc_bootmem(size, align,
                                    __pa(MAX_DMA_ADDRESS));
#else
                int node = early_cpu_to_node(cpu);
                if (!node_online(node) || !NODE_DATA(node)) {
                        ptr = __alloc_bootmem(size, align,
                                         __pa(MAX_DMA_ADDRESS));
                        pr_info("cpu %d has no node %d or node-local memory\n",
                                cpu, node);
                        pr_debug("per cpu data for cpu%d at %016lx\n",
                                 cpu, __pa(ptr));
                } else {
                        ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
                                                        __pa(MAX_DMA_ADDRESS));
                        pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
                                cpu, node, __pa(ptr));
                }
#endif
                per_cpu_offset(cpu) = ptr - __per_cpu_start;
                memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
        }

        /* Setup percpu data maps */
        setup_per_cpu_maps();

        /* Setup node to cpumask map */
        setup_node_to_cpumask_map();

        /* Setup cpu initialized, callin, callout masks */
        setup_cpu_local_masks();
}

#endif

#ifdef X86_64_NUMA

/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: node_to_cpumask() is not valid until after this is done.
 */
static void __init setup_node_to_cpumask_map(void)
{
        unsigned int node, num = 0;
        cpumask_t *map;

        /* setup nr_node_ids if not done yet */
        if (nr_node_ids == MAX_NUMNODES) {
                for_each_node_mask(node, node_possible_map)
                        num = node;
                nr_node_ids = num + 1;
        }

        /* allocate the map */
        map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));

        pr_debug("Node to cpumask map at %p for %d nodes\n",
                 map, nr_node_ids);

        /* node_to_cpumask() will now work */
        node_to_cpumask_map = map;
}
```

New (458a3e644c33):

```c
/*
 * Great future plan:
 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
 * Always point %gs to its beginning
 */
void __init setup_per_cpu_areas(void)
{
        ssize_t size = __per_cpu_end - __per_cpu_start;
        unsigned int nr_cpu_pages = DIV_ROUND_UP(size, PAGE_SIZE);
        static struct page **pages;
        size_t pages_size;
        unsigned int cpu, i, j;
        unsigned long delta;
        size_t pcpu_unit_size;

        pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
                NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
        pr_info("PERCPU: Allocating %zd bytes for static per cpu data\n", size);

        pages_size = nr_cpu_pages * num_possible_cpus() * sizeof(pages[0]);
        pages = alloc_bootmem(pages_size);

        j = 0;
        for_each_possible_cpu(cpu) {
                void *ptr;

                for (i = 0; i < nr_cpu_pages; i++) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
                        ptr = alloc_bootmem_pages(PAGE_SIZE);
#else
                        int node = early_cpu_to_node(cpu);

                        if (!node_online(node) || !NODE_DATA(node)) {
                                ptr = alloc_bootmem_pages(PAGE_SIZE);
                                pr_info("cpu %d has no node %d or node-local "
                                        "memory\n", cpu, node);
                                pr_debug("per cpu data for cpu%d at %016lx\n",
                                         cpu, __pa(ptr));
                        } else {
                                ptr = alloc_bootmem_pages_node(NODE_DATA(node),
                                                               PAGE_SIZE);
                                pr_debug("per cpu data for cpu%d on node%d "
                                         "at %016lx\n", cpu, node, __pa(ptr));
                        }
#endif
                        memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
                        pages[j++] = virt_to_page(ptr);
                }
        }

        pcpu_unit_size = pcpu_setup_static(pcpu4k_populate_pte, pages, size);

        free_bootmem(__pa(pages), pages_size);

        delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
        for_each_possible_cpu(cpu) {
                per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size;
                per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
                per_cpu(cpu_number, cpu) = cpu;
                setup_percpu_segment(cpu);
                setup_stack_canary_segment(cpu);
                /*
                 * Copy data used in early init routines from the
                 * initial arrays to the per cpu data areas.  These
                 * arrays then become expendable and the *_early_ptr's
                 * are zeroed indicating that the static arrays are
                 * gone.
                 */
#ifdef CONFIG_X86_LOCAL_APIC
                per_cpu(x86_cpu_to_apicid, cpu) =
                        early_per_cpu_map(x86_cpu_to_apicid, cpu);
                per_cpu(x86_bios_cpu_apicid, cpu) =
                        early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
                per_cpu(irq_stack_ptr, cpu) =
                        per_cpu(irq_stack_union.irq_stack, cpu) +
                        IRQ_STACK_SIZE - 64;
#ifdef CONFIG_NUMA
                per_cpu(x86_cpu_to_node_map, cpu) =
                        early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
#endif
                /*
                 * Up to this point, the boot CPU has been using .data.init
                 * area.  Reload any changed state for the boot CPU.
                 */
                if (cpu == boot_cpu_id)
                        switch_to_new_gdt(cpu);

                DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
        }

        /* indicate the early static arrays will soon be gone */
#ifdef CONFIG_X86_LOCAL_APIC
        early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
        early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
        early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif

        /* Setup node to cpumask map */
        setup_node_to_cpumask_map();

        /* Setup cpu initialized, callin, callout masks */
        setup_cpu_local_masks();
}
```
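The tail of the new setup_per_cpu_areas() converts the allocator's placement into per-CPU offsets: cpu0's unit starts at pcpu_base_addr and each further CPU sits one pcpu_unit_size later, so per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size. A small user-space sketch of that arithmetic follows; all addresses and the unit size are made up for illustration, not the kernel's real values:

```c
#include <stdio.h>

int main(void)
{
        unsigned long __per_cpu_start = 0xffffffff80a00000UL; /* assumed */
        unsigned long pcpu_base_addr  = 0xffffc20000000000UL; /* assumed */
        unsigned long pcpu_unit_size  = 0x10000;              /* assumed */
        unsigned long var_in_image    = __per_cpu_start + 0x1234;
        unsigned long delta = pcpu_base_addr - __per_cpu_start;
        unsigned int cpu;

        /* a static per-CPU variable for cpu N lives at &var + offset(N) */
        for (cpu = 0; cpu < 4; cpu++) {
                unsigned long off = delta + cpu * pcpu_unit_size;

                printf("cpu%u: per_cpu_offset=%#lx, &var=%#lx\n",
                       cpu, off, var_in_image + off);
        }
        return 0;
}
```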
Old (66a101dda6b2) only; the new version drops this entire NUMA-helper block:

```c
void __cpuinit numa_set_node(int cpu, int node)
{
        int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

        if (cpu_pda(cpu) && node != NUMA_NO_NODE)
                cpu_pda(cpu)->nodenumber = node;

        if (cpu_to_node_map)
                cpu_to_node_map[cpu] = node;

        else if (per_cpu_offset(cpu))
                per_cpu(x86_cpu_to_node_map, cpu) = node;

        else
                pr_debug("Setting node for non-present cpu %d\n", cpu);
}

void __cpuinit numa_clear_node(int cpu)
{
        numa_set_node(cpu, NUMA_NO_NODE);
}

#ifndef CONFIG_DEBUG_PER_CPU_MAPS

void __cpuinit numa_add_cpu(int cpu)
{
        cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_remove_cpu(int cpu)
{
        cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
}

#else /* CONFIG_DEBUG_PER_CPU_MAPS */

/*
 * --------- debug versions of the numa functions ---------
 */
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
        int node = cpu_to_node(cpu);
        cpumask_t *mask;
        char buf[64];

        if (node_to_cpumask_map == NULL) {
                printk(KERN_ERR "node_to_cpumask_map NULL\n");
                dump_stack();
                return;
        }

        mask = &node_to_cpumask_map[node];
        if (enable)
                cpu_set(cpu, *mask);
        else
                cpu_clear(cpu, *mask);

        cpulist_scnprintf(buf, sizeof(buf), mask);
        printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
                enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
}

void __cpuinit numa_add_cpu(int cpu)
{
        numa_set_cpumask(cpu, 1);
}

void __cpuinit numa_remove_cpu(int cpu)
{
        numa_set_cpumask(cpu, 0);
}

int cpu_to_node(int cpu)
{
        if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
                printk(KERN_WARNING
                        "cpu_to_node(%d): usage too early!\n", cpu);
                dump_stack();
                return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
        }
        return per_cpu(x86_cpu_to_node_map, cpu);
}
EXPORT_SYMBOL(cpu_to_node);

/*
 * Same function as cpu_to_node() but used if called before the
 * per_cpu areas are setup.
 */
int early_cpu_to_node(int cpu)
{
        if (early_per_cpu_ptr(x86_cpu_to_node_map))
                return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

        if (!per_cpu_offset(cpu)) {
                printk(KERN_WARNING
                        "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
                dump_stack();
                return NUMA_NO_NODE;
        }
        return per_cpu(x86_cpu_to_node_map, cpu);
}


/* empty cpumask */
static const cpumask_t cpu_mask_none;

/*
 * Returns a pointer to the bitmask of CPUs on Node 'node'.
 */
const cpumask_t *cpumask_of_node(int node)
{
        if (node_to_cpumask_map == NULL) {
                printk(KERN_WARNING
                        "cpumask_of_node(%d): no node_to_cpumask_map!\n",
                        node);
                dump_stack();
                return (const cpumask_t *)&cpu_online_map;
        }
        if (node >= nr_node_ids) {
                printk(KERN_WARNING
                        "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
                        node, nr_node_ids);
                dump_stack();
                return &cpu_mask_none;
        }
        return &node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);

/*
 * Returns a bitmask of CPUs on Node 'node'.
 *
 * Side note: this function creates the returned cpumask on the stack
 * so with a high NR_CPUS count, excessive stack space is used.  The
 * node_to_cpumask_ptr function should be used whenever possible.
 */
cpumask_t node_to_cpumask(int node)
{
        if (node_to_cpumask_map == NULL) {
                printk(KERN_WARNING
                        "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
                dump_stack();
                return cpu_online_map;
        }
        if (node >= nr_node_ids) {
                printk(KERN_WARNING
                        "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
                        node, nr_node_ids);
                dump_stack();
                return cpu_mask_none;
        }
        return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(node_to_cpumask);

/*
 * --------- end of debug versions of the numa functions ---------
 */

#endif /* CONFIG_DEBUG_PER_CPU_MAPS */

#endif /* X86_64_NUMA */
```
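For reference, the sizing logic in the removed setup_node_to_cpumask_map() boils down to "nr_node_ids is the highest possible node plus one, and the map holds one CPU mask per node", and numa_add_cpu()/numa_remove_cpu() then only set or clear one bit in the chosen mask. A stand-alone user-space sketch of that calculation (a plain unsigned long stands in for both node_possible_map and cpumask_t; the values are invented):

```c
#include <stdio.h>

#define MAX_NUMNODES 64

int main(void)
{
        unsigned long node_possible_map = 0x13;  /* nodes 0, 1 and 4 possible */
        unsigned long node_to_cpumask_map[MAX_NUMNODES] = { 0 };
        unsigned int node, num = 0, nr_node_ids;

        /* highest possible node + 1, like the for_each_node_mask() loop */
        for (node = 0; node < MAX_NUMNODES; node++)
                if (node_possible_map & (1UL << node))
                        num = node;
        nr_node_ids = num + 1;

        printf("nr_node_ids = %u, map size = %zu bytes\n",
               nr_node_ids, nr_node_ids * sizeof(node_to_cpumask_map[0]));

        /* numa_add_cpu()/numa_remove_cpu() just set/clear one cpu bit */
        node_to_cpumask_map[1] |= 1UL << 3;      /* "cpu 3 joins node 1"  */
        printf("node 1 mask: %#lx\n", node_to_cpumask_map[1]);
        node_to_cpumask_map[1] &= ~(1UL << 3);   /* "cpu 3 leaves node 1" */
        printf("node 1 mask: %#lx\n", node_to_cpumask_map[1]);
        return 0;
}
```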