1 #include <linux/kernel.h> 2 #include <linux/module.h> 3 #include <linux/init.h> 4 #include <linux/bootmem.h> 5 #include <linux/percpu.h> 6 #include <linux/kexec.h> 7 #include <linux/crash_dump.h> 8 #include <asm/smp.h> 9 #include <asm/percpu.h> 10 #include <asm/sections.h> 11 #include <asm/processor.h> 12 #include <asm/setup.h> 13 #include <asm/topology.h> 14 #include <asm/mpspec.h> 15 #include <asm/apicdef.h> 16 #include <asm/highmem.h> 17 18 #ifdef CONFIG_X86_LOCAL_APIC 19 unsigned int num_processors; 20 unsigned disabled_cpus __cpuinitdata; 21 /* Processor that is doing the boot up */ 22 unsigned int boot_cpu_physical_apicid = -1U; 23 unsigned int max_physical_apicid; 24 EXPORT_SYMBOL(boot_cpu_physical_apicid); 25 26 /* Bitmask of physically existing CPUs */ 27 physid_mask_t phys_cpu_present_map; 28 #endif 29 30 /* map cpu index to physical APIC ID */ 31 DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); 32 DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); 33 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); 34 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); 35 36 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 37 #define X86_64_NUMA 1 38 39 /* map cpu index to node index */ 40 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 41 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 42 43 /* which logical CPUs are on which nodes */ 44 cpumask_t *node_to_cpumask_map; 45 EXPORT_SYMBOL(node_to_cpumask_map); 46 47 /* setup node_to_cpumask_map */ 48 static void __init setup_node_to_cpumask_map(void); 49 50 #else 51 static inline void setup_node_to_cpumask_map(void) { } 52 #endif 53 54 #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 55 /* 56 * Copy data used in early init routines from the initial arrays to the 57 * per cpu data areas. These arrays then become expendable and the 58 * *_early_ptr's are zeroed indicating that the static arrays are gone. 59 */ 60 static void __init setup_per_cpu_maps(void) 61 { 62 int cpu; 63 64 for_each_possible_cpu(cpu) { 65 per_cpu(x86_cpu_to_apicid, cpu) = 66 early_per_cpu_map(x86_cpu_to_apicid, cpu); 67 per_cpu(x86_bios_cpu_apicid, cpu) = 68 early_per_cpu_map(x86_bios_cpu_apicid, cpu); 69 #ifdef X86_64_NUMA 70 per_cpu(x86_cpu_to_node_map, cpu) = 71 early_per_cpu_map(x86_cpu_to_node_map, cpu); 72 #endif 73 } 74 75 /* indicate the early static arrays will soon be gone */ 76 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; 77 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; 78 #ifdef X86_64_NUMA 79 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; 80 #endif 81 } 82 83 #ifdef CONFIG_X86_32 84 /* 85 * Great future not-so-futuristic plan: make i386 and x86_64 do it 86 * the same way 87 */ 88 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 89 EXPORT_SYMBOL(__per_cpu_offset); 90 static inline void setup_cpu_pda_map(void) { } 91 92 #elif !defined(CONFIG_SMP) 93 static inline void setup_cpu_pda_map(void) { } 94 95 #else /* CONFIG_SMP && CONFIG_X86_64 */ 96 97 /* 98 * Allocate cpu_pda pointer table and array via alloc_bootmem. 99 */ 100 static void __init setup_cpu_pda_map(void) 101 { 102 char *pda; 103 struct x8664_pda **new_cpu_pda; 104 unsigned long size; 105 int cpu; 106 107 size = roundup(sizeof(struct x8664_pda), cache_line_size()); 108 109 /* allocate cpu_pda array and pointer table */ 110 { 111 unsigned long tsize = nr_cpu_ids * sizeof(void *); 112 unsigned long asize = size * (nr_cpu_ids - 1); 113 114 tsize = roundup(tsize, cache_line_size()); 115 new_cpu_pda = alloc_bootmem(tsize + asize); 116 pda = (char *)new_cpu_pda + tsize; 117 } 118 119 /* initialize pointer table to static pda's */ 120 for_each_possible_cpu(cpu) { 121 if (cpu == 0) { 122 /* leave boot cpu pda in place */ 123 new_cpu_pda[0] = cpu_pda(0); 124 continue; 125 } 126 new_cpu_pda[cpu] = (struct x8664_pda *)pda; 127 new_cpu_pda[cpu]->in_bootmem = 1; 128 pda += size; 129 } 130 131 /* point to new pointer table */ 132 _cpu_pda = new_cpu_pda; 133 } 134 #endif 135 136 /* 137 * Great future plan: 138 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. 139 * Always point %gs to its beginning 140 */ 141 void __init setup_per_cpu_areas(void) 142 { 143 ssize_t size, old_size; 144 char *ptr; 145 int cpu; 146 unsigned long align = 1; 147 148 /* Setup cpu_pda map */ 149 setup_cpu_pda_map(); 150 151 /* Copy section for each CPU (we discard the original) */ 152 old_size = PERCPU_ENOUGH_ROOM; 153 align = max_t(unsigned long, PAGE_SIZE, align); 154 size = roundup(old_size, align); 155 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 156 size); 157 158 for_each_possible_cpu(cpu) { 159 #ifndef CONFIG_NEED_MULTIPLE_NODES 160 ptr = __alloc_bootmem(size, align, 161 __pa(MAX_DMA_ADDRESS)); 162 #else 163 int node = early_cpu_to_node(cpu); 164 if (!node_online(node) || !NODE_DATA(node)) { 165 ptr = __alloc_bootmem(size, align, 166 __pa(MAX_DMA_ADDRESS)); 167 printk(KERN_INFO 168 "cpu %d has no node %d or node-local memory\n", 169 cpu, node); 170 if (ptr) 171 printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", 172 cpu, __pa(ptr)); 173 } 174 else { 175 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 176 __pa(MAX_DMA_ADDRESS)); 177 if (ptr) 178 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", 179 cpu, node, __pa(ptr)); 180 } 181 #endif 182 per_cpu_offset(cpu) = ptr - __per_cpu_start; 183 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 184 } 185 186 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", 187 NR_CPUS, nr_cpu_ids, nr_node_ids); 188 189 /* Setup percpu data maps */ 190 setup_per_cpu_maps(); 191 192 /* Setup node to cpumask map */ 193 setup_node_to_cpumask_map(); 194 } 195 196 #endif 197 198 #ifdef X86_64_NUMA 199 200 /* 201 * Allocate node_to_cpumask_map based on number of available nodes 202 * Requires node_possible_map to be valid. 203 * 204 * Note: node_to_cpumask() is not valid until after this is done. 205 */ 206 static void __init setup_node_to_cpumask_map(void) 207 { 208 unsigned int node, num = 0; 209 cpumask_t *map; 210 211 /* setup nr_node_ids if not done yet */ 212 if (nr_node_ids == MAX_NUMNODES) { 213 for_each_node_mask(node, node_possible_map) 214 num = node; 215 nr_node_ids = num + 1; 216 } 217 218 /* allocate the map */ 219 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); 220 221 pr_debug("Node to cpumask map at %p for %d nodes\n", 222 map, nr_node_ids); 223 224 /* node_to_cpumask() will now work */ 225 node_to_cpumask_map = map; 226 } 227 228 void __cpuinit numa_set_node(int cpu, int node) 229 { 230 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 231 232 if (cpu_pda(cpu) && node != NUMA_NO_NODE) 233 cpu_pda(cpu)->nodenumber = node; 234 235 if (cpu_to_node_map) 236 cpu_to_node_map[cpu] = node; 237 238 else if (per_cpu_offset(cpu)) 239 per_cpu(x86_cpu_to_node_map, cpu) = node; 240 241 else 242 pr_debug("Setting node for non-present cpu %d\n", cpu); 243 } 244 245 void __cpuinit numa_clear_node(int cpu) 246 { 247 numa_set_node(cpu, NUMA_NO_NODE); 248 } 249 250 #ifndef CONFIG_DEBUG_PER_CPU_MAPS 251 252 void __cpuinit numa_add_cpu(int cpu) 253 { 254 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 255 } 256 257 void __cpuinit numa_remove_cpu(int cpu) 258 { 259 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); 260 } 261 262 #else /* CONFIG_DEBUG_PER_CPU_MAPS */ 263 264 /* 265 * --------- debug versions of the numa functions --------- 266 */ 267 static void __cpuinit numa_set_cpumask(int cpu, int enable) 268 { 269 int node = cpu_to_node(cpu); 270 cpumask_t *mask; 271 char buf[64]; 272 273 if (node_to_cpumask_map == NULL) { 274 printk(KERN_ERR "node_to_cpumask_map NULL\n"); 275 dump_stack(); 276 return; 277 } 278 279 mask = &node_to_cpumask_map[node]; 280 if (enable) 281 cpu_set(cpu, *mask); 282 else 283 cpu_clear(cpu, *mask); 284 285 cpulist_scnprintf(buf, sizeof(buf), *mask); 286 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 287 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); 288 } 289 290 void __cpuinit numa_add_cpu(int cpu) 291 { 292 numa_set_cpumask(cpu, 1); 293 } 294 295 void __cpuinit numa_remove_cpu(int cpu) 296 { 297 numa_set_cpumask(cpu, 0); 298 } 299 300 int cpu_to_node(int cpu) 301 { 302 if (early_per_cpu_ptr(x86_cpu_to_node_map)) { 303 printk(KERN_WARNING 304 "cpu_to_node(%d): usage too early!\n", cpu); 305 dump_stack(); 306 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; 307 } 308 return per_cpu(x86_cpu_to_node_map, cpu); 309 } 310 EXPORT_SYMBOL(cpu_to_node); 311 312 /* 313 * Same function as cpu_to_node() but used if called before the 314 * per_cpu areas are setup. 315 */ 316 int early_cpu_to_node(int cpu) 317 { 318 if (early_per_cpu_ptr(x86_cpu_to_node_map)) 319 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; 320 321 if (!per_cpu_offset(cpu)) { 322 printk(KERN_WARNING 323 "early_cpu_to_node(%d): no per_cpu area!\n", cpu); 324 dump_stack(); 325 return NUMA_NO_NODE; 326 } 327 return per_cpu(x86_cpu_to_node_map, cpu); 328 } 329 330 331 /* empty cpumask */ 332 static const cpumask_t cpu_mask_none; 333 334 /* 335 * Returns a pointer to the bitmask of CPUs on Node 'node'. 336 */ 337 const cpumask_t *_node_to_cpumask_ptr(int node) 338 { 339 if (node_to_cpumask_map == NULL) { 340 printk(KERN_WARNING 341 "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", 342 node); 343 dump_stack(); 344 return (const cpumask_t *)&cpu_online_map; 345 } 346 if (node >= nr_node_ids) { 347 printk(KERN_WARNING 348 "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", 349 node, nr_node_ids); 350 dump_stack(); 351 return &cpu_mask_none; 352 } 353 return &node_to_cpumask_map[node]; 354 } 355 EXPORT_SYMBOL(_node_to_cpumask_ptr); 356 357 /* 358 * Returns a bitmask of CPUs on Node 'node'. 359 * 360 * Side note: this function creates the returned cpumask on the stack 361 * so with a high NR_CPUS count, excessive stack space is used. The 362 * node_to_cpumask_ptr function should be used whenever possible. 363 */ 364 cpumask_t node_to_cpumask(int node) 365 { 366 if (node_to_cpumask_map == NULL) { 367 printk(KERN_WARNING 368 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); 369 dump_stack(); 370 return cpu_online_map; 371 } 372 if (node >= nr_node_ids) { 373 printk(KERN_WARNING 374 "node_to_cpumask(%d): node > nr_node_ids(%d)\n", 375 node, nr_node_ids); 376 dump_stack(); 377 return cpu_mask_none; 378 } 379 return node_to_cpumask_map[node]; 380 } 381 EXPORT_SYMBOL(node_to_cpumask); 382 383 /* 384 * --------- end of debug versions of the numa functions --------- 385 */ 386 387 #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ 388 389 #endif /* X86_64_NUMA */ 390 391