#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/percpu.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <asm/sections.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/highmem.h>

#ifdef CONFIG_X86_LOCAL_APIC
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
EXPORT_SYMBOL(boot_cpu_physical_apicid);
unsigned int max_physical_apicid;

/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif

/* map cpu index to physical APIC ID */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);

#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA	1

/* map cpu index to node index */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

/* which logical CPUs are on which nodes */
cpumask_t *node_to_cpumask_map;
EXPORT_SYMBOL(node_to_cpumask_map);

/* setup node_to_cpumask_map */
static void __init setup_node_to_cpumask_map(void);

#else
static inline void setup_node_to_cpumask_map(void) { }
#endif

#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
/*
 * Copy data used in early init routines from the initial arrays to the
 * per cpu data areas.  These arrays then become expendable and the
 * *_early_ptr's are zeroed indicating that the static arrays are gone.
 */
static void __init setup_per_cpu_maps(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(x86_cpu_to_apicid, cpu) =
				early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
				early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#ifdef X86_64_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
				early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
	}

	/* indicate the early static arrays will soon be gone */
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#ifdef X86_64_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
}

#ifdef CONFIG_X86_32
/*
 * Great future not-so-futuristic plan: make i386 and x86_64 do it
 * the same way
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
static inline void setup_cpu_pda_map(void) { }

#elif !defined(CONFIG_SMP)
static inline void setup_cpu_pda_map(void) { }

#else /* CONFIG_SMP && CONFIG_X86_64 */

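/*
 * Layout of the single bootmem allocation made by setup_cpu_pda_map()
 * below (a summary of the code, for clarity):
 *
 *	new_cpu_pda:	nr_cpu_ids pointers, rounded up to a cache line
 *	pda slots:	nr_cpu_ids - 1 cache-line-sized x8664_pda entries,
 *			one per non-boot cpu, starting right after the
 *			pointer table
 *
 * The boot cpu keeps its statically allocated pda, so no slot is
 * reserved for it in the array.
 */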
/*
 * Allocate cpu_pda pointer table and array via alloc_bootmem.
 */
static void __init setup_cpu_pda_map(void)
{
	char *pda;
	struct x8664_pda **new_cpu_pda;
	unsigned long size;
	int cpu;

	size = roundup(sizeof(struct x8664_pda), cache_line_size());

	/* allocate cpu_pda array and pointer table */
	{
		unsigned long tsize = nr_cpu_ids * sizeof(void *);
		unsigned long asize = size * (nr_cpu_ids - 1);

		tsize = roundup(tsize, cache_line_size());
		new_cpu_pda = alloc_bootmem(tsize + asize);
		pda = (char *)new_cpu_pda + tsize;
	}

	/* initialize pointer table to static pda's */
	for_each_possible_cpu(cpu) {
		if (cpu == 0) {
			/* leave boot cpu pda in place */
			new_cpu_pda[0] = cpu_pda(0);
			continue;
		}
		new_cpu_pda[cpu] = (struct x8664_pda *)pda;
		new_cpu_pda[cpu]->in_bootmem = 1;
		pda += size;
	}

	/* point to new pointer table */
	_cpu_pda = new_cpu_pda;
}

#endif /* CONFIG_SMP && CONFIG_X86_64 */

#ifdef CONFIG_X86_64

/* correctly size the local cpu masks */
static void __init setup_cpu_local_masks(void)
{
	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
	alloc_bootmem_cpumask_var(&cpu_callin_mask);
	alloc_bootmem_cpumask_var(&cpu_callout_mask);
	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}

#else /* CONFIG_X86_32 */

static inline void setup_cpu_local_masks(void)
{
}

#endif /* CONFIG_X86_32 */

/*
 * Great future plan:
 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
 * Always point %gs to its beginning
 */
void __init setup_per_cpu_areas(void)
{
	ssize_t size, old_size;
	char *ptr;
	int cpu;
	unsigned long align = 1;

	/* Setup cpu_pda map */
	setup_cpu_pda_map();

	/* Copy section for each CPU (we discard the original) */
	old_size = PERCPU_ENOUGH_ROOM;
	align = max_t(unsigned long, PAGE_SIZE, align);
	size = roundup(old_size, align);

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

	pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);

	for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
		ptr = __alloc_bootmem(size, align,
				      __pa(MAX_DMA_ADDRESS));
#else
		int node = early_cpu_to_node(cpu);
		if (!node_online(node) || !NODE_DATA(node)) {
			ptr = __alloc_bootmem(size, align,
					      __pa(MAX_DMA_ADDRESS));
			pr_info("cpu %d has no node %d or node-local memory\n",
				cpu, node);
			pr_debug("per cpu data for cpu%d at %016lx\n",
				 cpu, __pa(ptr));
		} else {
			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
						   __pa(MAX_DMA_ADDRESS));
			pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
				 cpu, node, __pa(ptr));
		}
#endif
		per_cpu_offset(cpu) = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
	}

	/* Setup percpu data maps */
	setup_per_cpu_maps();

	/* Setup node to cpumask map */
	setup_node_to_cpumask_map();

	/* Setup cpu initialized, callin, callout masks */
	setup_cpu_local_masks();
}

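/*
 * After the copy loop above, per_cpu_offset(cpu) holds
 * ptr - __per_cpu_start for that cpu's area, so a per_cpu(var, cpu)
 * access now resolves (roughly) to *(&var + per_cpu_offset(cpu)),
 * i.e. to the cpu's private copy of 'var' rather than to the original
 * data between __per_cpu_start and __per_cpu_end.
 */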
#endif

#ifdef X86_64_NUMA

/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: node_to_cpumask() is not valid until after this is done.
 */
static void __init setup_node_to_cpumask_map(void)
{
	unsigned int node, num = 0;
	cpumask_t *map;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES) {
		for_each_node_mask(node, node_possible_map)
			num = node;
		nr_node_ids = num + 1;
	}

	/* allocate the map */
	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));

	pr_debug("Node to cpumask map at %p for %d nodes\n",
		 map, nr_node_ids);

	/* node_to_cpumask() will now work */
	node_to_cpumask_map = map;
}

void __cpuinit numa_set_node(int cpu, int node)
{
	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
		cpu_pda(cpu)->nodenumber = node;

	if (cpu_to_node_map)
		cpu_to_node_map[cpu] = node;

	else if (per_cpu_offset(cpu))
		per_cpu(x86_cpu_to_node_map, cpu) = node;

	else
		pr_debug("Setting node for non-present cpu %d\n", cpu);
}

void __cpuinit numa_clear_node(int cpu)
{
	numa_set_node(cpu, NUMA_NO_NODE);
}

#ifndef CONFIG_DEBUG_PER_CPU_MAPS

void __cpuinit numa_add_cpu(int cpu)
{
	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
}

#else /* CONFIG_DEBUG_PER_CPU_MAPS */

/*
 * --------- debug versions of the numa functions ---------
 */
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
	int node = cpu_to_node(cpu);
	cpumask_t *mask;
	char buf[64];

	if (node_to_cpumask_map == NULL) {
		printk(KERN_ERR "node_to_cpumask_map NULL\n");
		dump_stack();
		return;
	}

	mask = &node_to_cpumask_map[node];
	if (enable)
		cpu_set(cpu, *mask);
	else
		cpu_clear(cpu, *mask);

	cpulist_scnprintf(buf, sizeof(buf), mask);
	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
}

void __cpuinit numa_add_cpu(int cpu)
{
	numa_set_cpumask(cpu, 1);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	numa_set_cpumask(cpu, 0);
}

int cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
		printk(KERN_WARNING
			"cpu_to_node(%d): usage too early!\n", cpu);
		dump_stack();
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
	}
	return per_cpu(x86_cpu_to_node_map, cpu);
}
EXPORT_SYMBOL(cpu_to_node);

/*
 * Same function as cpu_to_node() but used if called before the
 * per_cpu areas are set up.
 */
int early_cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map))
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

	if (!per_cpu_offset(cpu)) {
		printk(KERN_WARNING
			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
		dump_stack();
		return NUMA_NO_NODE;
	}
	return per_cpu(x86_cpu_to_node_map, cpu);
}

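/*
 * Note: setup_per_cpu_areas() above uses early_cpu_to_node() in its
 * NUMA branch rather than cpu_to_node() because it runs before the per
 * cpu areas exist; at that point only the early_per_cpu map can be
 * consulted.
 */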
/* empty cpumask */
static const cpumask_t cpu_mask_none;

/*
 * Returns a pointer to the bitmask of CPUs on Node 'node'.
 */
const cpumask_t *cpumask_of_node(int node)
{
	if (node_to_cpumask_map == NULL) {
		printk(KERN_WARNING
			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
			node);
		dump_stack();
		return (const cpumask_t *)&cpu_online_map;
	}
	if (node >= nr_node_ids) {
		printk(KERN_WARNING
			"cpumask_of_node(%d): node >= nr_node_ids(%d)\n",
			node, nr_node_ids);
		dump_stack();
		return &cpu_mask_none;
	}
	return &node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);

/*
 * Returns a bitmask of CPUs on Node 'node'.
 *
 * Side note: this function creates the returned cpumask on the stack,
 * so with a high NR_CPUS count, excessive stack space is used.  The
 * node_to_cpumask_ptr function should be used whenever possible.
 */
cpumask_t node_to_cpumask(int node)
{
	if (node_to_cpumask_map == NULL) {
		printk(KERN_WARNING
			"node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
		dump_stack();
		return cpu_online_map;
	}
	if (node >= nr_node_ids) {
		printk(KERN_WARNING
			"node_to_cpumask(%d): node >= nr_node_ids(%d)\n",
			node, nr_node_ids);
		dump_stack();
		return cpu_mask_none;
	}
	return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(node_to_cpumask);

/*
 * --------- end of debug versions of the numa functions ---------
 */

#endif /* CONFIG_DEBUG_PER_CPU_MAPS */

#endif /* X86_64_NUMA */
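/*
 * Illustrative usage sketch for cpumask_of_node(); do_work() is a
 * placeholder, not a function defined in the kernel:
 *
 *	const cpumask_t *mask = cpumask_of_node(node);
 *	int cpu;
 *
 *	for_each_cpu_mask(cpu, *mask)
 *		do_work(cpu);
 */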