#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/percpu.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <asm/smp.h>
#include <asm/percpu.h>
#include <asm/sections.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/topology.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/highmem.h>

#ifdef CONFIG_X86_LOCAL_APIC
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
unsigned int max_physical_apicid;
EXPORT_SYMBOL(boot_cpu_physical_apicid);

/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif

/* map cpu index to physical APIC ID */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);

#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA	1

/* map cpu index to node index */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

/* which logical CPUs are on which nodes */
cpumask_t *node_to_cpumask_map;
EXPORT_SYMBOL(node_to_cpumask_map);

/* setup node_to_cpumask_map */
static void __init setup_node_to_cpumask_map(void);

#else
static inline void setup_node_to_cpumask_map(void) { }
#endif

#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
/*
 * Copy data used in early init routines from the initial arrays to the
 * per cpu data areas.  These arrays then become expendable and the
 * *_early_ptr's are zeroed indicating that the static arrays are gone.
 */
static void __init setup_per_cpu_maps(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(x86_cpu_to_apicid, cpu) =
				early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
				early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#ifdef X86_64_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
				early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
	}

	/* indicate the early static arrays will soon be gone */
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#ifdef X86_64_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
}
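
/*
 * Usage sketch (illustrative only; nothing in this file calls it this
 * way): the early_per_cpu() accessor reads from the static early map
 * while early_per_cpu_ptr() is still non-NULL, and from the real per
 * cpu area once the copy above has run and the pointer was zeroed:
 *
 *	u16 apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
 *
 * which is why the early static arrays become expendable after boot.
 */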

#ifdef CONFIG_X86_32
/*
 * Great future not-so-futuristic plan: make i386 and x86_64 do it
 * the same way.
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
static inline void setup_cpu_pda_map(void) { }

#elif !defined(CONFIG_SMP)
static inline void setup_cpu_pda_map(void) { }

#else /* CONFIG_SMP && CONFIG_X86_64 */

/*
 * Allocate cpu_pda pointer table and array via alloc_bootmem.
 */
static void __init setup_cpu_pda_map(void)
{
	char *pda;
	struct x8664_pda **new_cpu_pda;
	unsigned long size;
	int cpu;

	size = roundup(sizeof(struct x8664_pda), cache_line_size());

	/* allocate cpu_pda array and pointer table */
	{
		unsigned long tsize = nr_cpu_ids * sizeof(void *);
		unsigned long asize = size * (nr_cpu_ids - 1);

		tsize = roundup(tsize, cache_line_size());
		new_cpu_pda = alloc_bootmem(tsize + asize);
		pda = (char *)new_cpu_pda + tsize;
	}

	/* initialize pointer table to static pda's */
	for_each_possible_cpu(cpu) {
		if (cpu == 0) {
			/* leave boot cpu pda in place */
			new_cpu_pda[0] = cpu_pda(0);
			continue;
		}
		new_cpu_pda[cpu] = (struct x8664_pda *)pda;
		new_cpu_pda[cpu]->in_bootmem = 1;
		pda += size;
	}

	/* point to new pointer table */
	_cpu_pda = new_cpu_pda;
}
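
/*
 * Resulting bootmem block, for illustration (tsize and asize as
 * computed above):
 *
 *	new_cpu_pda ->	[ pointer table: nr_cpu_ids entries (tsize,   ]
 *			[ rounded up to a cache line)                 ]
 *	pda        ->	[ pda for cpu 1                               ]
 *			[ pda for cpu 2                               ]
 *			[ ... (nr_cpu_ids - 1 slots of "size" each)   ]
 *
 * cpu 0 keeps its statically allocated pda, hence the "- 1" in asize.
 */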

#endif	/* CONFIG_SMP && CONFIG_X86_64 */

/*
 * Great future plan:
 * Declare PDA itself and support (irqstack, tss, pgd) as per cpu data.
 * Always point %gs to its beginning.
 */
void __init setup_per_cpu_areas(void)
{
	ssize_t size, old_size;
	char *ptr;
	int cpu;
	unsigned long align = 1;

	/* Setup cpu_pda map */
	setup_cpu_pda_map();

	/* Copy section for each CPU (we discard the original) */
	old_size = PERCPU_ENOUGH_ROOM;
	align = max_t(unsigned long, PAGE_SIZE, align);
	size = roundup(old_size, align);

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

	pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);

	for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
		ptr = __alloc_bootmem(size, align,
				      __pa(MAX_DMA_ADDRESS));
#else
		int node = early_cpu_to_node(cpu);

		if (!node_online(node) || !NODE_DATA(node)) {
			ptr = __alloc_bootmem(size, align,
					      __pa(MAX_DMA_ADDRESS));
			pr_info("cpu %d has no node %d or node-local memory\n",
				cpu, node);
			pr_debug("per cpu data for cpu%d at %016lx\n",
				 cpu, __pa(ptr));
		} else {
			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
						   __pa(MAX_DMA_ADDRESS));
			pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
				 cpu, node, __pa(ptr));
		}
#endif
		per_cpu_offset(cpu) = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
	}

	/* Setup percpu data maps */
	setup_per_cpu_maps();

	/* Setup node to cpumask map */
	setup_node_to_cpumask_map();
}
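
/*
 * Sketch of what the loop above establishes (illustrative; the exact
 * expansion of per_cpu() is macro internals, not defined here): once
 * per_cpu_offset(cpu) is set, per_cpu(var, cpu) resolves to roughly
 *
 *	*(&var + per_cpu_offset(cpu))
 *
 * i.e. the static .data.percpu section only serves as the initializer
 * image that was memcpy'd into each cpu's bootmem copy.
 */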

#endif	/* CONFIG_HAVE_SETUP_PER_CPU_AREA && CONFIG_X86_SMP */

#ifdef X86_64_NUMA

/*
 * Allocate node_to_cpumask_map based on number of available nodes.
 * Requires node_possible_map to be valid.
 *
 * Note: node_to_cpumask() is not valid until after this is done.
 */
static void __init setup_node_to_cpumask_map(void)
{
	unsigned int node, num = 0;
	cpumask_t *map;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES) {
		for_each_node_mask(node, node_possible_map)
			num = node;
		nr_node_ids = num + 1;
	}

	/* allocate the map */
	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));

	pr_debug("Node to cpumask map at %p for %d nodes\n",
		 map, nr_node_ids);

	/* node_to_cpumask() will now work */
	node_to_cpumask_map = map;
}

void __cpuinit numa_set_node(int cpu, int node)
{
	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
		cpu_pda(cpu)->nodenumber = node;

	if (cpu_to_node_map)
		cpu_to_node_map[cpu] = node;

	else if (per_cpu_offset(cpu))
		per_cpu(x86_cpu_to_node_map, cpu) = node;

	else
		pr_debug("Setting node for non-present cpu %d\n", cpu);
}

void __cpuinit numa_clear_node(int cpu)
{
	numa_set_node(cpu, NUMA_NO_NODE);
}

#ifndef CONFIG_DEBUG_PER_CPU_MAPS

void __cpuinit numa_add_cpu(int cpu)
{
	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
}

#else /* CONFIG_DEBUG_PER_CPU_MAPS */

/*
 * --------- debug versions of the numa functions ---------
 */
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
	int node = cpu_to_node(cpu);
	cpumask_t *mask;
	char buf[64];

	if (node_to_cpumask_map == NULL) {
		printk(KERN_ERR "node_to_cpumask_map NULL\n");
		dump_stack();
		return;
	}

	mask = &node_to_cpumask_map[node];
	if (enable)
		cpu_set(cpu, *mask);
	else
		cpu_clear(cpu, *mask);

	cpulist_scnprintf(buf, sizeof(buf), *mask);
	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
	       enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
}

void __cpuinit numa_add_cpu(int cpu)
{
	numa_set_cpumask(cpu, 1);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	numa_set_cpumask(cpu, 0);
}

int cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
		printk(KERN_WARNING
			"cpu_to_node(%d): usage too early!\n", cpu);
		dump_stack();
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
	}
	return per_cpu(x86_cpu_to_node_map, cpu);
}
EXPORT_SYMBOL(cpu_to_node);

/*
 * Same function as cpu_to_node() but used if called before the
 * per_cpu areas are setup.
 */
int early_cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map))
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

	if (!per_cpu_offset(cpu)) {
		printk(KERN_WARNING
			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
		dump_stack();
		return NUMA_NO_NODE;
	}
	return per_cpu(x86_cpu_to_node_map, cpu);
}

/* empty cpumask */
static const cpumask_t cpu_mask_none;

/*
 * Returns a pointer to the bitmask of CPUs on Node 'node'.
 */
const cpumask_t *cpumask_of_node(int node)
{
	if (node_to_cpumask_map == NULL) {
		printk(KERN_WARNING
			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
			node);
		dump_stack();
		return (const cpumask_t *)&cpu_online_map;
	}
	if (node >= nr_node_ids) {
		printk(KERN_WARNING
			"cpumask_of_node(%d): node >= nr_node_ids(%d)\n",
			node, nr_node_ids);
		dump_stack();
		return &cpu_mask_none;
	}
	return &node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);

/*
 * Returns a bitmask of CPUs on Node 'node'.
 *
 * Side note: this function creates the returned cpumask on the stack,
 * so with a high NR_CPUS count, excessive stack space is used.  The
 * node_to_cpumask_ptr function should be used whenever possible.
 */
cpumask_t node_to_cpumask(int node)
{
	if (node_to_cpumask_map == NULL) {
		printk(KERN_WARNING
			"node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
		dump_stack();
		return cpu_online_map;
	}
	if (node >= nr_node_ids) {
		printk(KERN_WARNING
			"node_to_cpumask(%d): node >= nr_node_ids(%d)\n",
			node, nr_node_ids);
		dump_stack();
		return cpu_mask_none;
	}
	return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(node_to_cpumask);

/*
 * --------- end of debug versions of the numa functions ---------
 */

#endif /* CONFIG_DEBUG_PER_CPU_MAPS */

#endif /* X86_64_NUMA */
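
/*
 * Caller-side sketch (illustrative; do_something() is hypothetical):
 * prefer the pointer-returning cpumask_of_node() over the by-value
 * node_to_cpumask() so no cpumask_t is copied onto the stack:
 *
 *	const cpumask_t *mask = cpumask_of_node(node);
 *	int cpu;
 *
 *	for_each_cpu_mask(cpu, *mask)
 *		do_something(cpu);
 */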