1 #include <linux/kernel.h> 2 #include <linux/module.h> 3 #include <linux/init.h> 4 #include <linux/bootmem.h> 5 #include <linux/percpu.h> 6 #include <linux/kexec.h> 7 #include <linux/crash_dump.h> 8 #include <asm/smp.h> 9 #include <asm/percpu.h> 10 #include <asm/sections.h> 11 #include <asm/processor.h> 12 #include <asm/setup.h> 13 #include <asm/topology.h> 14 #include <asm/mpspec.h> 15 #include <asm/apicdef.h> 16 #include <asm/highmem.h> 17 18 #ifdef CONFIG_X86_LOCAL_APIC 19 unsigned int num_processors; 20 unsigned disabled_cpus __cpuinitdata; 21 /* Processor that is doing the boot up */ 22 unsigned int boot_cpu_physical_apicid = -1U; 23 unsigned int max_physical_apicid; 24 EXPORT_SYMBOL(boot_cpu_physical_apicid); 25 26 /* Bitmask of physically existing CPUs */ 27 physid_mask_t phys_cpu_present_map; 28 #endif 29 30 /* map cpu index to physical APIC ID */ 31 DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); 32 DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); 33 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); 34 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); 35 36 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 37 #define X86_64_NUMA 1 38 39 /* map cpu index to node index */ 40 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 41 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 42 43 /* which logical CPUs are on which nodes */ 44 cpumask_t *node_to_cpumask_map; 45 EXPORT_SYMBOL(node_to_cpumask_map); 46 47 /* setup node_to_cpumask_map */ 48 static void __init setup_node_to_cpumask_map(void); 49 50 #else 51 static inline void setup_node_to_cpumask_map(void) { } 52 #endif 53 54 #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 55 /* 56 * Copy data used in early init routines from the initial arrays to the 57 * per cpu data areas. These arrays then become expendable and the 58 * *_early_ptr's are zeroed indicating that the static arrays are gone. 59 */ 60 static void __init setup_per_cpu_maps(void) 61 { 62 int cpu; 63 64 for_each_possible_cpu(cpu) { 65 per_cpu(x86_cpu_to_apicid, cpu) = 66 early_per_cpu_map(x86_cpu_to_apicid, cpu); 67 per_cpu(x86_bios_cpu_apicid, cpu) = 68 early_per_cpu_map(x86_bios_cpu_apicid, cpu); 69 #ifdef X86_64_NUMA 70 per_cpu(x86_cpu_to_node_map, cpu) = 71 early_per_cpu_map(x86_cpu_to_node_map, cpu); 72 #endif 73 } 74 75 /* indicate the early static arrays will soon be gone */ 76 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; 77 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; 78 #ifdef X86_64_NUMA 79 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; 80 #endif 81 } 82 83 #ifdef CONFIG_X86_32 84 /* 85 * Great future not-so-futuristic plan: make i386 and x86_64 do it 86 * the same way 87 */ 88 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 89 EXPORT_SYMBOL(__per_cpu_offset); 90 static inline void setup_cpu_pda_map(void) { } 91 92 #elif !defined(CONFIG_SMP) 93 static inline void setup_cpu_pda_map(void) { } 94 95 #else /* CONFIG_SMP && CONFIG_X86_64 */ 96 97 /* 98 * Allocate cpu_pda pointer table and array via alloc_bootmem. 99 */ 100 static void __init setup_cpu_pda_map(void) 101 { 102 char *pda; 103 struct x8664_pda **new_cpu_pda; 104 unsigned long size; 105 int cpu; 106 107 size = roundup(sizeof(struct x8664_pda), cache_line_size()); 108 109 /* allocate cpu_pda array and pointer table */ 110 { 111 unsigned long tsize = nr_cpu_ids * sizeof(void *); 112 unsigned long asize = size * (nr_cpu_ids - 1); 113 114 tsize = roundup(tsize, cache_line_size()); 115 new_cpu_pda = alloc_bootmem(tsize + asize); 116 pda = (char *)new_cpu_pda + tsize; 117 } 118 119 /* initialize pointer table to static pda's */ 120 for_each_possible_cpu(cpu) { 121 if (cpu == 0) { 122 /* leave boot cpu pda in place */ 123 new_cpu_pda[0] = cpu_pda(0); 124 continue; 125 } 126 new_cpu_pda[cpu] = (struct x8664_pda *)pda; 127 new_cpu_pda[cpu]->in_bootmem = 1; 128 pda += size; 129 } 130 131 /* point to new pointer table */ 132 _cpu_pda = new_cpu_pda; 133 } 134 #endif 135 136 /* 137 * Great future plan: 138 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. 139 * Always point %gs to its beginning 140 */ 141 void __init setup_per_cpu_areas(void) 142 { 143 ssize_t size = PERCPU_ENOUGH_ROOM; 144 char *ptr; 145 int cpu; 146 147 /* Setup cpu_pda map */ 148 setup_cpu_pda_map(); 149 150 /* Copy section for each CPU (we discard the original) */ 151 size = PERCPU_ENOUGH_ROOM; 152 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 153 size); 154 155 for_each_possible_cpu(cpu) { 156 #ifndef CONFIG_NEED_MULTIPLE_NODES 157 ptr = alloc_bootmem_pages(size); 158 #else 159 int node = early_cpu_to_node(cpu); 160 if (!node_online(node) || !NODE_DATA(node)) { 161 ptr = alloc_bootmem_pages(size); 162 printk(KERN_INFO 163 "cpu %d has no node %d or node-local memory\n", 164 cpu, node); 165 } 166 else 167 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); 168 #endif 169 per_cpu_offset(cpu) = ptr - __per_cpu_start; 170 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 171 172 } 173 174 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", 175 NR_CPUS, nr_cpu_ids, nr_node_ids); 176 177 /* Setup percpu data maps */ 178 setup_per_cpu_maps(); 179 180 /* Setup node to cpumask map */ 181 setup_node_to_cpumask_map(); 182 } 183 184 #endif 185 186 #ifdef X86_64_NUMA 187 188 /* 189 * Allocate node_to_cpumask_map based on number of available nodes 190 * Requires node_possible_map to be valid. 191 * 192 * Note: node_to_cpumask() is not valid until after this is done. 193 */ 194 static void __init setup_node_to_cpumask_map(void) 195 { 196 unsigned int node, num = 0; 197 cpumask_t *map; 198 199 /* setup nr_node_ids if not done yet */ 200 if (nr_node_ids == MAX_NUMNODES) { 201 for_each_node_mask(node, node_possible_map) 202 num = node; 203 nr_node_ids = num + 1; 204 } 205 206 /* allocate the map */ 207 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); 208 209 pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", 210 map, nr_node_ids); 211 212 /* node_to_cpumask() will now work */ 213 node_to_cpumask_map = map; 214 } 215 216 void __cpuinit numa_set_node(int cpu, int node) 217 { 218 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 219 220 if (cpu_pda(cpu) && node != NUMA_NO_NODE) 221 cpu_pda(cpu)->nodenumber = node; 222 223 if (cpu_to_node_map) 224 cpu_to_node_map[cpu] = node; 225 226 else if (per_cpu_offset(cpu)) 227 per_cpu(x86_cpu_to_node_map, cpu) = node; 228 229 else 230 pr_debug("Setting node for non-present cpu %d\n", cpu); 231 } 232 233 void __cpuinit numa_clear_node(int cpu) 234 { 235 numa_set_node(cpu, NUMA_NO_NODE); 236 } 237 238 #ifndef CONFIG_DEBUG_PER_CPU_MAPS 239 240 void __cpuinit numa_add_cpu(int cpu) 241 { 242 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 243 } 244 245 void __cpuinit numa_remove_cpu(int cpu) 246 { 247 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); 248 } 249 250 #else /* CONFIG_DEBUG_PER_CPU_MAPS */ 251 252 /* 253 * --------- debug versions of the numa functions --------- 254 */ 255 static void __cpuinit numa_set_cpumask(int cpu, int enable) 256 { 257 int node = cpu_to_node(cpu); 258 cpumask_t *mask; 259 char buf[64]; 260 261 if (node_to_cpumask_map == NULL) { 262 printk(KERN_ERR "node_to_cpumask_map NULL\n"); 263 dump_stack(); 264 return; 265 } 266 267 mask = &node_to_cpumask_map[node]; 268 if (enable) 269 cpu_set(cpu, *mask); 270 else 271 cpu_clear(cpu, *mask); 272 273 cpulist_scnprintf(buf, sizeof(buf), *mask); 274 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 275 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); 276 } 277 278 void __cpuinit numa_add_cpu(int cpu) 279 { 280 numa_set_cpumask(cpu, 1); 281 } 282 283 void __cpuinit numa_remove_cpu(int cpu) 284 { 285 numa_set_cpumask(cpu, 0); 286 } 287 288 int cpu_to_node(int cpu) 289 { 290 if (early_per_cpu_ptr(x86_cpu_to_node_map)) { 291 printk(KERN_WARNING 292 "cpu_to_node(%d): usage too early!\n", cpu); 293 dump_stack(); 294 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; 295 } 296 return per_cpu(x86_cpu_to_node_map, cpu); 297 } 298 EXPORT_SYMBOL(cpu_to_node); 299 300 /* 301 * Same function as cpu_to_node() but used if called before the 302 * per_cpu areas are setup. 303 */ 304 int early_cpu_to_node(int cpu) 305 { 306 if (early_per_cpu_ptr(x86_cpu_to_node_map)) 307 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; 308 309 if (!per_cpu_offset(cpu)) { 310 printk(KERN_WARNING 311 "early_cpu_to_node(%d): no per_cpu area!\n", cpu); 312 dump_stack(); 313 return NUMA_NO_NODE; 314 } 315 return per_cpu(x86_cpu_to_node_map, cpu); 316 } 317 318 319 /* empty cpumask */ 320 static const cpumask_t cpu_mask_none; 321 322 /* 323 * Returns a pointer to the bitmask of CPUs on Node 'node'. 324 */ 325 const cpumask_t *_node_to_cpumask_ptr(int node) 326 { 327 if (node_to_cpumask_map == NULL) { 328 printk(KERN_WARNING 329 "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", 330 node); 331 dump_stack(); 332 return (const cpumask_t *)&cpu_online_map; 333 } 334 if (node >= nr_node_ids) { 335 printk(KERN_WARNING 336 "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", 337 node, nr_node_ids); 338 dump_stack(); 339 return &cpu_mask_none; 340 } 341 return &node_to_cpumask_map[node]; 342 } 343 EXPORT_SYMBOL(_node_to_cpumask_ptr); 344 345 /* 346 * Returns a bitmask of CPUs on Node 'node'. 347 * 348 * Side note: this function creates the returned cpumask on the stack 349 * so with a high NR_CPUS count, excessive stack space is used. The 350 * node_to_cpumask_ptr function should be used whenever possible. 351 */ 352 cpumask_t node_to_cpumask(int node) 353 { 354 if (node_to_cpumask_map == NULL) { 355 printk(KERN_WARNING 356 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); 357 dump_stack(); 358 return cpu_online_map; 359 } 360 if (node >= nr_node_ids) { 361 printk(KERN_WARNING 362 "node_to_cpumask(%d): node > nr_node_ids(%d)\n", 363 node, nr_node_ids); 364 dump_stack(); 365 return cpu_mask_none; 366 } 367 return node_to_cpumask_map[node]; 368 } 369 EXPORT_SYMBOL(node_to_cpumask); 370 371 /* 372 * --------- end of debug versions of the numa functions --------- 373 */ 374 375 #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ 376 377 #endif /* X86_64_NUMA */ 378 379