1 #include <linux/bootmem.h> 2 #include <linux/linkage.h> 3 #include <linux/bitops.h> 4 #include <linux/kernel.h> 5 #include <linux/module.h> 6 #include <linux/percpu.h> 7 #include <linux/string.h> 8 #include <linux/delay.h> 9 #include <linux/sched.h> 10 #include <linux/init.h> 11 #include <linux/kgdb.h> 12 #include <linux/smp.h> 13 #include <linux/io.h> 14 15 #include <asm/stackprotector.h> 16 #include <asm/perf_event.h> 17 #include <asm/mmu_context.h> 18 #include <asm/archrandom.h> 19 #include <asm/hypervisor.h> 20 #include <asm/processor.h> 21 #include <asm/debugreg.h> 22 #include <asm/sections.h> 23 #include <linux/topology.h> 24 #include <linux/cpumask.h> 25 #include <asm/pgtable.h> 26 #include <linux/atomic.h> 27 #include <asm/proto.h> 28 #include <asm/setup.h> 29 #include <asm/apic.h> 30 #include <asm/desc.h> 31 #include <asm/i387.h> 32 #include <asm/fpu-internal.h> 33 #include <asm/mtrr.h> 34 #include <linux/numa.h> 35 #include <asm/asm.h> 36 #include <asm/cpu.h> 37 #include <asm/mce.h> 38 #include <asm/msr.h> 39 #include <asm/pat.h> 40 41 #ifdef CONFIG_X86_LOCAL_APIC 42 #include <asm/uv/uv.h> 43 #endif 44 45 #include "cpu.h" 46 47 /* all of these masks are initialized in setup_cpu_local_masks() */ 48 cpumask_var_t cpu_initialized_mask; 49 cpumask_var_t cpu_callout_mask; 50 cpumask_var_t cpu_callin_mask; 51 52 /* representing cpus for which sibling maps can be computed */ 53 cpumask_var_t cpu_sibling_setup_mask; 54 55 /* correctly size the local cpu masks */ 56 void __init setup_cpu_local_masks(void) 57 { 58 alloc_bootmem_cpumask_var(&cpu_initialized_mask); 59 alloc_bootmem_cpumask_var(&cpu_callin_mask); 60 alloc_bootmem_cpumask_var(&cpu_callout_mask); 61 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 62 } 63 64 static void __cpuinit default_init(struct cpuinfo_x86 *c) 65 { 66 #ifdef CONFIG_X86_64 67 cpu_detect_cache_sizes(c); 68 #else 69 /* Not much we can do here... */ 70 /* Check if at least it has cpuid */ 71 if (c->cpuid_level == -1) { 72 /* No cpuid. It must be an ancient CPU */ 73 if (c->x86 == 4) 74 strcpy(c->x86_model_id, "486"); 75 else if (c->x86 == 3) 76 strcpy(c->x86_model_id, "386"); 77 } 78 #endif 79 } 80 81 static const struct cpu_dev __cpuinitconst default_cpu = { 82 .c_init = default_init, 83 .c_vendor = "Unknown", 84 .c_x86_vendor = X86_VENDOR_UNKNOWN, 85 }; 86 87 static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; 88 89 DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 90 #ifdef CONFIG_X86_64 91 /* 92 * We need valid kernel segments for data and code in long mode too 93 * IRET will check the segment types kkeil 2000/10/28 94 * Also sysret mandates a special GDT layout 95 * 96 * TLS descriptors are currently at a different place compared to i386. 97 * Hopefully nobody expects them at a fixed place (Wine?) 98 */ 99 [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), 100 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), 101 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), 102 [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), 103 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), 104 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), 105 #else 106 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), 107 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), 108 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), 109 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), 110 /* 111 * Segments used for calling PnP BIOS have byte granularity. 112 * They code segments and data segments have fixed 64k limits, 113 * the transfer segment sizes are set at run time. 114 */ 115 /* 32-bit code */ 116 [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), 117 /* 16-bit code */ 118 [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), 119 /* 16-bit data */ 120 [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), 121 /* 16-bit data */ 122 [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), 123 /* 16-bit data */ 124 [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), 125 /* 126 * The APM segments have byte granularity and their bases 127 * are set at run time. All have 64k limits. 128 */ 129 /* 32-bit code */ 130 [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), 131 /* 16-bit code */ 132 [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), 133 /* data */ 134 [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), 135 136 [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), 137 [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), 138 GDT_STACK_CANARY_INIT 139 #endif 140 } }; 141 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 142 143 static int __init x86_xsave_setup(char *s) 144 { 145 setup_clear_cpu_cap(X86_FEATURE_XSAVE); 146 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); 147 return 1; 148 } 149 __setup("noxsave", x86_xsave_setup); 150 151 static int __init x86_xsaveopt_setup(char *s) 152 { 153 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); 154 return 1; 155 } 156 __setup("noxsaveopt", x86_xsaveopt_setup); 157 158 #ifdef CONFIG_X86_32 159 static int cachesize_override __cpuinitdata = -1; 160 static int disable_x86_serial_nr __cpuinitdata = 1; 161 162 static int __init cachesize_setup(char *str) 163 { 164 get_option(&str, &cachesize_override); 165 return 1; 166 } 167 __setup("cachesize=", cachesize_setup); 168 169 static int __init x86_fxsr_setup(char *s) 170 { 171 setup_clear_cpu_cap(X86_FEATURE_FXSR); 172 setup_clear_cpu_cap(X86_FEATURE_XMM); 173 return 1; 174 } 175 __setup("nofxsr", x86_fxsr_setup); 176 177 static int __init x86_sep_setup(char *s) 178 { 179 setup_clear_cpu_cap(X86_FEATURE_SEP); 180 return 1; 181 } 182 __setup("nosep", x86_sep_setup); 183 184 /* Standard macro to see if a specific flag is changeable */ 185 static inline int flag_is_changeable_p(u32 flag) 186 { 187 u32 f1, f2; 188 189 /* 190 * Cyrix and IDT cpus allow disabling of CPUID 191 * so the code below may return different results 192 * when it is executed before and after enabling 193 * the CPUID. Add "volatile" to not allow gcc to 194 * optimize the subsequent calls to this function. 195 */ 196 asm volatile ("pushfl \n\t" 197 "pushfl \n\t" 198 "popl %0 \n\t" 199 "movl %0, %1 \n\t" 200 "xorl %2, %0 \n\t" 201 "pushl %0 \n\t" 202 "popfl \n\t" 203 "pushfl \n\t" 204 "popl %0 \n\t" 205 "popfl \n\t" 206 207 : "=&r" (f1), "=&r" (f2) 208 : "ir" (flag)); 209 210 return ((f1^f2) & flag) != 0; 211 } 212 213 /* Probe for the CPUID instruction */ 214 static int __cpuinit have_cpuid_p(void) 215 { 216 return flag_is_changeable_p(X86_EFLAGS_ID); 217 } 218 219 static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 220 { 221 unsigned long lo, hi; 222 223 if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) 224 return; 225 226 /* Disable processor serial number: */ 227 228 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); 229 lo |= 0x200000; 230 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); 231 232 printk(KERN_NOTICE "CPU serial number disabled.\n"); 233 clear_cpu_cap(c, X86_FEATURE_PN); 234 235 /* Disabling the serial number may affect the cpuid level */ 236 c->cpuid_level = cpuid_eax(0); 237 } 238 239 static int __init x86_serial_nr_setup(char *s) 240 { 241 disable_x86_serial_nr = 0; 242 return 1; 243 } 244 __setup("serialnumber", x86_serial_nr_setup); 245 #else 246 static inline int flag_is_changeable_p(u32 flag) 247 { 248 return 1; 249 } 250 /* Probe for the CPUID instruction */ 251 static inline int have_cpuid_p(void) 252 { 253 return 1; 254 } 255 static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 256 { 257 } 258 #endif 259 260 static int disable_smep __cpuinitdata; 261 static __init int setup_disable_smep(char *arg) 262 { 263 disable_smep = 1; 264 return 1; 265 } 266 __setup("nosmep", setup_disable_smep); 267 268 static __cpuinit void setup_smep(struct cpuinfo_x86 *c) 269 { 270 if (cpu_has(c, X86_FEATURE_SMEP)) { 271 if (unlikely(disable_smep)) { 272 setup_clear_cpu_cap(X86_FEATURE_SMEP); 273 clear_in_cr4(X86_CR4_SMEP); 274 } else 275 set_in_cr4(X86_CR4_SMEP); 276 } 277 } 278 279 /* 280 * Some CPU features depend on higher CPUID levels, which may not always 281 * be available due to CPUID level capping or broken virtualization 282 * software. Add those features to this table to auto-disable them. 283 */ 284 struct cpuid_dependent_feature { 285 u32 feature; 286 u32 level; 287 }; 288 289 static const struct cpuid_dependent_feature __cpuinitconst 290 cpuid_dependent_features[] = { 291 { X86_FEATURE_MWAIT, 0x00000005 }, 292 { X86_FEATURE_DCA, 0x00000009 }, 293 { X86_FEATURE_XSAVE, 0x0000000d }, 294 { 0, 0 } 295 }; 296 297 static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) 298 { 299 const struct cpuid_dependent_feature *df; 300 301 for (df = cpuid_dependent_features; df->feature; df++) { 302 303 if (!cpu_has(c, df->feature)) 304 continue; 305 /* 306 * Note: cpuid_level is set to -1 if unavailable, but 307 * extended_extended_level is set to 0 if unavailable 308 * and the legitimate extended levels are all negative 309 * when signed; hence the weird messing around with 310 * signs here... 311 */ 312 if (!((s32)df->level < 0 ? 313 (u32)df->level > (u32)c->extended_cpuid_level : 314 (s32)df->level > (s32)c->cpuid_level)) 315 continue; 316 317 clear_cpu_cap(c, df->feature); 318 if (!warn) 319 continue; 320 321 printk(KERN_WARNING 322 "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", 323 x86_cap_flags[df->feature], df->level); 324 } 325 } 326 327 /* 328 * Naming convention should be: <Name> [(<Codename>)] 329 * This table only is used unless init_<vendor>() below doesn't set it; 330 * in particular, if CPUID levels 0x80000002..4 are supported, this 331 * isn't used 332 */ 333 334 /* Look up CPU names by table lookup. */ 335 static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) 336 { 337 const struct cpu_model_info *info; 338 339 if (c->x86_model >= 16) 340 return NULL; /* Range check */ 341 342 if (!this_cpu) 343 return NULL; 344 345 info = this_cpu->c_models; 346 347 while (info && info->family) { 348 if (info->family == c->x86) 349 return info->model_names[c->x86_model]; 350 info++; 351 } 352 return NULL; /* Not found */ 353 } 354 355 __u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata; 356 __u32 cpu_caps_set[NCAPINTS] __cpuinitdata; 357 358 void load_percpu_segment(int cpu) 359 { 360 #ifdef CONFIG_X86_32 361 loadsegment(fs, __KERNEL_PERCPU); 362 #else 363 loadsegment(gs, 0); 364 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); 365 #endif 366 load_stack_canary_segment(); 367 } 368 369 /* 370 * Current gdt points %fs at the "master" per-cpu area: after this, 371 * it's on the real one. 372 */ 373 void switch_to_new_gdt(int cpu) 374 { 375 struct desc_ptr gdt_descr; 376 377 gdt_descr.address = (long)get_cpu_gdt_table(cpu); 378 gdt_descr.size = GDT_SIZE - 1; 379 load_gdt(&gdt_descr); 380 /* Reload the per-cpu base */ 381 382 load_percpu_segment(cpu); 383 } 384 385 static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; 386 387 static void __cpuinit get_model_name(struct cpuinfo_x86 *c) 388 { 389 unsigned int *v; 390 char *p, *q; 391 392 if (c->extended_cpuid_level < 0x80000004) 393 return; 394 395 v = (unsigned int *)c->x86_model_id; 396 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); 397 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); 398 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); 399 c->x86_model_id[48] = 0; 400 401 /* 402 * Intel chips right-justify this string for some dumb reason; 403 * undo that brain damage: 404 */ 405 p = q = &c->x86_model_id[0]; 406 while (*p == ' ') 407 p++; 408 if (p != q) { 409 while (*p) 410 *q++ = *p++; 411 while (q <= &c->x86_model_id[48]) 412 *q++ = '\0'; /* Zero-pad the rest */ 413 } 414 } 415 416 void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) 417 { 418 unsigned int n, dummy, ebx, ecx, edx, l2size; 419 420 n = c->extended_cpuid_level; 421 422 if (n >= 0x80000005) { 423 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); 424 c->x86_cache_size = (ecx>>24) + (edx>>24); 425 #ifdef CONFIG_X86_64 426 /* On K8 L1 TLB is inclusive, so don't count it */ 427 c->x86_tlbsize = 0; 428 #endif 429 } 430 431 if (n < 0x80000006) /* Some chips just has a large L1. */ 432 return; 433 434 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); 435 l2size = ecx >> 16; 436 437 #ifdef CONFIG_X86_64 438 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); 439 #else 440 /* do processor-specific cache resizing */ 441 if (this_cpu->c_size_cache) 442 l2size = this_cpu->c_size_cache(c, l2size); 443 444 /* Allow user to override all this if necessary. */ 445 if (cachesize_override != -1) 446 l2size = cachesize_override; 447 448 if (l2size == 0) 449 return; /* Again, no L2 cache is possible */ 450 #endif 451 452 c->x86_cache_size = l2size; 453 } 454 455 void __cpuinit detect_ht(struct cpuinfo_x86 *c) 456 { 457 #ifdef CONFIG_X86_HT 458 u32 eax, ebx, ecx, edx; 459 int index_msb, core_bits; 460 static bool printed; 461 462 if (!cpu_has(c, X86_FEATURE_HT)) 463 return; 464 465 if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) 466 goto out; 467 468 if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) 469 return; 470 471 cpuid(1, &eax, &ebx, &ecx, &edx); 472 473 smp_num_siblings = (ebx & 0xff0000) >> 16; 474 475 if (smp_num_siblings == 1) { 476 printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); 477 goto out; 478 } 479 480 if (smp_num_siblings <= 1) 481 goto out; 482 483 index_msb = get_count_order(smp_num_siblings); 484 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); 485 486 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 487 488 index_msb = get_count_order(smp_num_siblings); 489 490 core_bits = get_count_order(c->x86_max_cores); 491 492 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & 493 ((1 << core_bits) - 1); 494 495 out: 496 if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { 497 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 498 c->phys_proc_id); 499 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 500 c->cpu_core_id); 501 printed = 1; 502 } 503 #endif 504 } 505 506 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) 507 { 508 char *v = c->x86_vendor_id; 509 int i; 510 511 for (i = 0; i < X86_VENDOR_NUM; i++) { 512 if (!cpu_devs[i]) 513 break; 514 515 if (!strcmp(v, cpu_devs[i]->c_ident[0]) || 516 (cpu_devs[i]->c_ident[1] && 517 !strcmp(v, cpu_devs[i]->c_ident[1]))) { 518 519 this_cpu = cpu_devs[i]; 520 c->x86_vendor = this_cpu->c_x86_vendor; 521 return; 522 } 523 } 524 525 printk_once(KERN_ERR 526 "CPU: vendor_id '%s' unknown, using generic init.\n" \ 527 "CPU: Your system may be unstable.\n", v); 528 529 c->x86_vendor = X86_VENDOR_UNKNOWN; 530 this_cpu = &default_cpu; 531 } 532 533 void __cpuinit cpu_detect(struct cpuinfo_x86 *c) 534 { 535 /* Get vendor name */ 536 cpuid(0x00000000, (unsigned int *)&c->cpuid_level, 537 (unsigned int *)&c->x86_vendor_id[0], 538 (unsigned int *)&c->x86_vendor_id[8], 539 (unsigned int *)&c->x86_vendor_id[4]); 540 541 c->x86 = 4; 542 /* Intel-defined flags: level 0x00000001 */ 543 if (c->cpuid_level >= 0x00000001) { 544 u32 junk, tfms, cap0, misc; 545 546 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 547 c->x86 = (tfms >> 8) & 0xf; 548 c->x86_model = (tfms >> 4) & 0xf; 549 c->x86_mask = tfms & 0xf; 550 551 if (c->x86 == 0xf) 552 c->x86 += (tfms >> 20) & 0xff; 553 if (c->x86 >= 0x6) 554 c->x86_model += ((tfms >> 16) & 0xf) << 4; 555 556 if (cap0 & (1<<19)) { 557 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 558 c->x86_cache_alignment = c->x86_clflush_size; 559 } 560 } 561 } 562 563 void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) 564 { 565 u32 tfms, xlvl; 566 u32 ebx; 567 568 /* Intel-defined flags: level 0x00000001 */ 569 if (c->cpuid_level >= 0x00000001) { 570 u32 capability, excap; 571 572 cpuid(0x00000001, &tfms, &ebx, &excap, &capability); 573 c->x86_capability[0] = capability; 574 c->x86_capability[4] = excap; 575 } 576 577 /* Additional Intel-defined flags: level 0x00000007 */ 578 if (c->cpuid_level >= 0x00000007) { 579 u32 eax, ebx, ecx, edx; 580 581 cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); 582 583 c->x86_capability[9] = ebx; 584 } 585 586 /* AMD-defined flags: level 0x80000001 */ 587 xlvl = cpuid_eax(0x80000000); 588 c->extended_cpuid_level = xlvl; 589 590 if ((xlvl & 0xffff0000) == 0x80000000) { 591 if (xlvl >= 0x80000001) { 592 c->x86_capability[1] = cpuid_edx(0x80000001); 593 c->x86_capability[6] = cpuid_ecx(0x80000001); 594 } 595 } 596 597 if (c->extended_cpuid_level >= 0x80000008) { 598 u32 eax = cpuid_eax(0x80000008); 599 600 c->x86_virt_bits = (eax >> 8) & 0xff; 601 c->x86_phys_bits = eax & 0xff; 602 } 603 #ifdef CONFIG_X86_32 604 else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) 605 c->x86_phys_bits = 36; 606 #endif 607 608 if (c->extended_cpuid_level >= 0x80000007) 609 c->x86_power = cpuid_edx(0x80000007); 610 611 init_scattered_cpuid_features(c); 612 } 613 614 static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) 615 { 616 #ifdef CONFIG_X86_32 617 int i; 618 619 /* 620 * First of all, decide if this is a 486 or higher 621 * It's a 486 if we can modify the AC flag 622 */ 623 if (flag_is_changeable_p(X86_EFLAGS_AC)) 624 c->x86 = 4; 625 else 626 c->x86 = 3; 627 628 for (i = 0; i < X86_VENDOR_NUM; i++) 629 if (cpu_devs[i] && cpu_devs[i]->c_identify) { 630 c->x86_vendor_id[0] = 0; 631 cpu_devs[i]->c_identify(c); 632 if (c->x86_vendor_id[0]) { 633 get_cpu_vendor(c); 634 break; 635 } 636 } 637 #endif 638 } 639 640 /* 641 * Do minimum CPU detection early. 642 * Fields really needed: vendor, cpuid_level, family, model, mask, 643 * cache alignment. 644 * The others are not touched to avoid unwanted side effects. 645 * 646 * WARNING: this function is only called on the BP. Don't add code here 647 * that is supposed to run on all CPUs. 648 */ 649 static void __init early_identify_cpu(struct cpuinfo_x86 *c) 650 { 651 #ifdef CONFIG_X86_64 652 c->x86_clflush_size = 64; 653 c->x86_phys_bits = 36; 654 c->x86_virt_bits = 48; 655 #else 656 c->x86_clflush_size = 32; 657 c->x86_phys_bits = 32; 658 c->x86_virt_bits = 32; 659 #endif 660 c->x86_cache_alignment = c->x86_clflush_size; 661 662 memset(&c->x86_capability, 0, sizeof c->x86_capability); 663 c->extended_cpuid_level = 0; 664 665 if (!have_cpuid_p()) 666 identify_cpu_without_cpuid(c); 667 668 /* cyrix could have cpuid enabled via c_identify()*/ 669 if (!have_cpuid_p()) 670 return; 671 672 cpu_detect(c); 673 674 get_cpu_vendor(c); 675 676 get_cpu_cap(c); 677 678 if (this_cpu->c_early_init) 679 this_cpu->c_early_init(c); 680 681 c->cpu_index = 0; 682 filter_cpuid_features(c, false); 683 684 setup_smep(c); 685 686 if (this_cpu->c_bsp_init) 687 this_cpu->c_bsp_init(c); 688 } 689 690 void __init early_cpu_init(void) 691 { 692 const struct cpu_dev *const *cdev; 693 int count = 0; 694 695 #ifdef CONFIG_PROCESSOR_SELECT 696 printk(KERN_INFO "KERNEL supported cpus:\n"); 697 #endif 698 699 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { 700 const struct cpu_dev *cpudev = *cdev; 701 702 if (count >= X86_VENDOR_NUM) 703 break; 704 cpu_devs[count] = cpudev; 705 count++; 706 707 #ifdef CONFIG_PROCESSOR_SELECT 708 { 709 unsigned int j; 710 711 for (j = 0; j < 2; j++) { 712 if (!cpudev->c_ident[j]) 713 continue; 714 printk(KERN_INFO " %s %s\n", cpudev->c_vendor, 715 cpudev->c_ident[j]); 716 } 717 } 718 #endif 719 } 720 early_identify_cpu(&boot_cpu_data); 721 } 722 723 /* 724 * The NOPL instruction is supposed to exist on all CPUs of family >= 6; 725 * unfortunately, that's not true in practice because of early VIA 726 * chips and (more importantly) broken virtualizers that are not easy 727 * to detect. In the latter case it doesn't even *fail* reliably, so 728 * probing for it doesn't even work. Disable it completely on 32-bit 729 * unless we can find a reliable way to detect all the broken cases. 730 * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). 731 */ 732 static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) 733 { 734 #ifdef CONFIG_X86_32 735 clear_cpu_cap(c, X86_FEATURE_NOPL); 736 #else 737 set_cpu_cap(c, X86_FEATURE_NOPL); 738 #endif 739 } 740 741 static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 742 { 743 c->extended_cpuid_level = 0; 744 745 if (!have_cpuid_p()) 746 identify_cpu_without_cpuid(c); 747 748 /* cyrix could have cpuid enabled via c_identify()*/ 749 if (!have_cpuid_p()) 750 return; 751 752 cpu_detect(c); 753 754 get_cpu_vendor(c); 755 756 get_cpu_cap(c); 757 758 if (c->cpuid_level >= 0x00000001) { 759 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; 760 #ifdef CONFIG_X86_32 761 # ifdef CONFIG_X86_HT 762 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); 763 # else 764 c->apicid = c->initial_apicid; 765 # endif 766 #endif 767 c->phys_proc_id = c->initial_apicid; 768 } 769 770 setup_smep(c); 771 772 get_model_name(c); /* Default name */ 773 774 detect_nopl(c); 775 } 776 777 /* 778 * This does the hard work of actually picking apart the CPU stuff... 779 */ 780 static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) 781 { 782 int i; 783 784 c->loops_per_jiffy = loops_per_jiffy; 785 c->x86_cache_size = -1; 786 c->x86_vendor = X86_VENDOR_UNKNOWN; 787 c->x86_model = c->x86_mask = 0; /* So far unknown... */ 788 c->x86_vendor_id[0] = '\0'; /* Unset */ 789 c->x86_model_id[0] = '\0'; /* Unset */ 790 c->x86_max_cores = 1; 791 c->x86_coreid_bits = 0; 792 #ifdef CONFIG_X86_64 793 c->x86_clflush_size = 64; 794 c->x86_phys_bits = 36; 795 c->x86_virt_bits = 48; 796 #else 797 c->cpuid_level = -1; /* CPUID not detected */ 798 c->x86_clflush_size = 32; 799 c->x86_phys_bits = 32; 800 c->x86_virt_bits = 32; 801 #endif 802 c->x86_cache_alignment = c->x86_clflush_size; 803 memset(&c->x86_capability, 0, sizeof c->x86_capability); 804 805 generic_identify(c); 806 807 if (this_cpu->c_identify) 808 this_cpu->c_identify(c); 809 810 /* Clear/Set all flags overriden by options, after probe */ 811 for (i = 0; i < NCAPINTS; i++) { 812 c->x86_capability[i] &= ~cpu_caps_cleared[i]; 813 c->x86_capability[i] |= cpu_caps_set[i]; 814 } 815 816 #ifdef CONFIG_X86_64 817 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); 818 #endif 819 820 /* 821 * Vendor-specific initialization. In this section we 822 * canonicalize the feature flags, meaning if there are 823 * features a certain CPU supports which CPUID doesn't 824 * tell us, CPUID claiming incorrect flags, or other bugs, 825 * we handle them here. 826 * 827 * At the end of this section, c->x86_capability better 828 * indicate the features this CPU genuinely supports! 829 */ 830 if (this_cpu->c_init) 831 this_cpu->c_init(c); 832 833 /* Disable the PN if appropriate */ 834 squash_the_stupid_serial_number(c); 835 836 /* 837 * The vendor-specific functions might have changed features. 838 * Now we do "generic changes." 839 */ 840 841 /* Filter out anything that depends on CPUID levels we don't have */ 842 filter_cpuid_features(c, true); 843 844 /* If the model name is still unset, do table lookup. */ 845 if (!c->x86_model_id[0]) { 846 const char *p; 847 p = table_lookup_model(c); 848 if (p) 849 strcpy(c->x86_model_id, p); 850 else 851 /* Last resort... */ 852 sprintf(c->x86_model_id, "%02x/%02x", 853 c->x86, c->x86_model); 854 } 855 856 #ifdef CONFIG_X86_64 857 detect_ht(c); 858 #endif 859 860 init_hypervisor(c); 861 x86_init_rdrand(c); 862 863 /* 864 * Clear/Set all flags overriden by options, need do it 865 * before following smp all cpus cap AND. 866 */ 867 for (i = 0; i < NCAPINTS; i++) { 868 c->x86_capability[i] &= ~cpu_caps_cleared[i]; 869 c->x86_capability[i] |= cpu_caps_set[i]; 870 } 871 872 /* 873 * On SMP, boot_cpu_data holds the common feature set between 874 * all CPUs; so make sure that we indicate which features are 875 * common between the CPUs. The first time this routine gets 876 * executed, c == &boot_cpu_data. 877 */ 878 if (c != &boot_cpu_data) { 879 /* AND the already accumulated flags with these */ 880 for (i = 0; i < NCAPINTS; i++) 881 boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; 882 } 883 884 /* Init Machine Check Exception if available. */ 885 mcheck_cpu_init(c); 886 887 select_idle_routine(c); 888 889 #ifdef CONFIG_NUMA 890 numa_add_cpu(smp_processor_id()); 891 #endif 892 } 893 894 #ifdef CONFIG_X86_64 895 static void vgetcpu_set_mode(void) 896 { 897 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) 898 vgetcpu_mode = VGETCPU_RDTSCP; 899 else 900 vgetcpu_mode = VGETCPU_LSL; 901 } 902 #endif 903 904 void __init identify_boot_cpu(void) 905 { 906 identify_cpu(&boot_cpu_data); 907 init_amd_e400_c1e_mask(); 908 #ifdef CONFIG_X86_32 909 sysenter_setup(); 910 enable_sep_cpu(); 911 #else 912 vgetcpu_set_mode(); 913 #endif 914 } 915 916 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 917 { 918 BUG_ON(c == &boot_cpu_data); 919 identify_cpu(c); 920 #ifdef CONFIG_X86_32 921 enable_sep_cpu(); 922 #endif 923 mtrr_ap_init(); 924 } 925 926 struct msr_range { 927 unsigned min; 928 unsigned max; 929 }; 930 931 static const struct msr_range msr_range_array[] __cpuinitconst = { 932 { 0x00000000, 0x00000418}, 933 { 0xc0000000, 0xc000040b}, 934 { 0xc0010000, 0xc0010142}, 935 { 0xc0011000, 0xc001103b}, 936 }; 937 938 static void __cpuinit __print_cpu_msr(void) 939 { 940 unsigned index_min, index_max; 941 unsigned index; 942 u64 val; 943 int i; 944 945 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { 946 index_min = msr_range_array[i].min; 947 index_max = msr_range_array[i].max; 948 949 for (index = index_min; index < index_max; index++) { 950 if (rdmsrl_amd_safe(index, &val)) 951 continue; 952 printk(KERN_INFO " MSR%08x: %016llx\n", index, val); 953 } 954 } 955 } 956 957 static int show_msr __cpuinitdata; 958 959 static __init int setup_show_msr(char *arg) 960 { 961 int num; 962 963 get_option(&arg, &num); 964 965 if (num > 0) 966 show_msr = num; 967 return 1; 968 } 969 __setup("show_msr=", setup_show_msr); 970 971 static __init int setup_noclflush(char *arg) 972 { 973 setup_clear_cpu_cap(X86_FEATURE_CLFLSH); 974 return 1; 975 } 976 __setup("noclflush", setup_noclflush); 977 978 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) 979 { 980 const char *vendor = NULL; 981 982 if (c->x86_vendor < X86_VENDOR_NUM) { 983 vendor = this_cpu->c_vendor; 984 } else { 985 if (c->cpuid_level >= 0) 986 vendor = c->x86_vendor_id; 987 } 988 989 if (vendor && !strstr(c->x86_model_id, vendor)) 990 printk(KERN_CONT "%s ", vendor); 991 992 if (c->x86_model_id[0]) 993 printk(KERN_CONT "%s", c->x86_model_id); 994 else 995 printk(KERN_CONT "%d86", c->x86); 996 997 if (c->x86_mask || c->cpuid_level >= 0) 998 printk(KERN_CONT " stepping %02x\n", c->x86_mask); 999 else 1000 printk(KERN_CONT "\n"); 1001 1002 print_cpu_msr(c); 1003 } 1004 1005 void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c) 1006 { 1007 if (c->cpu_index < show_msr) 1008 __print_cpu_msr(); 1009 } 1010 1011 static __init int setup_disablecpuid(char *arg) 1012 { 1013 int bit; 1014 1015 if (get_option(&arg, &bit) && bit < NCAPINTS*32) 1016 setup_clear_cpu_cap(bit); 1017 else 1018 return 0; 1019 1020 return 1; 1021 } 1022 __setup("clearcpuid=", setup_disablecpuid); 1023 1024 #ifdef CONFIG_X86_64 1025 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; 1026 struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, 1027 (unsigned long) nmi_idt_table }; 1028 1029 DEFINE_PER_CPU_FIRST(union irq_stack_union, 1030 irq_stack_union) __aligned(PAGE_SIZE); 1031 1032 /* 1033 * The following four percpu variables are hot. Align current_task to 1034 * cacheline size such that all four fall in the same cacheline. 1035 */ 1036 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = 1037 &init_task; 1038 EXPORT_PER_CPU_SYMBOL(current_task); 1039 1040 DEFINE_PER_CPU(unsigned long, kernel_stack) = 1041 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; 1042 EXPORT_PER_CPU_SYMBOL(kernel_stack); 1043 1044 DEFINE_PER_CPU(char *, irq_stack_ptr) = 1045 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; 1046 1047 DEFINE_PER_CPU(unsigned int, irq_count) = -1; 1048 1049 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); 1050 1051 /* 1052 * Special IST stacks which the CPU switches to when it calls 1053 * an IST-marked descriptor entry. Up to 7 stacks (hardware 1054 * limit), all of them are 4K, except the debug stack which 1055 * is 8K. 1056 */ 1057 static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { 1058 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, 1059 [DEBUG_STACK - 1] = DEBUG_STKSZ 1060 }; 1061 1062 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks 1063 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); 1064 1065 /* May not be marked __init: used by software suspend */ 1066 void syscall_init(void) 1067 { 1068 /* 1069 * LSTAR and STAR live in a bit strange symbiosis. 1070 * They both write to the same internal register. STAR allows to 1071 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. 1072 */ 1073 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); 1074 wrmsrl(MSR_LSTAR, system_call); 1075 wrmsrl(MSR_CSTAR, ignore_sysret); 1076 1077 #ifdef CONFIG_IA32_EMULATION 1078 syscall32_cpu_init(); 1079 #endif 1080 1081 /* Flags to clear on syscall */ 1082 wrmsrl(MSR_SYSCALL_MASK, 1083 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); 1084 } 1085 1086 unsigned long kernel_eflags; 1087 1088 /* 1089 * Copies of the original ist values from the tss are only accessed during 1090 * debugging, no special alignment required. 1091 */ 1092 DEFINE_PER_CPU(struct orig_ist, orig_ist); 1093 1094 static DEFINE_PER_CPU(unsigned long, debug_stack_addr); 1095 DEFINE_PER_CPU(int, debug_stack_usage); 1096 1097 int is_debug_stack(unsigned long addr) 1098 { 1099 return __get_cpu_var(debug_stack_usage) || 1100 (addr <= __get_cpu_var(debug_stack_addr) && 1101 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); 1102 } 1103 1104 static DEFINE_PER_CPU(u32, debug_stack_use_ctr); 1105 1106 void debug_stack_set_zero(void) 1107 { 1108 this_cpu_inc(debug_stack_use_ctr); 1109 load_idt((const struct desc_ptr *)&nmi_idt_descr); 1110 } 1111 1112 void debug_stack_reset(void) 1113 { 1114 if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) 1115 return; 1116 if (this_cpu_dec_return(debug_stack_use_ctr) == 0) 1117 load_idt((const struct desc_ptr *)&idt_descr); 1118 } 1119 1120 #else /* CONFIG_X86_64 */ 1121 1122 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 1123 EXPORT_PER_CPU_SYMBOL(current_task); 1124 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); 1125 1126 #ifdef CONFIG_CC_STACKPROTECTOR 1127 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); 1128 #endif 1129 1130 /* Make sure %fs and %gs are initialized properly in idle threads */ 1131 struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 1132 { 1133 memset(regs, 0, sizeof(struct pt_regs)); 1134 regs->fs = __KERNEL_PERCPU; 1135 regs->gs = __KERNEL_STACK_CANARY; 1136 1137 return regs; 1138 } 1139 #endif /* CONFIG_X86_64 */ 1140 1141 /* 1142 * Clear all 6 debug registers: 1143 */ 1144 static void clear_all_debug_regs(void) 1145 { 1146 int i; 1147 1148 for (i = 0; i < 8; i++) { 1149 /* Ignore db4, db5 */ 1150 if ((i == 4) || (i == 5)) 1151 continue; 1152 1153 set_debugreg(0, i); 1154 } 1155 } 1156 1157 #ifdef CONFIG_KGDB 1158 /* 1159 * Restore debug regs if using kgdbwait and you have a kernel debugger 1160 * connection established. 1161 */ 1162 static void dbg_restore_debug_regs(void) 1163 { 1164 if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) 1165 arch_kgdb_ops.correct_hw_break(); 1166 } 1167 #else /* ! CONFIG_KGDB */ 1168 #define dbg_restore_debug_regs() 1169 #endif /* ! CONFIG_KGDB */ 1170 1171 /* 1172 * cpu_init() initializes state that is per-CPU. Some data is already 1173 * initialized (naturally) in the bootstrap process, such as the GDT 1174 * and IDT. We reload them nevertheless, this function acts as a 1175 * 'CPU state barrier', nothing should get across. 1176 * A lot of state is already set up in PDA init for 64 bit 1177 */ 1178 #ifdef CONFIG_X86_64 1179 1180 void __cpuinit cpu_init(void) 1181 { 1182 struct orig_ist *oist; 1183 struct task_struct *me; 1184 struct tss_struct *t; 1185 unsigned long v; 1186 int cpu; 1187 int i; 1188 1189 cpu = stack_smp_processor_id(); 1190 t = &per_cpu(init_tss, cpu); 1191 oist = &per_cpu(orig_ist, cpu); 1192 1193 #ifdef CONFIG_NUMA 1194 if (cpu != 0 && this_cpu_read(numa_node) == 0 && 1195 early_cpu_to_node(cpu) != NUMA_NO_NODE) 1196 set_numa_node(early_cpu_to_node(cpu)); 1197 #endif 1198 1199 me = current; 1200 1201 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) 1202 panic("CPU#%d already initialized!\n", cpu); 1203 1204 pr_debug("Initializing CPU#%d\n", cpu); 1205 1206 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1207 1208 /* 1209 * Initialize the per-CPU GDT with the boot GDT, 1210 * and set up the GDT descriptor: 1211 */ 1212 1213 switch_to_new_gdt(cpu); 1214 loadsegment(fs, 0); 1215 1216 load_idt((const struct desc_ptr *)&idt_descr); 1217 1218 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); 1219 syscall_init(); 1220 1221 wrmsrl(MSR_FS_BASE, 0); 1222 wrmsrl(MSR_KERNEL_GS_BASE, 0); 1223 barrier(); 1224 1225 x86_configure_nx(); 1226 if (cpu != 0) 1227 enable_x2apic(); 1228 1229 /* 1230 * set up and load the per-CPU TSS 1231 */ 1232 if (!oist->ist[0]) { 1233 char *estacks = per_cpu(exception_stacks, cpu); 1234 1235 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1236 estacks += exception_stack_sizes[v]; 1237 oist->ist[v] = t->x86_tss.ist[v] = 1238 (unsigned long)estacks; 1239 if (v == DEBUG_STACK-1) 1240 per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; 1241 } 1242 } 1243 1244 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1245 1246 /* 1247 * <= is required because the CPU will access up to 1248 * 8 bits beyond the end of the IO permission bitmap. 1249 */ 1250 for (i = 0; i <= IO_BITMAP_LONGS; i++) 1251 t->io_bitmap[i] = ~0UL; 1252 1253 atomic_inc(&init_mm.mm_count); 1254 me->active_mm = &init_mm; 1255 BUG_ON(me->mm); 1256 enter_lazy_tlb(&init_mm, me); 1257 1258 load_sp0(t, ¤t->thread); 1259 set_tss_desc(cpu, t); 1260 load_TR_desc(); 1261 load_LDT(&init_mm.context); 1262 1263 clear_all_debug_regs(); 1264 dbg_restore_debug_regs(); 1265 1266 fpu_init(); 1267 xsave_init(); 1268 1269 raw_local_save_flags(kernel_eflags); 1270 1271 if (is_uv_system()) 1272 uv_cpu_init(); 1273 } 1274 1275 #else 1276 1277 void __cpuinit cpu_init(void) 1278 { 1279 int cpu = smp_processor_id(); 1280 struct task_struct *curr = current; 1281 struct tss_struct *t = &per_cpu(init_tss, cpu); 1282 struct thread_struct *thread = &curr->thread; 1283 1284 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { 1285 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); 1286 for (;;) 1287 local_irq_enable(); 1288 } 1289 1290 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 1291 1292 if (cpu_has_vme || cpu_has_tsc || cpu_has_de) 1293 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1294 1295 load_idt(&idt_descr); 1296 switch_to_new_gdt(cpu); 1297 1298 /* 1299 * Set up and load the per-CPU TSS and LDT 1300 */ 1301 atomic_inc(&init_mm.mm_count); 1302 curr->active_mm = &init_mm; 1303 BUG_ON(curr->mm); 1304 enter_lazy_tlb(&init_mm, curr); 1305 1306 load_sp0(t, thread); 1307 set_tss_desc(cpu, t); 1308 load_TR_desc(); 1309 load_LDT(&init_mm.context); 1310 1311 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1312 1313 #ifdef CONFIG_DOUBLEFAULT 1314 /* Set up doublefault TSS pointer in the GDT */ 1315 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 1316 #endif 1317 1318 clear_all_debug_regs(); 1319 dbg_restore_debug_regs(); 1320 1321 fpu_init(); 1322 xsave_init(); 1323 } 1324 #endif 1325