/*
 * CPU setup for Centaur/IDT/VIA processors:
 *  - WinChip (family 5): emulate MTRRs via Centaur MCRs and, when
 *    CONFIG_X86_OOSTORE is set, enable write combining / weak ordering.
 *  - VIA C3 (family 6): enable ACE crypto and RNG units where present.
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bitops.h>

#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/e820.h>
#include <asm/mtrr.h>

#include "cpu.h"

#ifdef CONFIG_X86_OOSTORE

/*
 * Return the largest power of two that is <= x (0 when x == 0).
 * Used to size MCR ranges, which must be power-of-two sized.
 */
static u32 __cpuinit power2(u32 x)
{
	u32 s = 1;

	while (s <= x)
		s <<= 1;

	/* Loop overshoots by one doubling; step back down. */
	return s >>= 1;
}


/*
 * Set up an actual MCR
 *
 * Program MCR register @reg (offset from MSR_IDT_MCR0) to cover @size
 * bytes at @base with attribute @key, and mirror the setting into the
 * generic MTRR layer.  @size must be a power of two (see power2()).
 */
static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
{
	u32 lo, hi;

	hi = base & ~0xFFF;
	lo = ~(size-1);		/* Size is a power of 2 so this makes a mask */
	lo &= ~0xFFF;		/* Remove the ctrl value bits */
	lo |= key;		/* Attribute we wish to set */
	wrmsr(reg+MSR_IDT_MCR0, lo, hi);
	mtrr_centaur_report_mcr(reg, lo, hi);	/* Tell the mtrr driver */
}

/*
 * Figure what we can cover with MCR's
 *
 * Walk the e820 map and return the top of usable RAM below 4GB,
 * clipped at the first reserved region above 1MB so we never mark
 * reserved space (e.g. the EBDA) as write-combining.
 *
 * Shortcut: We know you can't put 4Gig of RAM on a winchip
 */
static u32 __cpuinit ramtop(void)
{
	u32 clip = 0xFFFFFFFFUL;
	u32 top = 0;
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long start, end;

		/* Entries entirely above 4GB cannot matter on a WinChip. */
		if (e820.map[i].addr > 0xFFFFFFFFUL)
			continue;
		/*
		 * Don't MCR over reserved space. Ignore the ISA hole
		 * we frob around that catastrophe already
		 */
		if (e820.map[i].type == E820_RESERVED) {
			if (e820.map[i].addr >= 0x100000UL &&
			    e820.map[i].addr < clip)
				clip = e820.map[i].addr;
			continue;
		}
		start = e820.map[i].addr;
		end = e820.map[i].addr + e820.map[i].size;
		if (start >= end)
			continue;
		if (end > top)
			top = end;
	}
	/*
	 * Everything below 'top' should be RAM except for the ISA hole.
	 * Because of the limited MCR's we want to map NV/ACPI into our
	 * MCR range for gunk in RAM
	 *
	 * Clip might cause us to MCR insufficient RAM but that is an
	 * acceptable failure mode and should only bite obscure boxes with
	 * a VESA hole at 15Mb
	 *
	 * The second case Clip sometimes kicks in is when the EBDA is marked
	 * as reserved. Again we fail safe with reasonable results
	 */
	if (top > clip)
		top = clip;

	return top;
}

/*
 * Compute a set of MCR's to give maximum coverage
 *
 * Greedily place up to @nr MCRs with attribute @key: starting from the
 * largest power-of-two block of RAM, repeatedly claim the biggest
 * remaining power-of-two region either above 'top', below 'base', or in
 * the sub-1MB area below the ISA hole.  Returns the number of MCRs
 * actually programmed; the caller is responsible for setting the enable
 * mask bits.
 */
static int __cpuinit centaur_mcr_compute(int nr, int key)
{
	u32 mem = ramtop();
	u32 root = power2(mem);
	u32 base = root;
	u32 top = root;
	u32 floor = 0;
	int ct = 0;

	while (ct < nr) {
		u32 fspace = 0;
		u32 high;
		u32 low;

		/*
		 * Find the largest block we will fill going upwards
		 */
		high = power2(mem-top);

		/*
		 * Find the largest block we will fill going downwards
		 */
		low = base/2;

		/*
		 * Don't fill below 1Mb going downwards as there
		 * is an ISA hole in the way.
		 */
		if (base <= 1024*1024)
			low = 0;

		/*
		 * See how much space we could cover by filling below
		 * the ISA hole
		 */

		if (floor == 0)
			fspace = 512*1024;
		else if (floor == 512*1024)
			fspace = 128*1024;

		/* And forget ROM space */

		/*
		 * Now install the largest coverage we get
		 */
		if (fspace > high && fspace > low) {
			/* Sub-1MB block below the ISA hole wins. */
			centaur_mcr_insert(ct, floor, fspace, key);
			floor += fspace;
		} else if (high > low) {
			/* Grow coverage upwards past 'top'. */
			centaur_mcr_insert(ct, top, high, key);
			top += high;
		} else if (low > 0) {
			/* Grow coverage downwards below 'base'. */
			base -= low;
			centaur_mcr_insert(ct, base, low, key);
		} else
			break;	/* Nothing left worth covering. */
		ct++;
	}
	/*
	 * We loaded ct values. We now need to set the mask. The caller
	 * must do this bit.
	 */
	return ct;
}

/*
 * Program MCRs for the original WinChip (C6).  The C6 has no separate
 * MCR key/enable handling, so we only compute and wipe registers here;
 * the caller writes MSR_IDT_MCR_CTRL itself.
 */
static void __cpuinit centaur_create_optimal_mcr(void)
{
	int used;
	int i;

	/*
	 * Allocate up to 6 mcrs to mark as much of ram as possible
	 * as write combining and weak write ordered.
	 *
	 * To experiment with: Linux never uses stack operations for
	 * mmio spaces so we could globally enable stack operation wc
	 *
	 * Load the registers with type 31 - full write combining, all
	 * writes weakly ordered.
	 */
	used = centaur_mcr_compute(6, 31);

	/*
	 * Wipe unused MCRs
	 */
	for (i = used; i < 8; i++)
		wrmsr(MSR_IDT_MCR0+i, 0, 0);
}

/*
 * Program MCRs for the WinChip 2/3, which additionally require the
 * per-register enable bits in MSR_IDT_MCR_CTRL to be set.  The caller
 * must have unprotected the MCRs first (see winchip2_unprotect_mcr()).
 */
static void __cpuinit winchip2_create_optimal_mcr(void)
{
	u32 lo, hi;
	int used;
	int i;

	/*
	 * Allocate up to 6 mcrs to mark as much of ram as possible
	 * as write combining, weak store ordered.
	 *
	 * Load the registers with type 25
	 *	8	-	weak write ordering
	 *	16	-	weak read ordering
	 *	1	-	write combining
	 */
	used = centaur_mcr_compute(6, 25);

	/*
	 * Mark the registers we are using.
	 */
	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
	for (i = 0; i < used; i++)
		lo |= 1<<(9+i);	/* enable bit for MCR i lives at bit 9+i */
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);

	/*
	 * Wipe unused MCRs
	 */

	for (i = used; i < 8; i++)
		wrmsr(MSR_IDT_MCR0+i, 0, 0);
}

/*
 * Handle the MCR key on the Winchip 2.
 *
 * The MCRs are write-protected by a 3-bit key; the expected key value
 * is readable at bits 19:17 of MSR_IDT_MCR_CTRL and must be written
 * back into bits 8:6 to unlock.
 */
static void __cpuinit winchip2_unprotect_mcr(void)
{
	u32 lo, hi;
	u32 key;

	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
	lo &= ~0x1C0;	/* blank bits 8-6 */
	key = (lo>>17) & 7;
	lo |= key<<6;	/* replace with unlock key */
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}

/* Re-protect the MCRs by clearing the key field (bits 8-6). */
static void __cpuinit winchip2_protect_mcr(void)
{
	u32 lo, hi;

	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
	lo &= ~0x1C0;	/* blank bits 8-6 */
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}
#endif /* CONFIG_X86_OOSTORE */

/* CPUID 0xC0000001 EDX feature bits and MSR_VIA_FCR enable bit. */
#define ACE_PRESENT	(1 << 6)
#define ACE_ENABLED	(1 << 7)
#define ACE_FCR		(1 << 28)	/* MSR_VIA_FCR */

/* CPUID 0xC0000001 EDX feature bits and MSR_VIA_RNG enable bit. */
#define RNG_PRESENT	(1 << 2)
#define RNG_ENABLED	(1 << 3)
#define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */

/*
 * Family-6 (VIA C3) setup: enable the ACE crypto unit and hardware RNG
 * if present but disabled, record the Centaur extended feature flags,
 * and fix up capability bits the CPUID data gets wrong.
 */
static void __cpuinit init_c3(struct cpuinfo_x86 *c)
{
	u32 lo, hi;

	/* Test for Centaur Extended Feature Flags presence */
	if (cpuid_eax(0xC0000000) >= 0xC0000001) {
		u32 tmp = cpuid_edx(0xC0000001);

		/* enable ACE unit, if present and disabled */
		if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
			rdmsr(MSR_VIA_FCR, lo, hi);
			lo |= ACE_FCR;		/* enable ACE unit */
			wrmsr(MSR_VIA_FCR, lo, hi);
			printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
		}

		/* enable RNG unit, if present and disabled */
		if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
			rdmsr(MSR_VIA_RNG, lo, hi);
			lo |= RNG_ENABLE;	/* enable RNG unit */
			wrmsr(MSR_VIA_RNG, lo, hi);
			printk(KERN_INFO "CPU: Enabled h/w RNG\n");
		}

		/* store Centaur Extended Feature Flags as
		 * word 5 of the CPU capability bit array
		 * (re-reads CPUID so the just-enabled bits are reflected)
		 */
		c->x86_capability[5] = cpuid_edx(0xC0000001);
	}

	/* Cyrix III family needs CX8 & PGE explicitly enabled. */
	if (c->x86_model >= 6 && c->x86_model <= 9) {
		rdmsr(MSR_VIA_FCR, lo, hi);
		lo |= (1<<1 | 1<<7);	/* set CX8 and PGE enable bits */
		wrmsr(MSR_VIA_FCR, lo, hi);
		set_cpu_cap(c, X86_FEATURE_CX8);
	}

	/* Before Nehemiah, the C3's had 3dNOW! */
	if (c->x86_model >= 6 && c->x86_model < 9)
		set_cpu_cap(c, X86_FEATURE_3DNOW);

	display_cacheinfo(c);
}

/*
 * Bits in MSR_IDT_FCR1 (WinChip feature control register).
 * E* = enable, D* = disable.
 */
enum {
	ECX8		= 1<<1,
	EIERRINT	= 1<<2,
	DPM		= 1<<3,
	DMCE		= 1<<4,
	DSTPCLK		= 1<<5,
	ELINEAR		= 1<<6,
	DSMC		= 1<<7,
	DTLOCK		= 1<<8,
	/* NOTE(review): EDCTLB shares bit 8 with DTLOCK — looks intentional
	 * (same FCR bit has different meaning per model); confirm against
	 * the IDT datasheet. */
	EDCTLB		= 1<<8,
	EMMX		= 1<<9,
	DPDC		= 1<<11,
	EBRPRED		= 1<<12,
	DIC		= 1<<13,
	DDC		= 1<<14,
	DNA		= 1<<15,
	ERETSTK		= 1<<16,
	E2MMX		= 1<<19,
	EAMD3D		= 1<<20,
};

/*
 * Early (pre-main-init) setup: family 5 WinChips emulate MTRRs with
 * MCRs, so advertise that capability before the MTRR code runs.
 */
static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
{
	switch (c->x86) {
	case 5:
		/* Emulate MTRRs using Centaur's MCR. */
		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
		break;
	}
}

/*
 * Main per-CPU init.  Family 5 (WinChip C6/2/3): program the FCR bits
 * for the specific model, optionally set up MCR write combining, and
 * fill in cache size and model name.  Family 6 (C3): delegate to
 * init_c3().
 */
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{

	char *name;
	u32 fcr_set = 0;
	u32 fcr_clr = 0;
	u32 lo, hi, newlo;
	u32 aa, bb, cc, dd;

	/*
	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
	 */
	clear_cpu_cap(c, 0*32+31);

	switch (c->x86) {
	case 5:
		switch (c->x86_model) {
		case 4:		/* WinChip C6 */
			name = "C6";
			fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
			fcr_clr = DPDC;
			printk(KERN_NOTICE "Disabling bugged TSC.\n");
			clear_cpu_cap(c, X86_FEATURE_TSC);
#ifdef CONFIG_X86_OOSTORE
			centaur_create_optimal_mcr();
			/*
			 * Enable:
			 *	write combining on non-stack, non-string
			 *	write combining on string, all types
			 *	weak write ordering
			 *
			 * The C6 original lacks weak read order
			 *
			 * Note 0x120 is write only on Winchip 1
			 */
			wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
#endif
			break;
		case 8:		/* WinChip 2: stepping determines revision */
			switch (c->x86_mask) {
			default:
				name = "2";
				break;
			case 7 ... 9:
				name = "2A";
				break;
			case 10 ... 15:
				name = "2B";
				break;
			}
			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
				  E2MMX|EAMD3D;
			fcr_clr = DPDC;
#ifdef CONFIG_X86_OOSTORE
			winchip2_unprotect_mcr();
			winchip2_create_optimal_mcr();
			rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
			/*
			 * Enable:
			 *	write combining on non-stack, non-string
			 *	write combining on string, all types
			 *	weak write ordering
			 */
			lo |= 31;
			wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
			winchip2_protect_mcr();
#endif
			break;
		case 9:		/* WinChip 3: same FCR/MCR setup as WinChip 2 */
			name = "3";
			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
				  E2MMX|EAMD3D;
			fcr_clr = DPDC;
#ifdef CONFIG_X86_OOSTORE
			winchip2_unprotect_mcr();
			winchip2_create_optimal_mcr();
			rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
			/*
			 * Enable:
			 *	write combining on non-stack, non-string
			 *	write combining on string, all types
			 *	weak write ordering
			 */
			lo |= 31;
			wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
			winchip2_protect_mcr();
#endif
			break;
		default:
			name = "??";
		}

		/* Apply the per-model FCR changes, writing only if needed. */
		rdmsr(MSR_IDT_FCR1, lo, hi);
		newlo = (lo|fcr_set) & (~fcr_clr);

		if (newlo != lo) {
			printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
				lo, newlo);
			wrmsr(MSR_IDT_FCR1, newlo, hi);
		} else {
			printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
		}
		/* Emulate MTRRs using Centaur's MCR. */
		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
		/* Report CX8 */
		set_cpu_cap(c, X86_FEATURE_CX8);
		/* Set 3DNow! on Winchip 2 and above. */
		if (c->x86_model >= 8)
			set_cpu_cap(c, X86_FEATURE_3DNOW);
		/* See if we can find out some more. */
		if (cpuid_eax(0x80000000) >= 0x80000005) {
			/* Yes, we can. */
			cpuid(0x80000005, &aa, &bb, &cc, &dd);
			/* Add L1 data and code cache sizes. */
			c->x86_cache_size = (cc>>24)+(dd>>24);
		}
		sprintf(c->x86_model_id, "WinChip %s", name);
		break;

	case 6:
		init_c3(c);
		break;
	}
}

/*
 * Fix up the L2 cache size reported by CPUID 0x80000006 for models
 * whose hardware reports it wrongly.
 */
static unsigned int __cpuinit
centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
	/* VIA C3 CPUs (670-68F) need further shifting. */
	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
		size >>= 8;

	/*
	 * There's also an erratum in Nehemiah stepping 1, which
	 * returns '65KB' instead of '64KB'
	 * - Note, it seems this may only be in engineering samples.
	 */
	if ((c->x86 == 6) && (c->x86_model == 9) &&
	    (c->x86_mask == 1) && (size == 65))
		size -= 1;

	return size;
}

/* Vendor hooks registered with the generic x86 CPU identification code. */
static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
	.c_vendor	= "Centaur",
	.c_ident	= { "CentaurHauls" },
	.c_early_init	= early_init_centaur,
	.c_init		= init_centaur,
	.c_size_cache	= centaur_size_cache,
	.c_x86_vendor	= X86_VENDOR_CENTAUR,
};

cpu_dev_register(centaur_cpu_dev);