/*  Generic MTRR (Memory Type Range Register) driver.

    Copyright (C) 1997-2000 Richard Gooch
    Copyright (c) 2002 Patrick Mochel

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    Richard Gooch may be reached by email at rgooch@atnf.csiro.au
    The postal address is:
      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.

    Source: "Pentium Pro Family Developer's Manual, Volume 3:
    Operating System Writer's Guide" (Intel document number 242692),
    section 11.11.7

    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
    on 6-7 March 2002.
    Source: Intel Architecture Software Developers Manual, Volume 3:
    System Programming Guide; Section 9.11. (1997 edition - PPro).
32 */ 33 34 #include <linux/types.h> /* FIXME: kvm_para.h needs this */ 35 36 #include <linux/stop_machine.h> 37 #include <linux/kvm_para.h> 38 #include <linux/uaccess.h> 39 #include <linux/export.h> 40 #include <linux/mutex.h> 41 #include <linux/init.h> 42 #include <linux/sort.h> 43 #include <linux/cpu.h> 44 #include <linux/pci.h> 45 #include <linux/smp.h> 46 #include <linux/syscore_ops.h> 47 #include <linux/rcupdate.h> 48 49 #include <asm/cacheinfo.h> 50 #include <asm/cpufeature.h> 51 #include <asm/e820/api.h> 52 #include <asm/mtrr.h> 53 #include <asm/msr.h> 54 #include <asm/memtype.h> 55 56 #include "mtrr.h" 57 58 static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE); 59 static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB); 60 static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH); 61 static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT); 62 static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK); 63 64 /* arch_phys_wc_add returns an MTRR register index plus this offset. */ 65 #define MTRR_TO_PHYS_WC_OFFSET 1000 66 67 u32 num_var_ranges; 68 69 unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; 70 DEFINE_MUTEX(mtrr_mutex); 71 72 const struct mtrr_ops *mtrr_if; 73 74 /* Returns non-zero if we have the write-combining memory type */ 75 static int have_wrcomb(void) 76 { 77 struct pci_dev *dev; 78 79 dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); 80 if (dev != NULL) { 81 /* 82 * ServerWorks LE chipsets < rev 6 have problems with 83 * write-combining. Don't allow it and leave room for other 84 * chipsets to be tagged 85 */ 86 if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && 87 dev->device == PCI_DEVICE_ID_SERVERWORKS_LE && 88 dev->revision <= 5) { 89 pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n"); 90 pci_dev_put(dev); 91 return 0; 92 } 93 /* 94 * Intel 450NX errata # 23. 
Non ascending cacheline evictions to 95 * write combining memory may resulting in data corruption 96 */ 97 if (dev->vendor == PCI_VENDOR_ID_INTEL && 98 dev->device == PCI_DEVICE_ID_INTEL_82451NX) { 99 pr_info("Intel 450NX MMC detected. Write-combining disabled.\n"); 100 pci_dev_put(dev); 101 return 0; 102 } 103 pci_dev_put(dev); 104 } 105 return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; 106 } 107 108 static void __init init_table(void) 109 { 110 int i, max; 111 112 max = num_var_ranges; 113 for (i = 0; i < max; i++) 114 mtrr_usage_table[i] = 1; 115 } 116 117 struct set_mtrr_data { 118 unsigned long smp_base; 119 unsigned long smp_size; 120 unsigned int smp_reg; 121 mtrr_type smp_type; 122 }; 123 124 /** 125 * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed 126 * by all the CPUs. 127 * @info: pointer to mtrr configuration data 128 * 129 * Returns nothing. 130 */ 131 static int mtrr_rendezvous_handler(void *info) 132 { 133 struct set_mtrr_data *data = info; 134 135 mtrr_if->set(data->smp_reg, data->smp_base, 136 data->smp_size, data->smp_type); 137 return 0; 138 } 139 140 static inline int types_compatible(mtrr_type type1, mtrr_type type2) 141 { 142 return type1 == MTRR_TYPE_UNCACHABLE || 143 type2 == MTRR_TYPE_UNCACHABLE || 144 (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || 145 (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH); 146 } 147 148 /** 149 * set_mtrr - update mtrrs on all processors 150 * @reg: mtrr in question 151 * @base: mtrr base 152 * @size: mtrr size 153 * @type: mtrr type 154 * 155 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: 156 * 157 * 1. Queue work to do the following on all processors: 158 * 2. Disable Interrupts 159 * 3. Wait for all procs to do so 160 * 4. Enter no-fill cache mode 161 * 5. Flush caches 162 * 6. Clear PGE bit 163 * 7. Flush all TLBs 164 * 8. Disable all range registers 165 * 9. Update the MTRRs 166 * 10. 
Enable all range registers 167 * 11. Flush all TLBs and caches again 168 * 12. Enter normal cache mode and reenable caching 169 * 13. Set PGE 170 * 14. Wait for buddies to catch up 171 * 15. Enable interrupts. 172 * 173 * What does that mean for us? Well, stop_machine() will ensure that 174 * the rendezvous handler is started on each CPU. And in lockstep they 175 * do the state transition of disabling interrupts, updating MTRR's 176 * (the CPU vendors may each do it differently, so we call mtrr_if->set() 177 * callback and let them take care of it.) and enabling interrupts. 178 * 179 * Note that the mechanism is the same for UP systems, too; all the SMP stuff 180 * becomes nops. 181 */ 182 static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, 183 mtrr_type type) 184 { 185 struct set_mtrr_data data = { .smp_reg = reg, 186 .smp_base = base, 187 .smp_size = size, 188 .smp_type = type 189 }; 190 191 stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask); 192 193 generic_rebuild_map(); 194 } 195 196 /** 197 * mtrr_add_page - Add a memory type region 198 * @base: Physical base address of region in pages (in units of 4 kB!) 199 * @size: Physical size of region in pages (4 kB) 200 * @type: Type of MTRR desired 201 * @increment: If this is true do usage counting on the region 202 * 203 * Memory type region registers control the caching on newer Intel and 204 * non Intel processors. This function allows drivers to request an 205 * MTRR is added. The details and hardware specifics of each processor's 206 * implementation are hidden from the caller, but nevertheless the 207 * caller should expect to need to provide a power of two size on an 208 * equivalent power of two boundary. 209 * 210 * If the region cannot be added either because all regions are in use 211 * or the CPU cannot support it a negative value is returned. On success 212 * the register number for this entry is returned, but should be treated 213 * as a cookie only. 
214 * 215 * On a multiprocessor machine the changes are made to all processors. 216 * This is required on x86 by the Intel processors. 217 * 218 * The available types are 219 * 220 * %MTRR_TYPE_UNCACHABLE - No caching 221 * 222 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever 223 * 224 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts 225 * 226 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes 227 * 228 * BUGS: Needs a quiet flag for the cases where drivers do not mind 229 * failures and do not wish system log messages to be sent. 230 */ 231 int mtrr_add_page(unsigned long base, unsigned long size, 232 unsigned int type, bool increment) 233 { 234 unsigned long lbase, lsize; 235 int i, replace, error; 236 mtrr_type ltype; 237 238 if (!mtrr_enabled()) 239 return -ENXIO; 240 241 error = mtrr_if->validate_add_page(base, size, type); 242 if (error) 243 return error; 244 245 if (type >= MTRR_NUM_TYPES) { 246 pr_warn("type: %u invalid\n", type); 247 return -EINVAL; 248 } 249 250 /* If the type is WC, check that this processor supports it */ 251 if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { 252 pr_warn("your processor doesn't support write-combining\n"); 253 return -ENOSYS; 254 } 255 256 if (!size) { 257 pr_warn("zero sized request\n"); 258 return -EINVAL; 259 } 260 261 if ((base | (base + size - 1)) >> 262 (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) { 263 pr_warn("base or size exceeds the MTRR width\n"); 264 return -EINVAL; 265 } 266 267 error = -EINVAL; 268 replace = -1; 269 270 /* No CPU hotplug when we change MTRR entries */ 271 cpus_read_lock(); 272 273 /* Search for existing MTRR */ 274 mutex_lock(&mtrr_mutex); 275 for (i = 0; i < num_var_ranges; ++i) { 276 mtrr_if->get(i, &lbase, &lsize, <ype); 277 if (!lsize || base > lbase + lsize - 1 || 278 base + size - 1 < lbase) 279 continue; 280 /* 281 * At this point we know there is some kind of 282 * overlap/enclosure 283 */ 284 if (base < lbase || base + size - 1 > lbase + lsize - 1) { 285 if 
(base <= lbase && 286 base + size - 1 >= lbase + lsize - 1) { 287 /* New region encloses an existing region */ 288 if (type == ltype) { 289 replace = replace == -1 ? i : -2; 290 continue; 291 } else if (types_compatible(type, ltype)) 292 continue; 293 } 294 pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase, 295 lsize); 296 goto out; 297 } 298 /* New region is enclosed by an existing region */ 299 if (ltype != type) { 300 if (types_compatible(type, ltype)) 301 continue; 302 pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n", 303 base, size, mtrr_attrib_to_str(ltype), 304 mtrr_attrib_to_str(type)); 305 goto out; 306 } 307 if (increment) 308 ++mtrr_usage_table[i]; 309 error = i; 310 goto out; 311 } 312 /* Search for an empty MTRR */ 313 i = mtrr_if->get_free_region(base, size, replace); 314 if (i >= 0) { 315 set_mtrr(i, base, size, type); 316 if (likely(replace < 0)) { 317 mtrr_usage_table[i] = 1; 318 } else { 319 mtrr_usage_table[i] = mtrr_usage_table[replace]; 320 if (increment) 321 mtrr_usage_table[i]++; 322 if (unlikely(replace != i)) { 323 set_mtrr(replace, 0, 0, 0); 324 mtrr_usage_table[replace] = 0; 325 } 326 } 327 } else { 328 pr_info("no more MTRRs available\n"); 329 } 330 error = i; 331 out: 332 mutex_unlock(&mtrr_mutex); 333 cpus_read_unlock(); 334 return error; 335 } 336 337 static int mtrr_check(unsigned long base, unsigned long size) 338 { 339 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { 340 pr_warn("size and base must be multiples of 4 kiB\n"); 341 Dprintk("size: 0x%lx base: 0x%lx\n", size, base); 342 dump_stack(); 343 return -1; 344 } 345 return 0; 346 } 347 348 /** 349 * mtrr_add - Add a memory type region 350 * @base: Physical base address of region 351 * @size: Physical size of region 352 * @type: Type of MTRR desired 353 * @increment: If this is true do usage counting on the region 354 * 355 * Memory type region registers control the caching on newer Intel and 356 * non Intel processors. 
This function allows drivers to request an 357 * MTRR is added. The details and hardware specifics of each processor's 358 * implementation are hidden from the caller, but nevertheless the 359 * caller should expect to need to provide a power of two size on an 360 * equivalent power of two boundary. 361 * 362 * If the region cannot be added either because all regions are in use 363 * or the CPU cannot support it a negative value is returned. On success 364 * the register number for this entry is returned, but should be treated 365 * as a cookie only. 366 * 367 * On a multiprocessor machine the changes are made to all processors. 368 * This is required on x86 by the Intel processors. 369 * 370 * The available types are 371 * 372 * %MTRR_TYPE_UNCACHABLE - No caching 373 * 374 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever 375 * 376 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts 377 * 378 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes 379 * 380 * BUGS: Needs a quiet flag for the cases where drivers do not mind 381 * failures and do not wish system log messages to be sent. 382 */ 383 int mtrr_add(unsigned long base, unsigned long size, unsigned int type, 384 bool increment) 385 { 386 if (!mtrr_enabled()) 387 return -ENODEV; 388 if (mtrr_check(base, size)) 389 return -EINVAL; 390 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, 391 increment); 392 } 393 394 /** 395 * mtrr_del_page - delete a memory type region 396 * @reg: Register returned by mtrr_add 397 * @base: Physical base address 398 * @size: Size of region 399 * 400 * If register is supplied then base and size are ignored. This is 401 * how drivers should call it. 402 * 403 * Releases an MTRR region. If the usage count drops to zero the 404 * register is freed and the region returns to default state. 405 * On success the register is returned, on failure a negative error 406 * code. 
407 */ 408 int mtrr_del_page(int reg, unsigned long base, unsigned long size) 409 { 410 int i, max; 411 mtrr_type ltype; 412 unsigned long lbase, lsize; 413 int error = -EINVAL; 414 415 if (!mtrr_enabled()) 416 return -ENODEV; 417 418 max = num_var_ranges; 419 /* No CPU hotplug when we change MTRR entries */ 420 cpus_read_lock(); 421 mutex_lock(&mtrr_mutex); 422 if (reg < 0) { 423 /* Search for existing MTRR */ 424 for (i = 0; i < max; ++i) { 425 mtrr_if->get(i, &lbase, &lsize, <ype); 426 if (lbase == base && lsize == size) { 427 reg = i; 428 break; 429 } 430 } 431 if (reg < 0) { 432 Dprintk("no MTRR for %lx000,%lx000 found\n", base, size); 433 goto out; 434 } 435 } 436 if (reg >= max) { 437 pr_warn("register: %d too big\n", reg); 438 goto out; 439 } 440 mtrr_if->get(reg, &lbase, &lsize, <ype); 441 if (lsize < 1) { 442 pr_warn("MTRR %d not used\n", reg); 443 goto out; 444 } 445 if (mtrr_usage_table[reg] < 1) { 446 pr_warn("reg: %d has count=0\n", reg); 447 goto out; 448 } 449 if (--mtrr_usage_table[reg] < 1) 450 set_mtrr(reg, 0, 0, 0); 451 error = reg; 452 out: 453 mutex_unlock(&mtrr_mutex); 454 cpus_read_unlock(); 455 return error; 456 } 457 458 /** 459 * mtrr_del - delete a memory type region 460 * @reg: Register returned by mtrr_add 461 * @base: Physical base address 462 * @size: Size of region 463 * 464 * If register is supplied then base and size are ignored. This is 465 * how drivers should call it. 466 * 467 * Releases an MTRR region. If the usage count drops to zero the 468 * register is freed and the region returns to default state. 469 * On success the register is returned, on failure a negative error 470 * code. 
471 */ 472 int mtrr_del(int reg, unsigned long base, unsigned long size) 473 { 474 if (!mtrr_enabled()) 475 return -ENODEV; 476 if (mtrr_check(base, size)) 477 return -EINVAL; 478 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); 479 } 480 481 /** 482 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable 483 * @base: Physical base address 484 * @size: Size of region 485 * 486 * If PAT is available, this does nothing. If PAT is unavailable, it 487 * attempts to add a WC MTRR covering size bytes starting at base and 488 * logs an error if this fails. 489 * 490 * The called should provide a power of two size on an equivalent 491 * power of two boundary. 492 * 493 * Drivers must store the return value to pass to mtrr_del_wc_if_needed, 494 * but drivers should not try to interpret that return value. 495 */ 496 int arch_phys_wc_add(unsigned long base, unsigned long size) 497 { 498 int ret; 499 500 if (pat_enabled() || !mtrr_enabled()) 501 return 0; /* Success! (We don't need to do anything.) */ 502 503 ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true); 504 if (ret < 0) { 505 pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.", 506 (void *)base, (void *)(base + size - 1)); 507 return ret; 508 } 509 return ret + MTRR_TO_PHYS_WC_OFFSET; 510 } 511 EXPORT_SYMBOL(arch_phys_wc_add); 512 513 /* 514 * arch_phys_wc_del - undoes arch_phys_wc_add 515 * @handle: Return value from arch_phys_wc_add 516 * 517 * This cleans up after mtrr_add_wc_if_needed. 518 * 519 * The API guarantees that mtrr_del_wc_if_needed(error code) and 520 * mtrr_del_wc_if_needed(0) do nothing. 
521 */ 522 void arch_phys_wc_del(int handle) 523 { 524 if (handle >= 1) { 525 WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET); 526 mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0); 527 } 528 } 529 EXPORT_SYMBOL(arch_phys_wc_del); 530 531 /* 532 * arch_phys_wc_index - translates arch_phys_wc_add's return value 533 * @handle: Return value from arch_phys_wc_add 534 * 535 * This will turn the return value from arch_phys_wc_add into an mtrr 536 * index suitable for debugging. 537 * 538 * Note: There is no legitimate use for this function, except possibly 539 * in printk line. Alas there is an illegitimate use in some ancient 540 * drm ioctls. 541 */ 542 int arch_phys_wc_index(int handle) 543 { 544 if (handle < MTRR_TO_PHYS_WC_OFFSET) 545 return -1; 546 else 547 return handle - MTRR_TO_PHYS_WC_OFFSET; 548 } 549 EXPORT_SYMBOL_GPL(arch_phys_wc_index); 550 551 int __initdata changed_by_mtrr_cleanup; 552 553 /** 554 * mtrr_bp_init - initialize MTRRs on the boot CPU 555 * 556 * This needs to be called early; before any of the other CPUs are 557 * initialized (i.e. before smp_init()). 558 */ 559 void __init mtrr_bp_init(void) 560 { 561 bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR); 562 const char *why = "(not available)"; 563 unsigned long config, dummy; 564 565 phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32); 566 567 if (!generic_mtrrs && mtrr_state.enabled) { 568 /* 569 * Software overwrite of MTRR state, only for generic case. 570 * Note that X86_FEATURE_MTRR has been reset in this case. 571 */ 572 init_table(); 573 mtrr_build_map(); 574 pr_info("MTRRs set to read-only\n"); 575 576 return; 577 } 578 579 if (generic_mtrrs) 580 mtrr_if = &generic_mtrr_ops; 581 else 582 mtrr_set_if(); 583 584 if (mtrr_enabled()) { 585 /* Get the number of variable MTRR ranges. 
*/ 586 if (mtrr_if == &generic_mtrr_ops) 587 rdmsr(MSR_MTRRcap, config, dummy); 588 else 589 config = mtrr_if->var_regs; 590 num_var_ranges = config & MTRR_CAP_VCNT; 591 592 init_table(); 593 if (mtrr_if == &generic_mtrr_ops) { 594 /* BIOS may override */ 595 if (get_mtrr_state()) { 596 memory_caching_control |= CACHE_MTRR; 597 changed_by_mtrr_cleanup = mtrr_cleanup(); 598 mtrr_build_map(); 599 } else { 600 mtrr_if = NULL; 601 why = "by BIOS"; 602 } 603 } 604 } 605 606 if (!mtrr_enabled()) 607 pr_info("MTRRs disabled %s\n", why); 608 } 609 610 /** 611 * mtrr_save_state - Save current fixed-range MTRR state of the first 612 * cpu in cpu_online_mask. 613 */ 614 void mtrr_save_state(void) 615 { 616 int first_cpu; 617 618 if (!mtrr_enabled() || !mtrr_state.have_fixed) 619 return; 620 621 first_cpu = cpumask_first(cpu_online_mask); 622 smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1); 623 } 624 625 static int __init mtrr_init_finalize(void) 626 { 627 /* 628 * Map might exist if guest_force_mtrr_state() has been called or if 629 * mtrr_enabled() returns true. 630 */ 631 mtrr_copy_map(); 632 633 if (!mtrr_enabled()) 634 return 0; 635 636 if (memory_caching_control & CACHE_MTRR) { 637 if (!changed_by_mtrr_cleanup) 638 mtrr_state_warn(); 639 return 0; 640 } 641 642 mtrr_register_syscore(); 643 644 return 0; 645 } 646 subsys_initcall(mtrr_init_finalize); 647