// SPDX-License-Identifier: LGPL-2.0+
/*  Generic MTRR (Memory Type Range Register) driver.

    Copyright (C) 1997-2000  Richard Gooch
    Copyright (c) 2002	     Patrick Mochel

    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
    The postal address is:
      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.

    Source: "Pentium Pro Family Developer's Manual, Volume 3:
    Operating System Writer's Guide" (Intel document number 242692),
    section 11.11.7

    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
    on 6-7 March 2002.
    Source: Intel Architecture Software Developers Manual, Volume 3:
    System Programming Guide; Section 9.11. (1997 edition - PPro).
*/

#include <linux/types.h> /* FIXME: kvm_para.h needs this */

#include <linux/stop_machine.h>
#include <linux/kvm_para.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/sort.h>
#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
#include <linux/rcupdate.h>

#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/memtype.h>

#include "mtrr.h"

static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE);
static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB);
static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH);
static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT);
static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK);

/* arch_phys_wc_add returns an MTRR register index plus this offset. */
#define MTRR_TO_PHYS_WC_OFFSET 1000
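
/*
 * Illustrative example of the encoding above (values hypothetical):
 * if arch_phys_wc_add() below ends up using variable MTRR 3, it
 * returns 3 + MTRR_TO_PHYS_WC_OFFSET == 1003, and
 * arch_phys_wc_index(1003) recovers the register index 3.  A handle
 * of 0 means nothing was done (e.g. PAT is in use), and negative
 * handles are error codes; arch_phys_wc_del() treats both as no-ops.
 */
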
u32 num_var_ranges;

unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
DEFINE_MUTEX(mtrr_mutex);

const struct mtrr_ops *mtrr_if;

/* Returns non-zero if we have the write-combining memory type */
static int have_wrcomb(void)
{
	struct pci_dev *dev;

	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
	if (dev != NULL) {
		/*
		 * ServerWorks LE chipsets < rev 6 have problems with
		 * write-combining. Don't allow it and leave room for other
		 * chipsets to be tagged.
		 */
		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
		    dev->revision <= 5) {
			pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
			pci_dev_put(dev);
			return 0;
		}
		/*
		 * Intel 450NX errata # 23. Non-ascending cacheline evictions
		 * to write-combining memory may result in data corruption.
		 */
		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
			pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
			pci_dev_put(dev);
			return 0;
		}
		pci_dev_put(dev);
	}
	return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
}

static void __init init_table(void)
{
	int i, max;

	max = num_var_ranges;
	for (i = 0; i < max; i++)
		mtrr_usage_table[i] = 1;
}

struct set_mtrr_data {
	unsigned long	smp_base;
	unsigned long	smp_size;
	unsigned int	smp_reg;
	mtrr_type	smp_type;
};

/**
 * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
 * by all the CPUs.
 * @info: pointer to mtrr configuration data
 *
 * Returns zero (the return value is required by the stop_machine() callback
 * prototype).
 */
static int mtrr_rendezvous_handler(void *info)
{
	struct set_mtrr_data *data = info;

	mtrr_if->set(data->smp_reg, data->smp_base,
		     data->smp_size, data->smp_type);
	return 0;
}

static inline int types_compatible(mtrr_type type1, mtrr_type type2)
{
	return type1 == MTRR_TYPE_UNCACHABLE ||
	       type2 == MTRR_TYPE_UNCACHABLE ||
	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
}
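
/*
 * Examples of the rule above (a sketch, derived from the checks in
 * mtrr_add_page() below): an uncachable region may overlap a region of
 * any type, and write-through and write-back regions may overlap each
 * other; any other combination, e.g. write-combining over write-back,
 * is rejected with a "type mismatch" warning.
 */
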
/**
 * set_mtrr - update mtrrs on all processors
 * @reg: mtrr in question
 * @base: mtrr base
 * @size: mtrr size
 * @type: mtrr type
 *
 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
 *
 * 1. Queue work to do the following on all processors:
 * 2. Disable interrupts
 * 3. Wait for all procs to do so
 * 4. Enter no-fill cache mode
 * 5. Flush caches
 * 6. Clear PGE bit
 * 7. Flush all TLBs
 * 8. Disable all range registers
 * 9. Update the MTRRs
 * 10. Enable all range registers
 * 11. Flush all TLBs and caches again
 * 12. Enter normal cache mode and reenable caching
 * 13. Set PGE
 * 14. Wait for buddies to catch up
 * 15. Enable interrupts.
 *
 * What does that mean for us? Well, stop_machine() will ensure that
 * the rendezvous handler is started on each CPU. And in lockstep they
 * do the state transition of disabling interrupts, updating MTRRs
 * (the CPU vendors may each do it differently, so we call the
 * mtrr_if->set() callback and let them take care of it) and enabling
 * interrupts.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
 */
static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size,
		     mtrr_type type)
{
	struct set_mtrr_data data = { .smp_reg = reg,
				      .smp_base = base,
				      .smp_size = size,
				      .smp_type = type
				    };

	stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);

	generic_rebuild_map();
}

/**
 * mtrr_add_page - Add a memory type region
 * @base: Physical base address of region in pages (in units of 4 kB!)
 * @size: Physical size of region in pages (4 kB)
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non Intel processors. This function allows drivers to request that
 * an MTRR be added. The details and hardware specifics of each
 * processor's implementation are hidden from the caller, but
 * nevertheless the caller should expect to need to provide a power of
 * two size on an equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add_page(unsigned long base, unsigned long size,
		  unsigned int type, bool increment)
{
	unsigned long lbase, lsize;
	int i, replace, error;
	mtrr_type ltype;

	if (!mtrr_enabled())
		return -ENXIO;

	error = mtrr_if->validate_add_page(base, size, type);
	if (error)
		return error;

	if (type >= MTRR_NUM_TYPES) {
		pr_warn("type: %u invalid\n", type);
		return -EINVAL;
	}

	/* If the type is WC, check that this processor supports it */
	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
		pr_warn("your processor doesn't support write-combining\n");
		return -ENOSYS;
	}

	if (!size) {
		pr_warn("zero sized request\n");
		return -EINVAL;
	}

	if ((base | (base + size - 1)) >>
	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
		pr_warn("base or size exceeds the MTRR width\n");
		return -EINVAL;
	}

	error = -EINVAL;
	replace = -1;

	/* No CPU hotplug when we change MTRR entries */
	cpus_read_lock();

	/* Search for existing MTRR */
	mutex_lock(&mtrr_mutex);
	for (i = 0; i < num_var_ranges; ++i) {
		mtrr_if->get(i, &lbase, &lsize, &ltype);
		if (!lsize || base > lbase + lsize - 1 ||
		    base + size - 1 < lbase)
			continue;
		/*
		 * At this point we know there is some kind of
		 * overlap/enclosure
		 */
		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
			if (base <= lbase &&
			    base + size - 1 >= lbase + lsize - 1) {
				/* New region encloses an existing region */
				if (type == ltype) {
					replace = replace == -1 ? i : -2;
					continue;
				} else if (types_compatible(type, ltype))
					continue;
			}
			pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n",
				base, size, lbase, lsize);
			goto out;
		}
		/* New region is enclosed by an existing region */
		if (ltype != type) {
			if (types_compatible(type, ltype))
				continue;
			pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
				base, size, mtrr_attrib_to_str(ltype),
				mtrr_attrib_to_str(type));
			goto out;
		}
		if (increment)
			++mtrr_usage_table[i];
		error = i;
		goto out;
	}
	/* Search for an empty MTRR */
	i = mtrr_if->get_free_region(base, size, replace);
	if (i >= 0) {
		set_mtrr(i, base, size, type);
		if (likely(replace < 0)) {
			mtrr_usage_table[i] = 1;
		} else {
			mtrr_usage_table[i] = mtrr_usage_table[replace];
			if (increment)
				mtrr_usage_table[i]++;
			if (unlikely(replace != i)) {
				set_mtrr(replace, 0, 0, 0);
				mtrr_usage_table[replace] = 0;
			}
		}
	} else {
		pr_info("no more MTRRs available\n");
	}
	error = i;
out:
	mutex_unlock(&mtrr_mutex);
	cpus_read_unlock();
	return error;
}
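
/*
 * Illustrative call (a sketch; the address and size are hypothetical):
 * requesting write-combining for 16 MiB of framebuffer at physical
 * address 0xd0000000, expressed in 4 kB page units:
 *
 *	int reg = mtrr_add_page(0xd0000000UL >> PAGE_SHIFT,
 *				0x1000000UL >> PAGE_SHIFT,
 *				MTRR_TYPE_WRCOMB, true);
 *
 * Both the size (16 MiB) and the base alignment are powers of two, as
 * the comment above requires; on success "reg" is an opaque cookie to
 * pass to mtrr_del_page().
 */
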
static int mtrr_check(unsigned long base, unsigned long size)
{
	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
		pr_warn("size and base must be multiples of 4 kiB\n");
		Dprintk("size: 0x%lx base: 0x%lx\n", size, base);
		dump_stack();
		return -1;
	}
	return 0;
}

/**
 * mtrr_add - Add a memory type region
 * @base: Physical base address of region
 * @size: Physical size of region
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non Intel processors. This function allows drivers to request that
 * an MTRR be added. The details and hardware specifics of each
 * processor's implementation are hidden from the caller, but
 * nevertheless the caller should expect to need to provide a power of
 * two size on an equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
	     bool increment)
{
	if (!mtrr_enabled())
		return -ENODEV;
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
			     increment);
}
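
/*
 * Typical driver usage (a sketch; "fb_base" and "fb_size" are
 * hypothetical byte-granular values, page aligned as mtrr_check()
 * demands):
 *
 *	int reg = mtrr_add(fb_base, fb_size, MTRR_TYPE_WRCOMB, true);
 *	if (reg < 0)
 *		return reg;	(or fall back to uncached access)
 *	...
 *	mtrr_del(reg, fb_base, fb_size);
 *
 * With @increment true, repeated mtrr_add() calls on the same region
 * bump a usage count that mtrr_del() decrements.
 */
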
/**
 * mtrr_del_page - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
 */
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
	int i, max;
	mtrr_type ltype;
	unsigned long lbase, lsize;
	int error = -EINVAL;

	if (!mtrr_enabled())
		return -ENODEV;

	max = num_var_ranges;
	/* No CPU hotplug when we change MTRR entries */
	cpus_read_lock();
	mutex_lock(&mtrr_mutex);
	if (reg < 0) {
		/* Search for existing MTRR */
		for (i = 0; i < max; ++i) {
			mtrr_if->get(i, &lbase, &lsize, &ltype);
			if (lbase == base && lsize == size) {
				reg = i;
				break;
			}
		}
		if (reg < 0) {
			Dprintk("no MTRR for %lx000,%lx000 found\n", base, size);
			goto out;
		}
	}
	if (reg >= max) {
		pr_warn("register: %d too big\n", reg);
		goto out;
	}
	mtrr_if->get(reg, &lbase, &lsize, &ltype);
	if (lsize < 1) {
		pr_warn("MTRR %d not used\n", reg);
		goto out;
	}
	if (mtrr_usage_table[reg] < 1) {
		pr_warn("reg: %d has count=0\n", reg);
		goto out;
	}
	if (--mtrr_usage_table[reg] < 1)
		set_mtrr(reg, 0, 0, 0);
	error = reg;
out:
	mutex_unlock(&mtrr_mutex);
	cpus_read_unlock();
	return error;
}

/**
 * mtrr_del - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
 */
int mtrr_del(int reg, unsigned long base, unsigned long size)
{
	if (!mtrr_enabled())
		return -ENODEV;
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}

/**
 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
 * @base: Physical base address
 * @size: Size of region
 *
 * If PAT is available, this does nothing. If PAT is unavailable, it
 * attempts to add a WC MTRR covering size bytes starting at base and
 * logs an error if this fails.
 *
 * The caller should provide a power of two size on an equivalent
 * power of two boundary.
 *
 * Drivers must store the return value to pass to arch_phys_wc_del(),
 * but drivers should not try to interpret that return value.
 */
int arch_phys_wc_add(unsigned long base, unsigned long size)
{
	int ret;

	if (pat_enabled() || !mtrr_enabled())
		return 0;	/* Success! (We don't need to do anything.) */

	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
	if (ret < 0) {
		pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
			(void *)base, (void *)(base + size - 1));
		return ret;
	}
	return ret + MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL(arch_phys_wc_add);
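
/*
 * Typical pairing (a sketch; "bar_start" and "bar_len" stand for a
 * hypothetical, suitably aligned MMIO aperture):
 *
 *	int wc_cookie = arch_phys_wc_add(bar_start, bar_len);
 *	...map and use the aperture...
 *	arch_phys_wc_del(wc_cookie);
 *
 * The caller never needs to check the cookie: 0 (PAT present, nothing
 * to do) and negative error codes are both accepted by
 * arch_phys_wc_del() below as no-ops.
 */
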
/*
 * arch_phys_wc_del - undoes arch_phys_wc_add
 * @handle: Return value from arch_phys_wc_add
 *
 * This cleans up after arch_phys_wc_add().
 *
 * The API guarantees that arch_phys_wc_del(error code) and
 * arch_phys_wc_del(0) do nothing.
 */
void arch_phys_wc_del(int handle)
{
	if (handle >= 1) {
		WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
		mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
	}
}
EXPORT_SYMBOL(arch_phys_wc_del);

/*
 * arch_phys_wc_index - translates arch_phys_wc_add's return value
 * @handle: Return value from arch_phys_wc_add
 *
 * This will turn the return value from arch_phys_wc_add into an mtrr
 * index suitable for debugging.
 *
 * Note: There is no legitimate use for this function, except possibly
 * in a printk line. Alas there is an illegitimate use in some ancient
 * drm ioctls.
 */
int arch_phys_wc_index(int handle)
{
	if (handle < MTRR_TO_PHYS_WC_OFFSET)
		return -1;
	else
		return handle - MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL_GPL(arch_phys_wc_index);

int __initdata changed_by_mtrr_cleanup;

/**
 * mtrr_bp_init - initialize MTRRs on the boot CPU
 *
 * This needs to be called early; before any of the other CPUs are
 * initialized (i.e. before smp_init()).
 */
void __init mtrr_bp_init(void)
{
	bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR);
	const char *why = "(not available)";
	unsigned long config, dummy;

	phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32);

	if (!generic_mtrrs && mtrr_state.enabled) {
		/*
		 * Software overwrite of MTRR state, only for generic case.
		 * Note that X86_FEATURE_MTRR has been reset in this case.
		 */
		init_table();
		mtrr_build_map();
		pr_info("MTRRs set to read-only\n");

		return;
	}

	if (generic_mtrrs)
		mtrr_if = &generic_mtrr_ops;
	else
		mtrr_set_if();

	if (mtrr_enabled()) {
		/* Get the number of variable MTRR ranges. */
		if (mtrr_if == &generic_mtrr_ops)
			rdmsr(MSR_MTRRcap, config, dummy);
		else
			config = mtrr_if->var_regs;
		num_var_ranges = config & MTRR_CAP_VCNT;

		init_table();
		if (mtrr_if == &generic_mtrr_ops) {
			/* BIOS may override */
			if (get_mtrr_state()) {
				memory_caching_control |= CACHE_MTRR;
				changed_by_mtrr_cleanup = mtrr_cleanup();
				mtrr_build_map();
			} else {
				mtrr_if = NULL;
				why = "by BIOS";
			}
		}
	}

	if (!mtrr_enabled())
		pr_info("MTRRs disabled %s\n", why);
}

/**
 * mtrr_save_state - Save current fixed-range MTRR state of the first
 *	cpu in cpu_online_mask.
 */
void mtrr_save_state(void)
{
	int first_cpu;

	if (!mtrr_enabled() || !mtrr_state.have_fixed)
		return;

	first_cpu = cpumask_first(cpu_online_mask);
	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
}

static int __init mtrr_init_finalize(void)
{
	/*
	 * Map might exist if guest_force_mtrr_state() has been called or if
	 * mtrr_enabled() returns true.
	 */
	mtrr_copy_map();

	if (!mtrr_enabled())
		return 0;

	if (memory_caching_control & CACHE_MTRR) {
		if (!changed_by_mtrr_cleanup)
			mtrr_state_warn();
		return 0;
	}

	mtrr_register_syscore();

	return 0;
}
subsys_initcall(mtrr_init_finalize);
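
/*
 * Closing note (a sketch; "wc_cookie" is a hypothetical value saved
 * from arch_phys_wc_add()): the only sanctioned use of
 * arch_phys_wc_index() is diagnostics, e.g.:
 *
 *	pr_debug("WC handle %d maps to MTRR %d\n",
 *		 wc_cookie, arch_phys_wc_index(wc_cookie));
 */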