// SPDX-License-Identifier: LGPL-2.0+
/*  Generic MTRR (Memory Type Range Register) driver.

    Copyright (C) 1997-2000  Richard Gooch
    Copyright (c) 2002	     Patrick Mochel

    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
    The postal address is:
      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.

    Source: "Pentium Pro Family Developer's Manual, Volume 3:
    Operating System Writer's Guide" (Intel document number 242692),
    section 11.11.7

    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
    on 6-7 March 2002.
    Source: Intel Architecture Software Developers Manual, Volume 3:
    System Programming Guide; Section 9.11. (1997 edition - PPro).
 */

#include <linux/types.h> /* FIXME: kvm_para.h needs this */

#include <linux/stop_machine.h>
#include <linux/kvm_para.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/sort.h>
#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
#include <linux/rcupdate.h>

#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/memtype.h>

#include "mtrr.h"

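/*
 * The architectural X86_MEMTYPE_* encodings are expected to be
 * numerically identical to the MTRR type encodings; the asserts
 * below make that assumption explicit.
 */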
static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE);
static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB);
static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH);
static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT);
static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK);

/* arch_phys_wc_add returns an MTRR register index plus this offset. */
#define MTRR_TO_PHYS_WC_OFFSET 1000
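/* e.g. a WC range placed in MTRR register 3 is returned as cookie 1003 */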

u32 num_var_ranges;

unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
DEFINE_MUTEX(mtrr_mutex);

const struct mtrr_ops *mtrr_if;

/* Returns non-zero if we have the write-combining memory type */
static int have_wrcomb(void)
{
	struct pci_dev *dev;

	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
	if (dev != NULL) {
		/*
		 * ServerWorks LE chipsets < rev 6 have problems with
		 * write-combining. Don't allow it and leave room for other
		 * chipsets to be tagged
		 */
		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
		    dev->revision <= 5) {
			pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
			pci_dev_put(dev);
			return 0;
		}
		/*
		 * Intel 450NX errata # 23. Non-ascending cacheline evictions
		 * to write-combining memory may result in data corruption.
		 */
		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
			pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
			pci_dev_put(dev);
			return 0;
		}
		pci_dev_put(dev);
	}
	return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
}

static void __init init_table(void)
{
	int i, max;

	max = num_var_ranges;
	for (i = 0; i < max; i++)
		mtrr_usage_table[i] = 1;
}

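/*
 * Argument bundle handed to the stop_machine() rendezvous handler;
 * the fields mirror the mtrr_if->set() parameters.
 */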
struct set_mtrr_data {
	unsigned long	smp_base;
	unsigned long	smp_size;
	unsigned int	smp_reg;
	mtrr_type	smp_type;
};

/**
 * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
 * by all the CPUs.
 * @info: pointer to mtrr configuration data
 *
 * Returns nothing.
 */
static int mtrr_rendezvous_handler(void *info)
{
	struct set_mtrr_data *data = info;

	mtrr_if->set(data->smp_reg, data->smp_base,
		     data->smp_size, data->smp_type);
	return 0;
}

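/*
 * Overlapping regions are tolerable when either type is UC (UC always
 * wins under the MTRR precedence rules) or when one is WT and the
 * other WB (the effective type is then WT).
 */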
static inline int types_compatible(mtrr_type type1, mtrr_type type2)
{
	return type1 == MTRR_TYPE_UNCACHABLE ||
	       type2 == MTRR_TYPE_UNCACHABLE ||
	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
}

/**
 * set_mtrr - update mtrrs on all processors
 * @reg: mtrr in question
 * @base: mtrr base
 * @size: mtrr size
 * @type: mtrr type
 *
 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
 *
 * 1. Queue work to do the following on all processors:
 * 2. Disable Interrupts
 * 3. Wait for all procs to do so
 * 4. Enter no-fill cache mode
 * 5. Flush caches
 * 6. Clear PGE bit
 * 7. Flush all TLBs
 * 8. Disable all range registers
 * 9. Update the MTRRs
 * 10. Enable all range registers
 * 11. Flush all TLBs and caches again
 * 12. Enter normal cache mode and reenable caching
 * 13. Set PGE
 * 14. Wait for buddies to catch up
 * 15. Enable interrupts.
 *
 * What does that mean for us? Well, stop_machine() will ensure that
 * the rendezvous handler is started on each CPU. And in lockstep they
 * do the state transition of disabling interrupts, updating MTRR's
 * (the CPU vendors may each do it differently, so we call mtrr_if->set()
 * callback and let them take care of it.) and enabling interrupts.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
 */
static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size,
		     mtrr_type type)
{
	struct set_mtrr_data data = { .smp_reg = reg,
				      .smp_base = base,
				      .smp_size = size,
				      .smp_type = type
				    };

	stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);

	generic_rebuild_map();
}

/**
 * mtrr_add_page - Add a memory type region
 * @base: Physical base address of region in pages (in units of 4 kB!)
 * @size: Physical size of region in pages (4 kB)
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non Intel processors. This function allows drivers to request an
 * MTRR is added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add_page(unsigned long base, unsigned long size,
		  unsigned int type, bool increment)
{
	unsigned long lbase, lsize;
	int i, replace, error;
	mtrr_type ltype;

	if (!mtrr_enabled())
		return -ENXIO;

	error = mtrr_if->validate_add_page(base, size, type);
	if (error)
		return error;

	if (type >= MTRR_NUM_TYPES) {
		pr_warn("type: %u invalid\n", type);
		return -EINVAL;
	}

	/* If the type is WC, check that this processor supports it */
	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
		pr_warn("your processor doesn't support write-combining\n");
		return -ENOSYS;
	}

	if (!size) {
		pr_warn("zero sized request\n");
		return -EINVAL;
	}

	if ((base | (base + size - 1)) >>
	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
		pr_warn("base or size exceeds the MTRR width\n");
		return -EINVAL;
	}

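	/*
	 * replace tracks a same-type region that the new request fully
	 * encloses: -1 means none seen yet, >= 0 is the index of a single
	 * enclosed region that may be subsumed, and -2 means several were
	 * found, in which case only the free-region path is taken.
	 */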
	error = -EINVAL;
	replace = -1;

	/* No CPU hotplug when we change MTRR entries */
	cpus_read_lock();

	/* Search for existing MTRR */
	mutex_lock(&mtrr_mutex);
	for (i = 0; i < num_var_ranges; ++i) {
		mtrr_if->get(i, &lbase, &lsize, &ltype);
		if (!lsize || base > lbase + lsize - 1 ||
		    base + size - 1 < lbase)
			continue;
		/*
		 * At this point we know there is some kind of
		 * overlap/enclosure
		 */
		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
			if (base <= lbase &&
			    base + size - 1 >= lbase + lsize - 1) {
				/* New region encloses an existing region */
				if (type == ltype) {
					replace = replace == -1 ? i : -2;
					continue;
				} else if (types_compatible(type, ltype))
					continue;
			}
			pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase,
				lsize);
			goto out;
		}
		/* New region is enclosed by an existing region */
		if (ltype != type) {
			if (types_compatible(type, ltype))
				continue;
			pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
				base, size, mtrr_attrib_to_str(ltype),
				mtrr_attrib_to_str(type));
			goto out;
		}
		if (increment)
			++mtrr_usage_table[i];
		error = i;
		goto out;
	}
	/* Search for an empty MTRR */
	i = mtrr_if->get_free_region(base, size, replace);
	if (i >= 0) {
		set_mtrr(i, base, size, type);
		if (likely(replace < 0)) {
			mtrr_usage_table[i] = 1;
		} else {
			mtrr_usage_table[i] = mtrr_usage_table[replace];
			if (increment)
				mtrr_usage_table[i]++;
			if (unlikely(replace != i)) {
				set_mtrr(replace, 0, 0, 0);
				mtrr_usage_table[replace] = 0;
			}
		}
	} else {
		pr_info("no more MTRRs available\n");
	}
	error = i;
 out:
	mutex_unlock(&mtrr_mutex);
	cpus_read_unlock();
	return error;
}

static int mtrr_check(unsigned long base, unsigned long size)
{
	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
		pr_warn("size and base must be multiples of 4 kiB\n");
		Dprintk("size: 0x%lx base: 0x%lx\n", size, base);
		dump_stack();
		return -1;
	}
	return 0;
}

/**
 * mtrr_add - Add a memory type region
 * @base: Physical base address of region
 * @size: Physical size of region
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non Intel processors. This function allows drivers to request an
 * MTRR is added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
	     bool increment)
{
	if (!mtrr_enabled())
		return -ENODEV;
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
			     increment);
}
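
/*
 * Example usage (hypothetical driver code, not part of this file;
 * fb_base, fb_size and reg are illustrative names): request
 * write-combining for a power-of-two sized, power-of-two aligned
 * framebuffer aperture, treating failure as mere loss of an
 * optimization.  New code should normally prefer arch_phys_wc_add()
 * below, which is a no-op when PAT is available.
 *
 *	int reg = mtrr_add(fb_base, fb_size, MTRR_TYPE_WRCOMB, true);
 *
 *	if (reg < 0)
 *		pr_info("no WC MTRR, running uncached\n");
 *
 * The matching teardown with mtrr_del() is sketched below, after that
 * function.
 */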

/**
 * mtrr_del_page - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
 */
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
	int i, max;
	mtrr_type ltype;
	unsigned long lbase, lsize;
	int error = -EINVAL;

	if (!mtrr_enabled())
		return -ENODEV;

	max = num_var_ranges;
	/* No CPU hotplug when we change MTRR entries */
	cpus_read_lock();
	mutex_lock(&mtrr_mutex);
	if (reg < 0) {
		/* Search for existing MTRR */
		for (i = 0; i < max; ++i) {
			mtrr_if->get(i, &lbase, &lsize, &ltype);
			if (lbase == base && lsize == size) {
				reg = i;
				break;
			}
		}
		if (reg < 0) {
			Dprintk("no MTRR for %lx000,%lx000 found\n", base, size);
			goto out;
		}
	}
	if (reg >= max) {
		pr_warn("register: %d too big\n", reg);
		goto out;
	}
	mtrr_if->get(reg, &lbase, &lsize, &ltype);
	if (lsize < 1) {
		pr_warn("MTRR %d not used\n", reg);
		goto out;
	}
	if (mtrr_usage_table[reg] < 1) {
		pr_warn("reg: %d has count=0\n", reg);
		goto out;
	}
	if (--mtrr_usage_table[reg] < 1)
		set_mtrr(reg, 0, 0, 0);
	error = reg;
 out:
	mutex_unlock(&mtrr_mutex);
	cpus_read_unlock();
	return error;
}

/**
 * mtrr_del - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
 */
int mtrr_del(int reg, unsigned long base, unsigned long size)
{
	if (!mtrr_enabled())
		return -ENODEV;
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}
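
/*
 * Continuing the hypothetical mtrr_add() example above: release the
 * region on driver teardown.  With a valid register cookie, base and
 * size are ignored; passing reg == -1 instead requests a lookup by
 * exact base/size match.
 *
 *	if (reg >= 0)
 *		mtrr_del(reg, fb_base, fb_size);
 */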

/**
 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
 * @base: Physical base address
 * @size: Size of region
 *
 * If PAT is available, this does nothing.  If PAT is unavailable, it
 * attempts to add a WC MTRR covering size bytes starting at base and
 * logs an error if this fails.
 *
 * The caller should provide a power of two size on an equivalent
 * power of two boundary.
 *
 * Drivers must store the return value to pass to arch_phys_wc_del(),
 * but drivers should not try to interpret that return value.
 */
int arch_phys_wc_add(unsigned long base, unsigned long size)
{
	int ret;

	if (pat_enabled() || !mtrr_enabled())
		return 0;  /* Success!  (We don't need to do anything.) */

	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
	if (ret < 0) {
		pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
			(void *)base, (void *)(base + size - 1));
		return ret;
	}
	return ret + MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL(arch_phys_wc_add);

/*
 * arch_phys_wc_del - undoes arch_phys_wc_add
 * @handle: Return value from arch_phys_wc_add
 *
 * This cleans up after arch_phys_wc_add().
 *
 * The API guarantees that arch_phys_wc_del(error code) and
 * arch_phys_wc_del(0) do nothing.
 */
void arch_phys_wc_del(int handle)
{
	if (handle >= 1) {
		WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
		mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
	}
}
EXPORT_SYMBOL(arch_phys_wc_del);
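
/*
 * Typical pairing (hypothetical driver code; wc_cookie, fb_base and
 * fb_size are illustrative names):
 *
 *	int wc_cookie = arch_phys_wc_add(fb_base, fb_size);
 *	...
 *	arch_phys_wc_del(wc_cookie);
 *
 * Since arch_phys_wc_del() ignores handles below 1, the PAT-enabled,
 * MTRR-disabled and failure cases need no separate error branch.
 */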

/*
 * arch_phys_wc_index - translates arch_phys_wc_add's return value
 * @handle: Return value from arch_phys_wc_add
 *
 * This will turn the return value from arch_phys_wc_add into an mtrr
 * index suitable for debugging.
 *
 * Note: There is no legitimate use for this function, except possibly
 * in a printk line.  Alas there is an illegitimate use in some ancient
 * drm ioctls.
 */
int arch_phys_wc_index(int handle)
{
	if (handle < MTRR_TO_PHYS_WC_OFFSET)
		return -1;
	else
		return handle - MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL_GPL(arch_phys_wc_index);

int __initdata changed_by_mtrr_cleanup;

/**
 * mtrr_bp_init - initialize MTRRs on the boot CPU
 *
 * This needs to be called early; before any of the other CPUs are
 * initialized (i.e. before smp_init()).
 */
void __init mtrr_bp_init(void)
{
	bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR);
	const char *why = "(not available)";
	unsigned long config, dummy;

	phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32);

	if (!generic_mtrrs && mtrr_state.enabled) {
		/*
		 * Software overwrite of MTRR state, only for generic case.
		 * Note that X86_FEATURE_MTRR has been reset in this case.
		 */
		init_table();
		mtrr_build_map();
		pr_info("MTRRs set to read-only\n");

		return;
	}

	if (generic_mtrrs)
		mtrr_if = &generic_mtrr_ops;
	else
		mtrr_set_if();

	if (mtrr_enabled()) {
		/* Get the number of variable MTRR ranges. */
		if (mtrr_if == &generic_mtrr_ops)
			rdmsr(MSR_MTRRcap, config, dummy);
		else
			config = mtrr_if->var_regs;
		num_var_ranges = config & MTRR_CAP_VCNT;

		init_table();
		if (mtrr_if == &generic_mtrr_ops) {
			/* BIOS may override */
			if (get_mtrr_state()) {
				memory_caching_control |= CACHE_MTRR;
				changed_by_mtrr_cleanup = mtrr_cleanup();
				mtrr_build_map();
			} else {
				mtrr_if = NULL;
				why = "by BIOS";
			}
		}
	}

	if (!mtrr_enabled())
		pr_info("MTRRs disabled %s\n", why);
}

/**
 * mtrr_save_state - Save current fixed-range MTRR state of the first
 *	cpu in cpu_online_mask.
 */
void mtrr_save_state(void)
{
	int first_cpu;

	if (!mtrr_enabled() || !mtrr_state.have_fixed)
		return;

	first_cpu = cpumask_first(cpu_online_mask);
	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
}

static int __init mtrr_init_finalize(void)
{
	/*
	 * Map might exist if guest_force_mtrr_state() has been called or if
	 * mtrr_enabled() returns true.
	 */
	mtrr_copy_map();

	if (!mtrr_enabled())
		return 0;

	if (memory_caching_control & CACHE_MTRR) {
		if (!changed_by_mtrr_cleanup)
			mtrr_state_warn();
		return 0;
	}

	mtrr_register_syscore();

	return 0;
}
subsys_initcall(mtrr_init_finalize);