xref: /linux/arch/x86/kernel/cpu/mtrr/mtrr.c (revision c532de5a67a70f8533d495f8f2aaa9a0491c3ad0)
1 /*  Generic MTRR (Memory Type Range Register) driver.
2 
3     Copyright (C) 1997-2000  Richard Gooch
4     Copyright (c) 2002	     Patrick Mochel
5 
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Library General Public
8     License as published by the Free Software Foundation; either
9     version 2 of the License, or (at your option) any later version.
10 
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14     Library General Public License for more details.
15 
16     You should have received a copy of the GNU Library General Public
17     License along with this library; if not, write to the Free
18     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 
20     Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
21     The postal address is:
22       Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
23 
24     Source: "Pentium Pro Family Developer's Manual, Volume 3:
25     Operating System Writer's Guide" (Intel document number 242692),
26     section 11.11.7
27 
28     This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
29     on 6-7 March 2002.
30     Source: Intel Architecture Software Developers Manual, Volume 3:
31     System Programming Guide; Section 9.11. (1997 edition - PPro).
32 */
33 
34 #include <linux/types.h> /* FIXME: kvm_para.h needs this */
35 
36 #include <linux/stop_machine.h>
37 #include <linux/kvm_para.h>
38 #include <linux/uaccess.h>
39 #include <linux/export.h>
40 #include <linux/mutex.h>
41 #include <linux/init.h>
42 #include <linux/sort.h>
43 #include <linux/cpu.h>
44 #include <linux/pci.h>
45 #include <linux/smp.h>
46 #include <linux/syscore_ops.h>
47 #include <linux/rcupdate.h>
48 
49 #include <asm/cacheinfo.h>
50 #include <asm/cpufeature.h>
51 #include <asm/e820/api.h>
52 #include <asm/mtrr.h>
53 #include <asm/msr.h>
54 #include <asm/memtype.h>
55 
56 #include "mtrr.h"
57 
/*
 * Build-time checks: each generic x86 memory-type constant must have the
 * same numeric value as the MTRR type constant of the same caching mode.
 */
static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE);
static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB);
static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH);
static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT);
static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK);

/*
 * arch_phys_wc_add returns an MTRR register index plus this offset.
 * A return value of 0 from arch_phys_wc_add() means "nothing was added",
 * so the offset keeps real handles distinct from that value.
 */
#define MTRR_TO_PHYS_WC_OFFSET 1000

/* Number of variable-range MTRRs; computed in mtrr_bp_init(). */
u32 num_var_ranges;

/*
 * Usage count per variable MTRR, indexed by register number. Updated by
 * mtrr_add_page() and mtrr_del_page() while holding mtrr_mutex.
 */
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
/* Held by mtrr_add_page()/mtrr_del_page() while they inspect and change MTRRs. */
DEFINE_MUTEX(mtrr_mutex);

/*
 * MTRR operations in use. Selected in mtrr_bp_init(), which also resets it
 * to NULL when get_mtrr_state() reports failure.
 */
const struct mtrr_ops *mtrr_if;
73 
74 /*  Returns non-zero if we have the write-combining memory type  */
75 static int have_wrcomb(void)
76 {
77 	struct pci_dev *dev;
78 
79 	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
80 	if (dev != NULL) {
81 		/*
82 		 * ServerWorks LE chipsets < rev 6 have problems with
83 		 * write-combining. Don't allow it and leave room for other
84 		 * chipsets to be tagged
85 		 */
86 		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
87 		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
88 		    dev->revision <= 5) {
89 			pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
90 			pci_dev_put(dev);
91 			return 0;
92 		}
93 		/*
94 		 * Intel 450NX errata # 23. Non ascending cacheline evictions to
95 		 * write combining memory may resulting in data corruption
96 		 */
97 		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
98 		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
99 			pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
100 			pci_dev_put(dev);
101 			return 0;
102 		}
103 		pci_dev_put(dev);
104 	}
105 	return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
106 }
107 
108 static void __init init_table(void)
109 {
110 	int i, max;
111 
112 	max = num_var_ranges;
113 	for (i = 0; i < max; i++)
114 		mtrr_usage_table[i] = 1;
115 }
116 
/*
 * Arguments of one MTRR update, bundled so that set_mtrr() can pass them
 * through the void pointer given to mtrr_rendezvous_handler().
 */
struct set_mtrr_data {
	unsigned long	smp_base;	/* base handed to mtrr_if->set() */
	unsigned long	smp_size;	/* size handed to mtrr_if->set() */
	unsigned int	smp_reg;	/* index of the MTRR to program */
	mtrr_type	smp_type;	/* memory type to program */
};
123 
124 /**
125  * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
126  * by all the CPUs.
127  * @info: pointer to mtrr configuration data
128  *
129  * Returns nothing.
130  */
131 static int mtrr_rendezvous_handler(void *info)
132 {
133 	struct set_mtrr_data *data = info;
134 
135 	mtrr_if->set(data->smp_reg, data->smp_base,
136 		     data->smp_size, data->smp_type);
137 	return 0;
138 }
139 
140 static inline int types_compatible(mtrr_type type1, mtrr_type type2)
141 {
142 	return type1 == MTRR_TYPE_UNCACHABLE ||
143 	       type2 == MTRR_TYPE_UNCACHABLE ||
144 	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
145 	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
146 }
147 
148 /**
149  * set_mtrr - update mtrrs on all processors
150  * @reg:	mtrr in question
151  * @base:	mtrr base
152  * @size:	mtrr size
153  * @type:	mtrr type
154  *
155  * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
156  *
157  * 1. Queue work to do the following on all processors:
158  * 2. Disable Interrupts
159  * 3. Wait for all procs to do so
160  * 4. Enter no-fill cache mode
161  * 5. Flush caches
162  * 6. Clear PGE bit
163  * 7. Flush all TLBs
164  * 8. Disable all range registers
165  * 9. Update the MTRRs
166  * 10. Enable all range registers
167  * 11. Flush all TLBs and caches again
168  * 12. Enter normal cache mode and reenable caching
169  * 13. Set PGE
170  * 14. Wait for buddies to catch up
171  * 15. Enable interrupts.
172  *
173  * What does that mean for us? Well, stop_machine() will ensure that
174  * the rendezvous handler is started on each CPU. And in lockstep they
175  * do the state transition of disabling interrupts, updating MTRR's
176  * (the CPU vendors may each do it differently, so we call mtrr_if->set()
177  * callback and let them take care of it.) and enabling interrupts.
178  *
179  * Note that the mechanism is the same for UP systems, too; all the SMP stuff
180  * becomes nops.
181  */
182 static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size,
183 		     mtrr_type type)
184 {
185 	struct set_mtrr_data data = { .smp_reg = reg,
186 				      .smp_base = base,
187 				      .smp_size = size,
188 				      .smp_type = type
189 				    };
190 
191 	stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);
192 
193 	generic_rebuild_map();
194 }
195 
196 /**
197  * mtrr_add_page - Add a memory type region
198  * @base: Physical base address of region in pages (in units of 4 kB!)
199  * @size: Physical size of region in pages (4 kB)
200  * @type: Type of MTRR desired
201  * @increment: If this is true do usage counting on the region
202  *
203  * Memory type region registers control the caching on newer Intel and
204  * non Intel processors. This function allows drivers to request an
205  * MTRR is added. The details and hardware specifics of each processor's
206  * implementation are hidden from the caller, but nevertheless the
207  * caller should expect to need to provide a power of two size on an
208  * equivalent power of two boundary.
209  *
210  * If the region cannot be added either because all regions are in use
211  * or the CPU cannot support it a negative value is returned. On success
212  * the register number for this entry is returned, but should be treated
213  * as a cookie only.
214  *
215  * On a multiprocessor machine the changes are made to all processors.
216  * This is required on x86 by the Intel processors.
217  *
218  * The available types are
219  *
220  * %MTRR_TYPE_UNCACHABLE - No caching
221  *
222  * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
223  *
224  * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
225  *
226  * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
227  *
228  * BUGS: Needs a quiet flag for the cases where drivers do not mind
229  * failures and do not wish system log messages to be sent.
230  */
231 int mtrr_add_page(unsigned long base, unsigned long size,
232 		  unsigned int type, bool increment)
233 {
234 	unsigned long lbase, lsize;
235 	int i, replace, error;
236 	mtrr_type ltype;
237 
238 	if (!mtrr_enabled())
239 		return -ENXIO;
240 
241 	error = mtrr_if->validate_add_page(base, size, type);
242 	if (error)
243 		return error;
244 
245 	if (type >= MTRR_NUM_TYPES) {
246 		pr_warn("type: %u invalid\n", type);
247 		return -EINVAL;
248 	}
249 
250 	/* If the type is WC, check that this processor supports it */
251 	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
252 		pr_warn("your processor doesn't support write-combining\n");
253 		return -ENOSYS;
254 	}
255 
256 	if (!size) {
257 		pr_warn("zero sized request\n");
258 		return -EINVAL;
259 	}
260 
261 	if ((base | (base + size - 1)) >>
262 	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
263 		pr_warn("base or size exceeds the MTRR width\n");
264 		return -EINVAL;
265 	}
266 
267 	error = -EINVAL;
268 	replace = -1;
269 
270 	/* No CPU hotplug when we change MTRR entries */
271 	cpus_read_lock();
272 
273 	/* Search for existing MTRR  */
274 	mutex_lock(&mtrr_mutex);
275 	for (i = 0; i < num_var_ranges; ++i) {
276 		mtrr_if->get(i, &lbase, &lsize, &ltype);
277 		if (!lsize || base > lbase + lsize - 1 ||
278 		    base + size - 1 < lbase)
279 			continue;
280 		/*
281 		 * At this point we know there is some kind of
282 		 * overlap/enclosure
283 		 */
284 		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
285 			if (base <= lbase &&
286 			    base + size - 1 >= lbase + lsize - 1) {
287 				/*  New region encloses an existing region  */
288 				if (type == ltype) {
289 					replace = replace == -1 ? i : -2;
290 					continue;
291 				} else if (types_compatible(type, ltype))
292 					continue;
293 			}
294 			pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase,
295 				lsize);
296 			goto out;
297 		}
298 		/* New region is enclosed by an existing region */
299 		if (ltype != type) {
300 			if (types_compatible(type, ltype))
301 				continue;
302 			pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
303 				base, size, mtrr_attrib_to_str(ltype),
304 				mtrr_attrib_to_str(type));
305 			goto out;
306 		}
307 		if (increment)
308 			++mtrr_usage_table[i];
309 		error = i;
310 		goto out;
311 	}
312 	/* Search for an empty MTRR */
313 	i = mtrr_if->get_free_region(base, size, replace);
314 	if (i >= 0) {
315 		set_mtrr(i, base, size, type);
316 		if (likely(replace < 0)) {
317 			mtrr_usage_table[i] = 1;
318 		} else {
319 			mtrr_usage_table[i] = mtrr_usage_table[replace];
320 			if (increment)
321 				mtrr_usage_table[i]++;
322 			if (unlikely(replace != i)) {
323 				set_mtrr(replace, 0, 0, 0);
324 				mtrr_usage_table[replace] = 0;
325 			}
326 		}
327 	} else {
328 		pr_info("no more MTRRs available\n");
329 	}
330 	error = i;
331  out:
332 	mutex_unlock(&mtrr_mutex);
333 	cpus_read_unlock();
334 	return error;
335 }
336 
337 static int mtrr_check(unsigned long base, unsigned long size)
338 {
339 	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
340 		pr_warn("size and base must be multiples of 4 kiB\n");
341 		Dprintk("size: 0x%lx  base: 0x%lx\n", size, base);
342 		dump_stack();
343 		return -1;
344 	}
345 	return 0;
346 }
347 
348 /**
349  * mtrr_add - Add a memory type region
350  * @base: Physical base address of region
351  * @size: Physical size of region
352  * @type: Type of MTRR desired
353  * @increment: If this is true do usage counting on the region
354  *
355  * Memory type region registers control the caching on newer Intel and
356  * non Intel processors. This function allows drivers to request an
357  * MTRR is added. The details and hardware specifics of each processor's
358  * implementation are hidden from the caller, but nevertheless the
359  * caller should expect to need to provide a power of two size on an
360  * equivalent power of two boundary.
361  *
362  * If the region cannot be added either because all regions are in use
363  * or the CPU cannot support it a negative value is returned. On success
364  * the register number for this entry is returned, but should be treated
365  * as a cookie only.
366  *
367  * On a multiprocessor machine the changes are made to all processors.
368  * This is required on x86 by the Intel processors.
369  *
370  * The available types are
371  *
372  * %MTRR_TYPE_UNCACHABLE - No caching
373  *
374  * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
375  *
376  * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
377  *
378  * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
379  *
380  * BUGS: Needs a quiet flag for the cases where drivers do not mind
381  * failures and do not wish system log messages to be sent.
382  */
383 int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
384 	     bool increment)
385 {
386 	if (!mtrr_enabled())
387 		return -ENODEV;
388 	if (mtrr_check(base, size))
389 		return -EINVAL;
390 	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
391 			     increment);
392 }
393 
394 /**
395  * mtrr_del_page - delete a memory type region
396  * @reg: Register returned by mtrr_add
397  * @base: Physical base address
398  * @size: Size of region
399  *
400  * If register is supplied then base and size are ignored. This is
401  * how drivers should call it.
402  *
403  * Releases an MTRR region. If the usage count drops to zero the
404  * register is freed and the region returns to default state.
405  * On success the register is returned, on failure a negative error
406  * code.
407  */
408 int mtrr_del_page(int reg, unsigned long base, unsigned long size)
409 {
410 	int i, max;
411 	mtrr_type ltype;
412 	unsigned long lbase, lsize;
413 	int error = -EINVAL;
414 
415 	if (!mtrr_enabled())
416 		return -ENODEV;
417 
418 	max = num_var_ranges;
419 	/* No CPU hotplug when we change MTRR entries */
420 	cpus_read_lock();
421 	mutex_lock(&mtrr_mutex);
422 	if (reg < 0) {
423 		/*  Search for existing MTRR  */
424 		for (i = 0; i < max; ++i) {
425 			mtrr_if->get(i, &lbase, &lsize, &ltype);
426 			if (lbase == base && lsize == size) {
427 				reg = i;
428 				break;
429 			}
430 		}
431 		if (reg < 0) {
432 			Dprintk("no MTRR for %lx000,%lx000 found\n", base, size);
433 			goto out;
434 		}
435 	}
436 	if (reg >= max) {
437 		pr_warn("register: %d too big\n", reg);
438 		goto out;
439 	}
440 	mtrr_if->get(reg, &lbase, &lsize, &ltype);
441 	if (lsize < 1) {
442 		pr_warn("MTRR %d not used\n", reg);
443 		goto out;
444 	}
445 	if (mtrr_usage_table[reg] < 1) {
446 		pr_warn("reg: %d has count=0\n", reg);
447 		goto out;
448 	}
449 	if (--mtrr_usage_table[reg] < 1)
450 		set_mtrr(reg, 0, 0, 0);
451 	error = reg;
452  out:
453 	mutex_unlock(&mtrr_mutex);
454 	cpus_read_unlock();
455 	return error;
456 }
457 
458 /**
459  * mtrr_del - delete a memory type region
460  * @reg: Register returned by mtrr_add
461  * @base: Physical base address
462  * @size: Size of region
463  *
464  * If register is supplied then base and size are ignored. This is
465  * how drivers should call it.
466  *
467  * Releases an MTRR region. If the usage count drops to zero the
468  * register is freed and the region returns to default state.
469  * On success the register is returned, on failure a negative error
470  * code.
471  */
472 int mtrr_del(int reg, unsigned long base, unsigned long size)
473 {
474 	if (!mtrr_enabled())
475 		return -ENODEV;
476 	if (mtrr_check(base, size))
477 		return -EINVAL;
478 	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
479 }
480 
481 /**
482  * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
483  * @base: Physical base address
484  * @size: Size of region
485  *
486  * If PAT is available, this does nothing.  If PAT is unavailable, it
487  * attempts to add a WC MTRR covering size bytes starting at base and
488  * logs an error if this fails.
489  *
490  * The called should provide a power of two size on an equivalent
491  * power of two boundary.
492  *
493  * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
494  * but drivers should not try to interpret that return value.
495  */
496 int arch_phys_wc_add(unsigned long base, unsigned long size)
497 {
498 	int ret;
499 
500 	if (pat_enabled() || !mtrr_enabled())
501 		return 0;  /* Success!  (We don't need to do anything.) */
502 
503 	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
504 	if (ret < 0) {
505 		pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
506 			(void *)base, (void *)(base + size - 1));
507 		return ret;
508 	}
509 	return ret + MTRR_TO_PHYS_WC_OFFSET;
510 }
511 EXPORT_SYMBOL(arch_phys_wc_add);
512 
513 /*
514  * arch_phys_wc_del - undoes arch_phys_wc_add
515  * @handle: Return value from arch_phys_wc_add
516  *
517  * This cleans up after mtrr_add_wc_if_needed.
518  *
519  * The API guarantees that mtrr_del_wc_if_needed(error code) and
520  * mtrr_del_wc_if_needed(0) do nothing.
521  */
522 void arch_phys_wc_del(int handle)
523 {
524 	if (handle >= 1) {
525 		WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
526 		mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
527 	}
528 }
529 EXPORT_SYMBOL(arch_phys_wc_del);
530 
531 /*
532  * arch_phys_wc_index - translates arch_phys_wc_add's return value
533  * @handle: Return value from arch_phys_wc_add
534  *
535  * This will turn the return value from arch_phys_wc_add into an mtrr
536  * index suitable for debugging.
537  *
538  * Note: There is no legitimate use for this function, except possibly
539  * in printk line.  Alas there is an illegitimate use in some ancient
540  * drm ioctls.
541  */
542 int arch_phys_wc_index(int handle)
543 {
544 	if (handle < MTRR_TO_PHYS_WC_OFFSET)
545 		return -1;
546 	else
547 		return handle - MTRR_TO_PHYS_WC_OFFSET;
548 }
549 EXPORT_SYMBOL_GPL(arch_phys_wc_index);
550 
/*
 * Result of mtrr_cleanup(), stored by mtrr_bp_init(); when it is zero,
 * mtrr_init_finalize() calls mtrr_state_warn().
 */
int __initdata changed_by_mtrr_cleanup;
552 
553 /**
554  * mtrr_bp_init - initialize MTRRs on the boot CPU
555  *
556  * This needs to be called early; before any of the other CPUs are
557  * initialized (i.e. before smp_init()).
558  */
559 void __init mtrr_bp_init(void)
560 {
561 	bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR);
562 	const char *why = "(not available)";
563 	unsigned long config, dummy;
564 
565 	phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32);
566 
567 	if (!generic_mtrrs && mtrr_state.enabled) {
568 		/*
569 		 * Software overwrite of MTRR state, only for generic case.
570 		 * Note that X86_FEATURE_MTRR has been reset in this case.
571 		 */
572 		init_table();
573 		mtrr_build_map();
574 		pr_info("MTRRs set to read-only\n");
575 
576 		return;
577 	}
578 
579 	if (generic_mtrrs)
580 		mtrr_if = &generic_mtrr_ops;
581 	else
582 		mtrr_set_if();
583 
584 	if (mtrr_enabled()) {
585 		/* Get the number of variable MTRR ranges. */
586 		if (mtrr_if == &generic_mtrr_ops)
587 			rdmsr(MSR_MTRRcap, config, dummy);
588 		else
589 			config = mtrr_if->var_regs;
590 		num_var_ranges = config & MTRR_CAP_VCNT;
591 
592 		init_table();
593 		if (mtrr_if == &generic_mtrr_ops) {
594 			/* BIOS may override */
595 			if (get_mtrr_state()) {
596 				memory_caching_control |= CACHE_MTRR;
597 				changed_by_mtrr_cleanup = mtrr_cleanup();
598 				mtrr_build_map();
599 			} else {
600 				mtrr_if = NULL;
601 				why = "by BIOS";
602 			}
603 		}
604 	}
605 
606 	if (!mtrr_enabled())
607 		pr_info("MTRRs disabled %s\n", why);
608 }
609 
610 /**
611  * mtrr_save_state - Save current fixed-range MTRR state of the first
612  *	cpu in cpu_online_mask.
613  */
614 void mtrr_save_state(void)
615 {
616 	int first_cpu;
617 
618 	if (!mtrr_enabled() || !mtrr_state.have_fixed)
619 		return;
620 
621 	first_cpu = cpumask_first(cpu_online_mask);
622 	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
623 }
624 
625 static int __init mtrr_init_finalize(void)
626 {
627 	/*
628 	 * Map might exist if mtrr_overwrite_state() has been called or if
629 	 * mtrr_enabled() returns true.
630 	 */
631 	mtrr_copy_map();
632 
633 	if (!mtrr_enabled())
634 		return 0;
635 
636 	if (memory_caching_control & CACHE_MTRR) {
637 		if (!changed_by_mtrr_cleanup)
638 			mtrr_state_warn();
639 		return 0;
640 	}
641 
642 	mtrr_register_syscore();
643 
644 	return 0;
645 }
646 subsys_initcall(mtrr_init_finalize);
647