1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * vMTRR implementation 4 * 5 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright 2010 Red Hat, Inc. and/or its affiliates. 7 * Copyright(C) 2015 Intel Corporation. 8 * 9 * Authors: 10 * Yaniv Kamay <yaniv@qumranet.com> 11 * Avi Kivity <avi@qumranet.com> 12 * Marcelo Tosatti <mtosatti@redhat.com> 13 * Paolo Bonzini <pbonzini@redhat.com> 14 * Xiao Guangrong <guangrong.xiao@linux.intel.com> 15 */ 16 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 17 18 #include <linux/kvm_host.h> 19 #include <asm/mtrr.h> 20 21 #include "cpuid.h" 22 #include "mmu.h" 23 24 #define IA32_MTRR_DEF_TYPE_E (1ULL << 11) 25 #define IA32_MTRR_DEF_TYPE_FE (1ULL << 10) 26 #define IA32_MTRR_DEF_TYPE_TYPE_MASK (0xff) 27 28 static bool is_mtrr_base_msr(unsigned int msr) 29 { 30 /* MTRR base MSRs use even numbers, masks use odd numbers. */ 31 return !(msr & 0x1); 32 } 33 34 static struct kvm_mtrr_range *var_mtrr_msr_to_range(struct kvm_vcpu *vcpu, 35 unsigned int msr) 36 { 37 int index = (msr - MTRRphysBase_MSR(0)) / 2; 38 39 return &vcpu->arch.mtrr_state.var_ranges[index]; 40 } 41 42 static bool msr_mtrr_valid(unsigned msr) 43 { 44 switch (msr) { 45 case MTRRphysBase_MSR(0) ... MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1): 46 case MSR_MTRRfix64K_00000: 47 case MSR_MTRRfix16K_80000: 48 case MSR_MTRRfix16K_A0000: 49 case MSR_MTRRfix4K_C0000: 50 case MSR_MTRRfix4K_C8000: 51 case MSR_MTRRfix4K_D0000: 52 case MSR_MTRRfix4K_D8000: 53 case MSR_MTRRfix4K_E0000: 54 case MSR_MTRRfix4K_E8000: 55 case MSR_MTRRfix4K_F0000: 56 case MSR_MTRRfix4K_F8000: 57 case MSR_MTRRdefType: 58 return true; 59 } 60 return false; 61 } 62 63 static bool valid_mtrr_type(unsigned t) 64 { 65 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ 66 } 67 68 static bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) 69 { 70 int i; 71 u64 mask; 72 73 if (!msr_mtrr_valid(msr)) 74 return false; 75 76 if (msr == MSR_MTRRdefType) { 77 if (data & ~0xcff) 78 return false; 79 return valid_mtrr_type(data & 0xff); 80 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { 81 for (i = 0; i < 8 ; i++) 82 if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) 83 return false; 84 return true; 85 } 86 87 /* variable MTRRs */ 88 WARN_ON(!(msr >= MTRRphysBase_MSR(0) && 89 msr <= MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1))); 90 91 mask = kvm_vcpu_reserved_gpa_bits_raw(vcpu); 92 if ((msr & 1) == 0) { 93 /* MTRR base */ 94 if (!valid_mtrr_type(data & 0xff)) 95 return false; 96 mask |= 0xf00; 97 } else 98 /* MTRR mask */ 99 mask |= 0x7ff; 100 101 return (data & mask) == 0; 102 } 103 104 static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state) 105 { 106 return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_E); 107 } 108 109 static bool fixed_mtrr_is_enabled(struct kvm_mtrr *mtrr_state) 110 { 111 return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_FE); 112 } 113 114 static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state) 115 { 116 return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK; 117 } 118 119 static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu) 120 { 121 /* 122 * Intel SDM 11.11.2.2: all MTRRs are disabled when 123 * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC 124 * memory type is applied to all of physical memory. 125 * 126 * However, virtual machines can be run with CPUID such that 127 * there are no MTRRs. In that case, the firmware will never 128 * enable MTRRs and it is obviously undesirable to run the 129 * guest entirely with UC memory and we use WB. 130 */ 131 if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR)) 132 return MTRR_TYPE_UNCACHABLE; 133 else 134 return MTRR_TYPE_WRBACK; 135 } 136 137 /* 138 * Three terms are used in the following code: 139 * - segment, it indicates the address segments covered by fixed MTRRs. 140 * - unit, it corresponds to the MSR entry in the segment. 141 * - range, a range is covered in one memory cache type. 142 */ 143 struct fixed_mtrr_segment { 144 u64 start; 145 u64 end; 146 147 int range_shift; 148 149 /* the start position in kvm_mtrr.fixed_ranges[]. */ 150 int range_start; 151 }; 152 153 static struct fixed_mtrr_segment fixed_seg_table[] = { 154 /* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */ 155 { 156 .start = 0x0, 157 .end = 0x80000, 158 .range_shift = 16, /* 64K */ 159 .range_start = 0, 160 }, 161 162 /* 163 * MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units, 164 * 16K fixed mtrr. 165 */ 166 { 167 .start = 0x80000, 168 .end = 0xc0000, 169 .range_shift = 14, /* 16K */ 170 .range_start = 8, 171 }, 172 173 /* 174 * MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units, 175 * 4K fixed mtrr. 176 */ 177 { 178 .start = 0xc0000, 179 .end = 0x100000, 180 .range_shift = 12, /* 12K */ 181 .range_start = 24, 182 } 183 }; 184 185 /* 186 * The size of unit is covered in one MSR, one MSR entry contains 187 * 8 ranges so that unit size is always 8 * 2^range_shift. 188 */ 189 static u64 fixed_mtrr_seg_unit_size(int seg) 190 { 191 return 8 << fixed_seg_table[seg].range_shift; 192 } 193 194 static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit) 195 { 196 switch (msr) { 197 case MSR_MTRRfix64K_00000: 198 *seg = 0; 199 *unit = 0; 200 break; 201 case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: 202 *seg = 1; 203 *unit = array_index_nospec( 204 msr - MSR_MTRRfix16K_80000, 205 MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1); 206 break; 207 case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: 208 *seg = 2; 209 *unit = array_index_nospec( 210 msr - MSR_MTRRfix4K_C0000, 211 MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1); 212 break; 213 default: 214 return false; 215 } 216 217 return true; 218 } 219 220 static void fixed_mtrr_seg_unit_range(int seg, int unit, u64 *start, u64 *end) 221 { 222 struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; 223 u64 unit_size = fixed_mtrr_seg_unit_size(seg); 224 225 *start = mtrr_seg->start + unit * unit_size; 226 *end = *start + unit_size; 227 WARN_ON(*end > mtrr_seg->end); 228 } 229 230 static int fixed_mtrr_seg_unit_range_index(int seg, int unit) 231 { 232 struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; 233 234 WARN_ON(mtrr_seg->start + unit * fixed_mtrr_seg_unit_size(seg) 235 > mtrr_seg->end); 236 237 /* each unit has 8 ranges. */ 238 return mtrr_seg->range_start + 8 * unit; 239 } 240 241 static int fixed_mtrr_seg_end_range_index(int seg) 242 { 243 struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; 244 int n; 245 246 n = (mtrr_seg->end - mtrr_seg->start) >> mtrr_seg->range_shift; 247 return mtrr_seg->range_start + n - 1; 248 } 249 250 static bool fixed_msr_to_range(u32 msr, u64 *start, u64 *end) 251 { 252 int seg, unit; 253 254 if (!fixed_msr_to_seg_unit(msr, &seg, &unit)) 255 return false; 256 257 fixed_mtrr_seg_unit_range(seg, unit, start, end); 258 return true; 259 } 260 261 static int fixed_msr_to_range_index(u32 msr) 262 { 263 int seg, unit; 264 265 if (!fixed_msr_to_seg_unit(msr, &seg, &unit)) 266 return -1; 267 268 return fixed_mtrr_seg_unit_range_index(seg, unit); 269 } 270 271 static int fixed_mtrr_addr_to_seg(u64 addr) 272 { 273 struct fixed_mtrr_segment *mtrr_seg; 274 int seg, seg_num = ARRAY_SIZE(fixed_seg_table); 275 276 for (seg = 0; seg < seg_num; seg++) { 277 mtrr_seg = &fixed_seg_table[seg]; 278 if (mtrr_seg->start <= addr && addr < mtrr_seg->end) 279 return seg; 280 } 281 282 return -1; 283 } 284 285 static int fixed_mtrr_addr_seg_to_range_index(u64 addr, int seg) 286 { 287 struct fixed_mtrr_segment *mtrr_seg; 288 int index; 289 290 mtrr_seg = &fixed_seg_table[seg]; 291 index = mtrr_seg->range_start; 292 index += (addr - mtrr_seg->start) >> mtrr_seg->range_shift; 293 return index; 294 } 295 296 static u64 fixed_mtrr_range_end_addr(int seg, int index) 297 { 298 struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; 299 int pos = index - mtrr_seg->range_start; 300 301 return mtrr_seg->start + ((pos + 1) << mtrr_seg->range_shift); 302 } 303 304 static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end) 305 { 306 u64 mask; 307 308 *start = range->base & PAGE_MASK; 309 310 mask = range->mask & PAGE_MASK; 311 312 /* This cannot overflow because writing to the reserved bits of 313 * variable MTRRs causes a #GP. 314 */ 315 *end = (*start | ~mask) + 1; 316 } 317 318 static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr) 319 { 320 struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; 321 gfn_t start, end; 322 323 if (!tdp_enabled || !kvm_arch_has_noncoherent_dma(vcpu->kvm)) 324 return; 325 326 if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType) 327 return; 328 329 /* fixed MTRRs. */ 330 if (fixed_msr_to_range(msr, &start, &end)) { 331 if (!fixed_mtrr_is_enabled(mtrr_state)) 332 return; 333 } else if (msr == MSR_MTRRdefType) { 334 start = 0x0; 335 end = ~0ULL; 336 } else { 337 /* variable range MTRRs. */ 338 var_mtrr_range(var_mtrr_msr_to_range(vcpu, msr), &start, &end); 339 } 340 341 kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end)); 342 } 343 344 static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range) 345 { 346 return (range->mask & (1 << 11)) != 0; 347 } 348 349 static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 350 { 351 struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; 352 struct kvm_mtrr_range *tmp, *cur; 353 354 cur = var_mtrr_msr_to_range(vcpu, msr); 355 356 /* remove the entry if it's in the list. */ 357 if (var_mtrr_range_is_valid(cur)) 358 list_del(&cur->node); 359 360 /* 361 * Set all illegal GPA bits in the mask, since those bits must 362 * implicitly be 0. The bits are then cleared when reading them. 363 */ 364 if (is_mtrr_base_msr(msr)) 365 cur->base = data; 366 else 367 cur->mask = data | kvm_vcpu_reserved_gpa_bits_raw(vcpu); 368 369 /* add it to the list if it's enabled. */ 370 if (var_mtrr_range_is_valid(cur)) { 371 list_for_each_entry(tmp, &mtrr_state->head, node) 372 if (cur->base >= tmp->base) 373 break; 374 list_add_tail(&cur->node, &tmp->node); 375 } 376 } 377 378 int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 379 { 380 int index; 381 382 if (!kvm_mtrr_valid(vcpu, msr, data)) 383 return 1; 384 385 index = fixed_msr_to_range_index(msr); 386 if (index >= 0) 387 *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data; 388 else if (msr == MSR_MTRRdefType) 389 vcpu->arch.mtrr_state.deftype = data; 390 else 391 set_var_mtrr_msr(vcpu, msr, data); 392 393 update_mtrr(vcpu, msr); 394 return 0; 395 } 396 397 int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 398 { 399 int index; 400 401 /* MSR_MTRRcap is a readonly MSR. */ 402 if (msr == MSR_MTRRcap) { 403 /* 404 * SMRR = 0 405 * WC = 1 406 * FIX = 1 407 * VCNT = KVM_NR_VAR_MTRR 408 */ 409 *pdata = 0x500 | KVM_NR_VAR_MTRR; 410 return 0; 411 } 412 413 if (!msr_mtrr_valid(msr)) 414 return 1; 415 416 index = fixed_msr_to_range_index(msr); 417 if (index >= 0) { 418 *pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index]; 419 } else if (msr == MSR_MTRRdefType) { 420 *pdata = vcpu->arch.mtrr_state.deftype; 421 } else { 422 /* Variable MTRRs */ 423 if (is_mtrr_base_msr(msr)) 424 *pdata = var_mtrr_msr_to_range(vcpu, msr)->base; 425 else 426 *pdata = var_mtrr_msr_to_range(vcpu, msr)->mask; 427 428 *pdata &= ~kvm_vcpu_reserved_gpa_bits_raw(vcpu); 429 } 430 431 return 0; 432 } 433 434 void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu) 435 { 436 INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head); 437 } 438 439 struct mtrr_iter { 440 /* input fields. */ 441 struct kvm_mtrr *mtrr_state; 442 u64 start; 443 u64 end; 444 445 /* output fields. */ 446 int mem_type; 447 /* mtrr is completely disabled? */ 448 bool mtrr_disabled; 449 /* [start, end) is not fully covered in MTRRs? */ 450 bool partial_map; 451 452 /* private fields. */ 453 union { 454 /* used for fixed MTRRs. */ 455 struct { 456 int index; 457 int seg; 458 }; 459 460 /* used for var MTRRs. */ 461 struct { 462 struct kvm_mtrr_range *range; 463 /* max address has been covered in var MTRRs. */ 464 u64 start_max; 465 }; 466 }; 467 468 bool fixed; 469 }; 470 471 static bool mtrr_lookup_fixed_start(struct mtrr_iter *iter) 472 { 473 int seg, index; 474 475 if (!fixed_mtrr_is_enabled(iter->mtrr_state)) 476 return false; 477 478 seg = fixed_mtrr_addr_to_seg(iter->start); 479 if (seg < 0) 480 return false; 481 482 iter->fixed = true; 483 index = fixed_mtrr_addr_seg_to_range_index(iter->start, seg); 484 iter->index = index; 485 iter->seg = seg; 486 return true; 487 } 488 489 static bool match_var_range(struct mtrr_iter *iter, 490 struct kvm_mtrr_range *range) 491 { 492 u64 start, end; 493 494 var_mtrr_range(range, &start, &end); 495 if (!(start >= iter->end || end <= iter->start)) { 496 iter->range = range; 497 498 /* 499 * the function is called when we do kvm_mtrr.head walking. 500 * Range has the minimum base address which interleaves 501 * [looker->start_max, looker->end). 502 */ 503 iter->partial_map |= iter->start_max < start; 504 505 /* update the max address has been covered. */ 506 iter->start_max = max(iter->start_max, end); 507 return true; 508 } 509 510 return false; 511 } 512 513 static void __mtrr_lookup_var_next(struct mtrr_iter *iter) 514 { 515 struct kvm_mtrr *mtrr_state = iter->mtrr_state; 516 517 list_for_each_entry_continue(iter->range, &mtrr_state->head, node) 518 if (match_var_range(iter, iter->range)) 519 return; 520 521 iter->range = NULL; 522 iter->partial_map |= iter->start_max < iter->end; 523 } 524 525 static void mtrr_lookup_var_start(struct mtrr_iter *iter) 526 { 527 struct kvm_mtrr *mtrr_state = iter->mtrr_state; 528 529 iter->fixed = false; 530 iter->start_max = iter->start; 531 iter->range = NULL; 532 iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node); 533 534 __mtrr_lookup_var_next(iter); 535 } 536 537 static void mtrr_lookup_fixed_next(struct mtrr_iter *iter) 538 { 539 /* terminate the lookup. */ 540 if (fixed_mtrr_range_end_addr(iter->seg, iter->index) >= iter->end) { 541 iter->fixed = false; 542 iter->range = NULL; 543 return; 544 } 545 546 iter->index++; 547 548 /* have looked up for all fixed MTRRs. */ 549 if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges)) 550 return mtrr_lookup_var_start(iter); 551 552 /* switch to next segment. */ 553 if (iter->index > fixed_mtrr_seg_end_range_index(iter->seg)) 554 iter->seg++; 555 } 556 557 static void mtrr_lookup_var_next(struct mtrr_iter *iter) 558 { 559 __mtrr_lookup_var_next(iter); 560 } 561 562 static void mtrr_lookup_start(struct mtrr_iter *iter) 563 { 564 if (!mtrr_is_enabled(iter->mtrr_state)) { 565 iter->mtrr_disabled = true; 566 return; 567 } 568 569 if (!mtrr_lookup_fixed_start(iter)) 570 mtrr_lookup_var_start(iter); 571 } 572 573 static void mtrr_lookup_init(struct mtrr_iter *iter, 574 struct kvm_mtrr *mtrr_state, u64 start, u64 end) 575 { 576 iter->mtrr_state = mtrr_state; 577 iter->start = start; 578 iter->end = end; 579 iter->mtrr_disabled = false; 580 iter->partial_map = false; 581 iter->fixed = false; 582 iter->range = NULL; 583 584 mtrr_lookup_start(iter); 585 } 586 587 static bool mtrr_lookup_okay(struct mtrr_iter *iter) 588 { 589 if (iter->fixed) { 590 iter->mem_type = iter->mtrr_state->fixed_ranges[iter->index]; 591 return true; 592 } 593 594 if (iter->range) { 595 iter->mem_type = iter->range->base & 0xff; 596 return true; 597 } 598 599 return false; 600 } 601 602 static void mtrr_lookup_next(struct mtrr_iter *iter) 603 { 604 if (iter->fixed) 605 mtrr_lookup_fixed_next(iter); 606 else 607 mtrr_lookup_var_next(iter); 608 } 609 610 #define mtrr_for_each_mem_type(_iter_, _mtrr_, _gpa_start_, _gpa_end_) \ 611 for (mtrr_lookup_init(_iter_, _mtrr_, _gpa_start_, _gpa_end_); \ 612 mtrr_lookup_okay(_iter_); mtrr_lookup_next(_iter_)) 613 614 u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) 615 { 616 struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; 617 struct mtrr_iter iter; 618 u64 start, end; 619 int type = -1; 620 const int wt_wb_mask = (1 << MTRR_TYPE_WRBACK) 621 | (1 << MTRR_TYPE_WRTHROUGH); 622 623 start = gfn_to_gpa(gfn); 624 end = start + PAGE_SIZE; 625 626 mtrr_for_each_mem_type(&iter, mtrr_state, start, end) { 627 int curr_type = iter.mem_type; 628 629 /* 630 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR 631 * Precedences. 632 */ 633 634 if (type == -1) { 635 type = curr_type; 636 continue; 637 } 638 639 /* 640 * If two or more variable memory ranges match and the 641 * memory types are identical, then that memory type is 642 * used. 643 */ 644 if (type == curr_type) 645 continue; 646 647 /* 648 * If two or more variable memory ranges match and one of 649 * the memory types is UC, the UC memory type used. 650 */ 651 if (curr_type == MTRR_TYPE_UNCACHABLE) 652 return MTRR_TYPE_UNCACHABLE; 653 654 /* 655 * If two or more variable memory ranges match and the 656 * memory types are WT and WB, the WT memory type is used. 657 */ 658 if (((1 << type) & wt_wb_mask) && 659 ((1 << curr_type) & wt_wb_mask)) { 660 type = MTRR_TYPE_WRTHROUGH; 661 continue; 662 } 663 664 /* 665 * For overlaps not defined by the above rules, processor 666 * behavior is undefined. 667 */ 668 669 /* We use WB for this undefined behavior. :( */ 670 return MTRR_TYPE_WRBACK; 671 } 672 673 if (iter.mtrr_disabled) 674 return mtrr_disabled_type(vcpu); 675 676 /* not contained in any MTRRs. */ 677 if (type == -1) 678 return mtrr_default_type(mtrr_state); 679 680 /* 681 * We just check one page, partially covered by MTRRs is 682 * impossible. 683 */ 684 WARN_ON(iter.partial_map); 685 686 return type; 687 } 688 EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type); 689 690 bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, 691 int page_num) 692 { 693 struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; 694 struct mtrr_iter iter; 695 u64 start, end; 696 int type = -1; 697 698 start = gfn_to_gpa(gfn); 699 end = gfn_to_gpa(gfn + page_num); 700 mtrr_for_each_mem_type(&iter, mtrr_state, start, end) { 701 if (type == -1) { 702 type = iter.mem_type; 703 continue; 704 } 705 706 if (type != iter.mem_type) 707 return false; 708 } 709 710 if (iter.mtrr_disabled) 711 return true; 712 713 if (!iter.partial_map) 714 return true; 715 716 if (type == -1) 717 return true; 718 719 return type == mtrr_default_type(mtrr_state); 720 } 721