/*
 * vMTRR implementation
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright(C) 2015 Intel Corporation.
 *
 * Authors:
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Avi Kivity <avi@qumranet.com>
 *   Marcelo Tosatti <mtosatti@redhat.com>
 *   Paolo Bonzini <pbonzini@redhat.com>
 *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <linux/kvm_host.h>
#include <asm/mtrr.h>

#include "cpuid.h"
#include "mmu.h"

#define IA32_MTRR_DEF_TYPE_E		(1ULL << 11)
#define IA32_MTRR_DEF_TYPE_FE		(1ULL << 10)
#define IA32_MTRR_DEF_TYPE_TYPE_MASK	(0xff)

static bool msr_mtrr_valid(unsigned msr)
{
	switch (msr) {
	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
	case MSR_MTRRfix64K_00000:
	case MSR_MTRRfix16K_80000:
	case MSR_MTRRfix16K_A0000:
	case MSR_MTRRfix4K_C0000:
	case MSR_MTRRfix4K_C8000:
	case MSR_MTRRfix4K_D0000:
	case MSR_MTRRfix4K_D8000:
	case MSR_MTRRfix4K_E0000:
	case MSR_MTRRfix4K_E8000:
	case MSR_MTRRfix4K_F0000:
	case MSR_MTRRfix4K_F8000:
	case MSR_MTRRdefType:
	case MSR_IA32_CR_PAT:
		return true;
	case 0x2f8:
		return true;
	}
	return false;
}

static bool valid_pat_type(unsigned t)
{
	return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
}

static bool valid_mtrr_type(unsigned t)
{
	return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
}

bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int i;
	u64 mask;

	if (!msr_mtrr_valid(msr))
		return false;

	if (msr == MSR_IA32_CR_PAT) {
		for (i = 0; i < 8; i++)
			if (!valid_pat_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	} else if (msr == MSR_MTRRdefType) {
		if (data & ~0xcff)
			return false;
		return valid_mtrr_type(data & 0xff);
	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
		for (i = 0; i < 8; i++)
			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	}

	/* variable MTRRs */
	WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));

	mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
	if ((msr & 1) == 0) {
		/* MTRR base */
		if (!valid_mtrr_type(data & 0xff))
			return false;
		mask |= 0xf00;
	} else
		/* MTRR mask */
		mask |= 0x7ff;
	if (data & mask) {
		kvm_inject_gp(vcpu, 0);
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_valid);

static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
{
	return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_E);
}

static bool fixed_mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
{
	return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_FE);
}

static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
{
	return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
}

/*
 * Three terms are used in the following code:
 * - segment: one of the address ranges covered by the fixed MTRRs.
 * - unit: one MSR entry within a segment.
 * - range: a block of memory that is covered by a single cache type.
 */
struct fixed_mtrr_segment {
	u64 start;
	u64 end;

	int range_shift;

	/* the start position in kvm_mtrr.fixed_ranges[]. */
	int range_start;
};
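
/*
 * Table of the three fixed-range MTRR segments. Together they cover the
 * low 1MB of physical memory with 88 entries in kvm_mtrr.fixed_ranges[]:
 *   - segment 0: 1 MSR  x 8 ranges of 64K -> indices  0..7  for [0x00000, 0x80000)
 *   - segment 1: 2 MSRs x 8 ranges of 16K -> indices  8..23 for [0x80000, 0xc0000)
 *   - segment 2: 8 MSRs x 8 ranges of  4K -> indices 24..87 for [0xc0000, 0x100000)
 */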
static struct fixed_mtrr_segment fixed_seg_table[] = {
	/* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */
	{
		.start = 0x0,
		.end = 0x80000,
		.range_shift = 16, /* 64K */
		.range_start = 0,
	},

	/*
	 * MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units,
	 * 16K fixed mtrr.
	 */
	{
		.start = 0x80000,
		.end = 0xc0000,
		.range_shift = 14, /* 16K */
		.range_start = 8,
	},

	/*
	 * MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units,
	 * 4K fixed mtrr.
	 */
	{
		.start = 0xc0000,
		.end = 0x100000,
		.range_shift = 12, /* 4K */
		.range_start = 24,
	}
};

/*
 * Each unit is covered by one MSR, and one MSR entry contains 8 ranges,
 * so the unit size is always 8 * 2^range_shift.
 */
static u64 fixed_mtrr_seg_unit_size(int seg)
{
	return 8 << fixed_seg_table[seg].range_shift;
}

static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
{
	switch (msr) {
	case MSR_MTRRfix64K_00000:
		*seg = 0;
		*unit = 0;
		break;
	case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
		*seg = 1;
		*unit = msr - MSR_MTRRfix16K_80000;
		break;
	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
		*seg = 2;
		*unit = msr - MSR_MTRRfix4K_C0000;
		break;
	default:
		return false;
	}

	return true;
}

static void fixed_mtrr_seg_unit_range(int seg, int unit, u64 *start, u64 *end)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	u64 unit_size = fixed_mtrr_seg_unit_size(seg);

	*start = mtrr_seg->start + unit * unit_size;
	*end = *start + unit_size;
	WARN_ON(*end > mtrr_seg->end);
}

static int fixed_mtrr_seg_unit_range_index(int seg, int unit)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];

	WARN_ON(mtrr_seg->start + unit * fixed_mtrr_seg_unit_size(seg)
		> mtrr_seg->end);

	/* each unit has 8 ranges. */
	return mtrr_seg->range_start + 8 * unit;
}
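
/*
 * Index of the last entry in fixed_ranges[] that belongs to @seg.  For
 * example (illustrative), segment 1 has 16 ranges of 16K starting at
 * index 8, so its end range index is 8 + 16 - 1 = 23.
 */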
static int fixed_mtrr_seg_end_range_index(int seg)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	int n;

	n = (mtrr_seg->end - mtrr_seg->start) >> mtrr_seg->range_shift;
	return mtrr_seg->range_start + n - 1;
}

static bool fixed_msr_to_range(u32 msr, u64 *start, u64 *end)
{
	int seg, unit;

	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
		return false;

	fixed_mtrr_seg_unit_range(seg, unit, start, end);
	return true;
}

static int fixed_msr_to_range_index(u32 msr)
{
	int seg, unit;

	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
		return -1;

	return fixed_mtrr_seg_unit_range_index(seg, unit);
}

static int fixed_mtrr_addr_to_seg(u64 addr)
{
	struct fixed_mtrr_segment *mtrr_seg;
	int seg, seg_num = ARRAY_SIZE(fixed_seg_table);

	for (seg = 0; seg < seg_num; seg++) {
		mtrr_seg = &fixed_seg_table[seg];
		if (mtrr_seg->start <= addr && addr < mtrr_seg->end)
			return seg;
	}

	return -1;
}

static int fixed_mtrr_addr_seg_to_range_index(u64 addr, int seg)
{
	struct fixed_mtrr_segment *mtrr_seg;
	int index;

	mtrr_seg = &fixed_seg_table[seg];
	index = mtrr_seg->range_start;
	index += (addr - mtrr_seg->start) >> mtrr_seg->range_shift;
	return index;
}

static u64 fixed_mtrr_range_end_addr(int seg, int index)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	int pos = index - mtrr_seg->range_start;

	return mtrr_seg->start + ((pos + 1) << mtrr_seg->range_shift);
}

static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
{
	u64 mask;

	*start = range->base & PAGE_MASK;

	mask = range->mask & PAGE_MASK;
	mask |= ~0ULL << boot_cpu_data.x86_phys_bits;

	/* This cannot overflow because writing to the reserved bits of
	 * variable MTRRs causes a #GP.
	 */
	*end = (*start | ~mask) + 1;
}

static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	gfn_t start, end;
	int index;

	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
	    !kvm_arch_has_noncoherent_dma(vcpu->kvm))
		return;

	if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)
		return;

	/* fixed MTRRs. */
	if (fixed_msr_to_range(msr, &start, &end)) {
		if (!fixed_mtrr_is_enabled(mtrr_state))
			return;
	} else if (msr == MSR_MTRRdefType) {
		start = 0x0;
		end = ~0ULL;
	} else {
		/* variable range MTRRs. */
		index = (msr - 0x200) / 2;
		var_mtrr_range(&mtrr_state->var_ranges[index], &start, &end);
	}

	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
}

static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range)
{
	return (range->mask & (1 << 11)) != 0;
}

static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct kvm_mtrr_range *tmp, *cur;
	int index, is_mtrr_mask;

	index = (msr - 0x200) / 2;
	is_mtrr_mask = msr - 0x200 - 2 * index;
	cur = &mtrr_state->var_ranges[index];

	/* remove the entry if it's in the list. */
	if (var_mtrr_range_is_valid(cur))
		list_del(&mtrr_state->var_ranges[index].node);

	if (!is_mtrr_mask)
		cur->base = data;
	else
		cur->mask = data;

	/* add it to the list if it's enabled. */
	if (var_mtrr_range_is_valid(cur)) {
		list_for_each_entry(tmp, &mtrr_state->head, node)
			if (cur->base >= tmp->base)
				break;
		list_add_tail(&cur->node, &tmp->node);
	}
}
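
/*
 * Entry point for all MTRR/PAT MSR writes: validate the value, store it
 * in the matching fixed/variable/deftype/PAT slot, and let update_mtrr()
 * zap the affected GFN range if the effective memory type may change.
 */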
int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int index;

	if (!kvm_mtrr_valid(vcpu, msr, data))
		return 1;

	index = fixed_msr_to_range_index(msr);
	if (index >= 0)
		*(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data;
	else if (msr == MSR_MTRRdefType)
		vcpu->arch.mtrr_state.deftype = data;
	else if (msr == MSR_IA32_CR_PAT)
		vcpu->arch.pat = data;
	else
		set_var_mtrr_msr(vcpu, msr, data);

	update_mtrr(vcpu, msr);
	return 0;
}

int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	int index;

	/* MSR_MTRRcap is a readonly MSR. */
	if (msr == MSR_MTRRcap) {
		/*
		 * SMRR = 0
		 * WC = 1
		 * FIX = 1
		 * VCNT = KVM_NR_VAR_MTRR
		 */
		*pdata = 0x500 | KVM_NR_VAR_MTRR;
		return 0;
	}

	if (!msr_mtrr_valid(msr))
		return 1;

	index = fixed_msr_to_range_index(msr);
	if (index >= 0)
		*pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index];
	else if (msr == MSR_MTRRdefType)
		*pdata = vcpu->arch.mtrr_state.deftype;
	else if (msr == MSR_IA32_CR_PAT)
		*pdata = vcpu->arch.pat;
	else {	/* Variable MTRRs */
		int is_mtrr_mask;

		index = (msr - 0x200) / 2;
		is_mtrr_mask = msr - 0x200 - 2 * index;
		if (!is_mtrr_mask)
			*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
		else
			*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
	}

	return 0;
}

void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
{
	INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
}

struct mtrr_iter {
	/* input fields. */
	struct kvm_mtrr *mtrr_state;
	u64 start;
	u64 end;

	/* output fields. */
	int mem_type;
	/* true if [start, end) is not fully covered by MTRRs. */
	bool partial_map;

	/* private fields. */
	union {
		/* used for fixed MTRRs. */
		struct {
			int index;
			int seg;
		};

		/* used for var MTRRs. */
		struct {
			struct kvm_mtrr_range *range;
			/* the highest address covered so far by var MTRRs. */
			u64 start_max;
		};
	};

	bool fixed;
};

static bool mtrr_lookup_fixed_start(struct mtrr_iter *iter)
{
	int seg, index;

	if (!fixed_mtrr_is_enabled(iter->mtrr_state))
		return false;

	seg = fixed_mtrr_addr_to_seg(iter->start);
	if (seg < 0)
		return false;

	iter->fixed = true;
	index = fixed_mtrr_addr_seg_to_range_index(iter->start, seg);
	iter->index = index;
	iter->seg = seg;
	return true;
}

static bool match_var_range(struct mtrr_iter *iter,
			    struct kvm_mtrr_range *range)
{
	u64 start, end;

	var_mtrr_range(range, &start, &end);
	if (!(start >= iter->end || end <= iter->start)) {
		iter->range = range;

		/*
		 * This function is called while walking the sorted
		 * kvm_mtrr.head list, so this range has the minimum base
		 * address among the ranges overlapping
		 * [iter->start_max, iter->end).
		 */
		iter->partial_map |= iter->start_max < start;

		/* update the highest address covered so far. */
		iter->start_max = max(iter->start_max, end);
		return true;
	}

	return false;
}
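
/*
 * Advance iter->range to the next variable range on kvm_mtrr.head that
 * overlaps [iter->start, iter->end).  If nothing more overlaps, end the
 * walk (iter->range = NULL) and record in iter->partial_map whether part
 * of the region was left uncovered.
 */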
static void __mtrr_lookup_var_next(struct mtrr_iter *iter)
{
	struct kvm_mtrr *mtrr_state = iter->mtrr_state;

	list_for_each_entry_continue(iter->range, &mtrr_state->head, node)
		if (match_var_range(iter, iter->range))
			return;

	iter->range = NULL;
	iter->partial_map |= iter->start_max < iter->end;
}

static void mtrr_lookup_var_start(struct mtrr_iter *iter)
{
	struct kvm_mtrr *mtrr_state = iter->mtrr_state;

	iter->fixed = false;
	iter->start_max = iter->start;
	iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);

	__mtrr_lookup_var_next(iter);
}

static void mtrr_lookup_fixed_next(struct mtrr_iter *iter)
{
	/* terminate the lookup. */
	if (fixed_mtrr_range_end_addr(iter->seg, iter->index) >= iter->end) {
		iter->fixed = false;
		iter->range = NULL;
		return;
	}

	iter->index++;

	/* all fixed MTRRs have been looked up. */
	if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges))
		return mtrr_lookup_var_start(iter);

	/* switch to the next segment. */
	if (iter->index > fixed_mtrr_seg_end_range_index(iter->seg))
		iter->seg++;
}

static void mtrr_lookup_var_next(struct mtrr_iter *iter)
{
	__mtrr_lookup_var_next(iter);
}

static void mtrr_lookup_start(struct mtrr_iter *iter)
{
	if (!mtrr_is_enabled(iter->mtrr_state)) {
		iter->partial_map = true;
		return;
	}

	if (!mtrr_lookup_fixed_start(iter))
		mtrr_lookup_var_start(iter);
}

static void mtrr_lookup_init(struct mtrr_iter *iter,
			     struct kvm_mtrr *mtrr_state, u64 start, u64 end)
{
	iter->mtrr_state = mtrr_state;
	iter->start = start;
	iter->end = end;
	iter->partial_map = false;
	iter->fixed = false;
	iter->range = NULL;

	mtrr_lookup_start(iter);
}

static bool mtrr_lookup_okay(struct mtrr_iter *iter)
{
	if (iter->fixed) {
		iter->mem_type = iter->mtrr_state->fixed_ranges[iter->index];
		return true;
	}

	if (iter->range) {
		iter->mem_type = iter->range->base & 0xff;
		return true;
	}

	return false;
}

static void mtrr_lookup_next(struct mtrr_iter *iter)
{
	if (iter->fixed)
		mtrr_lookup_fixed_next(iter);
	else
		mtrr_lookup_var_next(iter);
}

#define mtrr_for_each_mem_type(_iter_, _mtrr_, _gpa_start_, _gpa_end_) \
	for (mtrr_lookup_init(_iter_, _mtrr_, _gpa_start_, _gpa_end_); \
	     mtrr_lookup_okay(_iter_); mtrr_lookup_next(_iter_))

u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct mtrr_iter iter;
	u64 start, end;
	int type = -1;
	const int wt_wb_mask = (1 << MTRR_TYPE_WRBACK)
			       | (1 << MTRR_TYPE_WRTHROUGH);

	start = gfn_to_gpa(gfn);
	end = start + PAGE_SIZE;

	mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
		int curr_type = iter.mem_type;

		/*
		 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR
		 * Precedences.
		 */

		if (type == -1) {
			type = curr_type;
			continue;
		}

		/*
		 * If two or more variable memory ranges match and the
		 * memory types are identical, then that memory type is
		 * used.
		 */
		if (type == curr_type)
			continue;

		/*
		 * If two or more variable memory ranges match and one of
		 * the memory types is UC, the UC memory type is used.
		 */
		if (curr_type == MTRR_TYPE_UNCACHABLE)
			return MTRR_TYPE_UNCACHABLE;

		/*
		 * If two or more variable memory ranges match and the
		 * memory types are WT and WB, the WT memory type is used.
		 */
		if (((1 << type) & wt_wb_mask) &&
		    ((1 << curr_type) & wt_wb_mask)) {
			type = MTRR_TYPE_WRTHROUGH;
			continue;
		}

		/*
		 * For overlaps not defined by the above rules, processor
		 * behavior is undefined.
		 */

		/* We use WB for this undefined behavior. :( */
		return MTRR_TYPE_WRBACK;
	}

	/* It is not covered by MTRRs. */
	if (iter.partial_map) {
		/*
		 * We checked only one page, so being partially covered
		 * by MTRRs is impossible.
		 */
		WARN_ON(type != -1);
		type = mtrr_default_type(mtrr_state);
	}
	return type;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
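
/*
 * Check whether the whole range [gfn, gfn + page_num) maps to a single
 * guest memory type; callers in the MMU use this to decide whether the
 * range may safely be covered by one large page.
 */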
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
					  int page_num)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct mtrr_iter iter;
	u64 start, end;
	int type = -1;

	start = gfn_to_gpa(gfn);
	end = gfn_to_gpa(gfn + page_num);
	mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
		if (type == -1) {
			type = iter.mem_type;
			continue;
		}

		if (type != iter.mem_type)
			return false;
	}

	if (!iter.partial_map)
		return true;

	if (type == -1)
		return true;

	return type == mtrr_default_type(mtrr_state);
}