// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

/* Host timecounter used to read the physical counter. */
static struct timecounter *timecounter;
/* Host Linux irq numbers and trigger flags for the timer PPIs. */
static unsigned int host_vtimer_irq;
static unsigned int host_ptimer_irq;
static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;

/* True when the GIC can maintain an active state for the timer PPIs. */
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
/* Set on CPUs where CNTVOFF_EL2 is unusable (see has_broken_cntvoff()). */
DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);

/*
 * Default PPI INTIDs advertised for each timer until userspace picks
 * others. NOTE(review): assumed to follow the usual DT/ACPI GIC PPI
 * assignment — confirm against the platform bindings.
 */
static const u8 default_ppi[] = {
	[TIMER_PTIMER]  = 30,
	[TIMER_VTIMER]  = 27,
	[TIMER_HPTIMER] = 26,
	[TIMER_HVTIMER] = 28,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);

static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

/*
 * Number of timer contexts a vcpu uses: only the EL0 pair without
 * nested virt, all of them (including the EL2 ones) with NV.
 */
static int nr_timers(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_nv(vcpu))
		return NR_KVM_EL0_TIMERS;

	return NR_KVM_TIMERS;
}

/* Read the guest's CNTx_CTL for @ctxt from the vcpu sysreg file. */
u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

/* Read the guest's CNTx_CVAL (compare value) for @ctxt. */
u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

/* Write the guest's CNTx_CTL for @ctxt into the vcpu sysreg file. */
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
		break;
	default:
		WARN_ON(1);
	}
}

/* Write the guest's CNTx_CVAL for @ctxt into the vcpu sysreg file. */
static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
		break;
	default:
		WARN_ON(1);
	}
}

/* Update the VM-wide counter offset backing @ctxt (if it has one). */
static void timer_set_offset(struct arch_timer_context
*ctxt, u64 offset)
{
	/* A timer without a VM-wide offset can only take a zero offset. */
	if (!ctxt->offset.vm_offset) {
		WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
		return;
	}

	WRITE_ONCE(*ctxt->offset.vm_offset, offset);
}

/* Raw read of the host physical counter via the timecounter. */
u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

/*
 * Compute which timers are hardware-backed ("direct") and which are
 * emulated with hrtimers for the vcpu's current configuration
 * (NV vEL2 vs vEL1/0, VHE vs nVHE host).
 */
void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	if (vcpu_has_nv(vcpu)) {
		if (is_hyp_ctxt(vcpu)) {
			map->direct_vtimer = vcpu_hvtimer(vcpu);
			map->direct_ptimer = vcpu_hptimer(vcpu);
			map->emul_vtimer = vcpu_vtimer(vcpu);
			map->emul_ptimer = vcpu_ptimer(vcpu);
		} else {
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_vtimer = vcpu_hvtimer(vcpu);
			map->emul_ptimer = vcpu_hptimer(vcpu);
		}
	} else if (has_vhe()) {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = NULL;
		map->emul_ptimer = NULL;
	} else {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = NULL;
		map->emul_vtimer = NULL;
		map->emul_ptimer = vcpu_ptimer(vcpu);
	}

	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}

/* True when the irqchip is emulated in userspace, not in-kernel. */
static inline bool userspace_irqchip(struct kvm *kvm)
{
	return unlikely(!irqchip_in_kernel(kvm));
}

/* Arm @hrt to expire @ns nanoseconds from now. */
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS_HARD);
}

static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}

/* Host irq handler for the timer PPIs while a vcpu is loaded. */
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *ctx;
	struct timer_map map;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	get_timer_map(vcpu, &map);

	if (irq == host_vtimer_irq)
		ctx = map.direct_vtimer;
	else
		ctx = map.direct_ptimer;

	if (kvm_timer_should_fire(ctx))
		kvm_timer_update_irq(vcpu, true, ctx);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

/*
 * Nanoseconds until the guest-visible counter (physical counter minus
 * the context's offset) reaches @val, or 0 if it already has.
 */
static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (now < val) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 val - now,
					 timecounter->mask,
					 &timer_ctx->ns_frac);
		return ns;
	}

	return 0;
}

/* Nanoseconds until @timer_ctx's CVAL is reached (0 if already past). */
static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}

/* Enabled and not masked: the timer is able to assert its interrupt. */
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	/* Only valid on saved state; a loaded context lives in HW regs. */
	WARN_ON(timer_ctx && timer_ctx->loaded);
	return timer_ctx &&
	       ((timer_get_ctl(timer_ctx) &
		 (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}

/* True if the vcpu is blocked in WFIT (requires FEAT_WFxT). */
static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		vcpu_get_flag(vcpu, IN_WFIT));
}

/* Remaining delay of a pending WFIT, against the relevant vtimer. */
static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, val);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_delta = ULLONG_MAX;
	int i;

	for (i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];

		/* Must be called on saved (not loaded) timer state. */
		WARN(ctx->loaded, "timer %d loaded\n", i);
		if (kvm_timer_irq_can_fire(ctx))
			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
	}

	/* A pending WFIT also bounds how long we may sleep. */
	if (vcpu_has_wfit_active(vcpu))
		min_delta = min(min_delta, wfit_delay_ns(vcpu));

	/* If none of timers can fire, then return 0 */
	if (min_delta == ULLONG_MAX)
		return 0;

	return min_delta;
}

/* Expiry of the blocking (background) hrtimer: wake the sleeping vcpu. */
static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

/* Expiry of a per-context emulation hrtimer: raise the timer's irq. */
static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = ctx->vcpu;

	trace_kvm_timer_hrtimer_expire(ctx);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
}

/*
 * Whether the timer's output line is (or should be) asserted: reads the
 * live HW control register when the context is loaded, the saved state
 * and guest counter otherwise.
 */
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	enum kvm_arch_timers index;
	u64 cval, now;

	if (!timer_ctx)
		return false;

	index = arch_timer_ctx_index(timer_ctx);

	if (timer_ctx->loaded) {
		u32 cnt_ctl = 0;

		switch (index) {
		case TIMER_VTIMER:
		case TIMER_HVTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
			break;
		case TIMER_PTIMER:
		case TIMER_HPTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
			break;
		case NR_KVM_TIMERS:
			/* GCC is braindead */
			cnt_ctl = 0;
			break;
		}

		/* Enabled, condition met, and not masked. */
		return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
		       (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_get_cval(timer_ctx);
	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	return cval <= now;
}

/* Only an already-expired WFIT counts as a pending timer here. */
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

static void
kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
{
	/*
	 * Paper over NV2 brokenness by publishing the interrupt status
	 * bit. This still results in a poor quality of emulation (guest
	 * writes will have no effect until the next exit).
	 *
	 * But hey, it's fast, right?
	 */
	if (is_hyp_ctxt(ctx->vcpu) &&
	    (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) {
		unsigned long val = timer_get_ctl(ctx);
		__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
		timer_set_ctl(ctx, val);
	}
}

/* Record a new output level and propagate it to the in-kernel vgic. */
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	int ret;

	kvm_timer_update_status(timer_ctx, new_level);

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (!userspace_irqchip(vcpu->kvm)) {
		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
					  timer_irq(timer_ctx),
					  timer_ctx->irq.level,
					  timer_ctx);
		WARN_ON(ret);
	}
}

/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);

	kvm_timer_update_status(ctx, should_fire);

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future. If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx))
		return;

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}

/* Program CNTVOFF_EL2 through the hyp helper. */
static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}

/* Program CNTPOFF_EL2 when the CPU implements it (FEAT_ECV). */
static void set_cntpoff(u64 cntpoff)
{
	if (has_cntpoff())
		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}

/*
 * Save the HW state of a loaded timer context back into the vcpu sysreg
 * file and disable the HW timer. Runs with interrupts off; no-op when
 * the timer subsystem is disabled or the context is not loaded.
 */
static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (!ctx->loaded)
		goto out;

	switch (index) {
		u64 cval;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
		cval = read_sysreg_el0(SYS_CNTV_CVAL);

		/* With a broken CNTVOFF, the offset was folded into CVAL. */
		if (has_broken_cntvoff())
			cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		/*
		 * The kernel may decide to run userspace after
		 * calling vcpu_put, so we reset cntvoff to 0 to
		 * ensure a consistent read between user accesses to
		 * the virtual counter and kernel access to the
		 * physical counter of non-VHE case.
		 *
		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so no need to zero CNTVOFF_EL2
		 * register, but this is actually useful when switching
		 * between EL1/vEL2 with NV.
		 *
		 * Do it unconditionally, as this is either unavoidable
		 * or dirt cheap.
		 */
		set_cntvoff(0);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
		cval = read_sysreg_el0(SYS_CNTP_CVAL);

		/* Saved CVAL is always relative to the guest counter. */
		cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		set_cntpoff(0);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_save_state(ctx);

	ctx->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_halt, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
	    !vcpu_has_wfit_active(vcpu))
		return;

	/*
	 * At least one guest time will expire. Schedule a background timer.
	 * Set the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

/* Stop the background hrtimer once the vcpu is runnable again. */
static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

/*
 * Load the saved state of @ctx into the HW timer and program the
 * relevant counter offset. Runs with interrupts off; no-op when the
 * timer subsystem is disabled or the context is already loaded.
 */
static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (ctx->loaded)
		goto out;

	switch (index) {
		u64 cval, offset;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		/* Broken CNTVOFF: fold the offset into CVAL instead. */
		if (has_broken_cntvoff()) {
			set_cntvoff(0);
			cval += offset;
		} else {
			set_cntvoff(offset);
		}
		write_sysreg_el0(cval, SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		/* set_cntpoff() is a no-op without CNTPOFF; compensate CVAL. */
		set_cntpoff(offset);
		cval += offset;
		write_sysreg_el0(cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_restore_state(ctx);

	ctx->loaded = true;
out:
	local_irq_restore(flags);
}

/* Set/clear the GIC active state of the host PPI backing @ctx. */
static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int r;
	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

/* vcpu_load path when the GIC tracks the timer PPI active state. */
static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = ctx->vcpu;
	bool phys_active = false;

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

	phys_active |= ctx->irq.level;

	set_timer_irq_phys_active(ctx, phys_active);
}

/* vcpu_load path when no GIC active state is available. */
static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress. Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

/* If _pred is true, set bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)

/*
 * Re-map the hardware-backed timer PPIs when the direct/emulated split
 * has changed across a vEL2<->vEL1/0 transition (NV on VHE).
 */
static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					      struct timer_map *map)
{
	int hw, ret;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	/*
	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have both a valid emul_vtimer,
	 * emul_ptimer, direct_vtimer, and direct_ptimer.
	 *
	 * Since this is called from kvm_timer_vcpu_load(), a change between
	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
	 * represent this, and therefore we switch the emul/direct mappings
	 * below.
	 */
	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
	if (hw < 0) {
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_vtimer->host_timer_irq,
					    timer_irq(map->direct_vtimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_ptimer->host_timer_irq,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
	}
}

/* Compute and apply the CNTHCTL_EL2 trap configuration for this vcpu. */
static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	bool tvt, tpt, tvc, tpc, tvt02, tpt02;
	u64 clr, set;

	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), which is where the stuff happens.
	 */
	if (!has_vhe())
		return;

	/*
	 * Our default policy is not to trap anything.
	 * As we progress
	 * within this function, reality kicks in and we start adding
	 * traps based on emulation requirements.
	 */
	tvt = tpt = tvc = tpc = false;
	tvt02 = tpt02 = false;

	/*
	 * NV2 badly breaks the timer semantics by redirecting accesses to
	 * the EL1 timer state to memory, so let's call ECV to the rescue if
	 * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
	 *
	 * The treatment slightly varies depending whether we run a nVHE or
	 * VHE guest: nVHE will use the _EL0 registers directly, while VHE
	 * will use the _EL02 accessors. This translates in different trap
	 * bits.
	 *
	 * None of the trapping is required when running in non-HYP context,
	 * unless required by the L1 hypervisor settings once we advertise
	 * ECV+NV in the guest, or that we need trapping for other reasons.
	 */
	if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
		if (vcpu_el2_e2h_is_set(vcpu))
			tvt02 = tpt02 = true;
		else
			tvt = tpt = true;
	}

	/*
	 * We have two possibility to deal with a physical offset:
	 *
	 * - Either we have CNTPOFF (yay!) or the offset is 0:
	 *   we let the guest freely access the HW
	 *
	 * - or neither of these condition apply:
	 *   we trap accesses to the HW, but still use it
	 *   after correcting the physical offset
	 */
	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
		tpt = tpc = true;

	/*
	 * For the poor sods that could not correctly substract one value
	 * from another, trap the full virtual timer and counter.
	 */
	if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
		tvt = tvc = true;

	/*
	 * Apply the enable bits that the guest hypervisor has requested for
	 * its own guest. We can only add traps that wouldn't have been set
	 * above.
	 *
	 * Implementation choices: we do not support NV when E2H=0 in the
	 * guest, and we don't support configuration where E2H is writable
	 * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
	 * not both). This simplifies the handling of the EL1NV* bits.
	 */
	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

		/* Use the VHE format for mental sanity */
		if (!vcpu_el2_e2h_is_set(vcpu))
			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));

		tpt02 |= (val & CNTHCTL_EL1NVPCT);
		tvt02 |= (val & CNTHCTL_EL1NVVCT);
	}

	/*
	 * Now that we have collected our requirements, compute the
	 * trap and enable bits.
	 *
	 * Note the asymmetry: EL1PCEN/EL1PCTEN are *enable* bits (clear to
	 * trap), while the TVT/TVCT/NV* bits are *trap* bits (set to trap),
	 * hence the swapped clr/set arguments below.
	 */
	set = 0;
	clr = 0;

	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
	assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
	assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
	assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
	assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);

	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
	sysreg_clear_set(cnthctl_el2, clr, set);
}

/* vcpu_load hook: restore and arm this vcpu's timers on the CPU. */
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	if (static_branch_likely(&has_gic_active_state)) {
		if (vcpu_has_nv(vcpu))
			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

		kvm_timer_vcpu_load_gic(map.direct_vtimer);
		if (map.direct_ptimer)
			kvm_timer_vcpu_load_gic(map.direct_ptimer);
	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
	}

	kvm_timer_unblocking(vcpu);

	timer_restore_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_restore_state(map.direct_ptimer);
	if (map.emul_vtimer)
		timer_emulate(map.emul_vtimer);
	if (map.emul_ptimer)
		timer_emulate(map.emul_ptimer);

	timer_set_traps(vcpu, &map);
}

/* With a userspace irqchip: did a timer level change since last sync? */
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

/* vcpu_put hook: save the HW timers and stop any emulation hrtimers. */
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	timer_save_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_save_state(map.direct_ptimer);

	/*
	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that
	 * case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	if (kvm_vcpu_is_blocking(vcpu))
		kvm_timer_blocking(vcpu);
}

/* Re-sync the emulated EL1 timers after a nested guest exit (NV2). */
void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
{
	/*
	 * When NV2 is on, guest hypervisors have their EL1 timer register
	 * accesses redirected to the VNCR page. Any guest action taken on
	 * the timer is postponed until the next exit, leading to a very
	 * poor quality of emulation.
	 *
	 * This is an unmitigated disaster, only papered over by FEAT_ECV,
	 * which allows trapping of the timer registers even with NV2.
	 * Still, this is still worse than FEAT_NV on its own. Meh.
	 */
	if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
		/*
		 * For a VHE guest hypervisor, the EL2 state is directly
		 * stored in the host EL1 timers, while the emulated EL1
		 * state is stored in the VNCR page. The latter could have
		 * been updated behind our back, and we must reset the
		 * emulation of the timers.
		 *
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap despite being
		 * notionally direct (we use the EL1 HW, as for VHE), while
		 * the EL1 registers access memory.
		 *
		 * In both cases, process the emulated timers on each guest
		 * exit. Boo.
		 */
		struct timer_map map;
		get_timer_map(vcpu, &map);

		soft_timer_cancel(&map.emul_vtimer->hrtimer);
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
		timer_emulate(map.emul_vtimer);
		timer_emulate(map.emul_ptimer);
	}
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

/* Exit-time sync, only needed for userspace-irqchip VMs. */
void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

/* vcpu reset: disable/unmask all timers and drop pending state. */
void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7. We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	for (int i = 0; i < nr_timers(vcpu); i++)
		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);

	/*
	 * A vcpu running at EL2 is in charge of the offset applied to
	 * the virtual timer, so use the physical VM offset, and point
	 * the vcpu offset to CNTVOFF_EL2.
	 */
	if (vcpu_has_nv(vcpu)) {
		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;

		offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
	}

	if (timer->enabled) {
		for (int i = 0; i < nr_timers(vcpu); i++)
			kvm_timer_update_irq(vcpu, false,
					     vcpu_get_timer(vcpu, i));

		if (irqchip_in_kernel(vcpu->kvm)) {
			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
			if (map.direct_ptimer)
				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
		}
	}

	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
}

/* Per-context init: VM offset backing, emulation hrtimer, host PPI. */
static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->vcpu = vcpu;

	if (timerid == TIMER_VTIMER)
		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
	else
		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;

	hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	ctxt->hrtimer.function = kvm_hrtimer_expire;

	switch (timerid) {
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		ctxt->host_timer_irq = host_ptimer_irq;
		break;
	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		ctxt->host_timer_irq = host_vtimer_irq;
		break;
	}
}

/* vcpu init: set up every timer context and the background hrtimer. */
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	for (int i = 0; i < NR_KVM_TIMERS; i++)
		timer_context_init(vcpu, i);

	/* Synchronize offsets across timers of a VM if not already provided */
	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
timer_set_offset(vcpu_ptimer(vcpu), 0); 1101 } 1102 1103 hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); 1104 timer->bg_timer.function = kvm_bg_timer_expire; 1105 } 1106 1107 void kvm_timer_init_vm(struct kvm *kvm) 1108 { 1109 for (int i = 0; i < NR_KVM_TIMERS; i++) 1110 kvm->arch.timer_data.ppi[i] = default_ppi[i]; 1111 } 1112 1113 void kvm_timer_cpu_up(void) 1114 { 1115 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 1116 if (host_ptimer_irq) 1117 enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); 1118 } 1119 1120 void kvm_timer_cpu_down(void) 1121 { 1122 disable_percpu_irq(host_vtimer_irq); 1123 if (host_ptimer_irq) 1124 disable_percpu_irq(host_ptimer_irq); 1125 } 1126 1127 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) 1128 { 1129 struct arch_timer_context *timer; 1130 1131 switch (regid) { 1132 case KVM_REG_ARM_TIMER_CTL: 1133 timer = vcpu_vtimer(vcpu); 1134 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); 1135 break; 1136 case KVM_REG_ARM_TIMER_CNT: 1137 if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, 1138 &vcpu->kvm->arch.flags)) { 1139 timer = vcpu_vtimer(vcpu); 1140 timer_set_offset(timer, kvm_phys_timer_read() - value); 1141 } 1142 break; 1143 case KVM_REG_ARM_TIMER_CVAL: 1144 timer = vcpu_vtimer(vcpu); 1145 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); 1146 break; 1147 case KVM_REG_ARM_PTIMER_CTL: 1148 timer = vcpu_ptimer(vcpu); 1149 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); 1150 break; 1151 case KVM_REG_ARM_PTIMER_CNT: 1152 if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, 1153 &vcpu->kvm->arch.flags)) { 1154 timer = vcpu_ptimer(vcpu); 1155 timer_set_offset(timer, kvm_phys_timer_read() - value); 1156 } 1157 break; 1158 case KVM_REG_ARM_PTIMER_CVAL: 1159 timer = vcpu_ptimer(vcpu); 1160 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); 1161 break; 1162 1163 default: 1164 return -1; 1165 } 1166 1167 return 0; 1168 } 1169 1170 static u64 
read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set ISTATUS bit if it's expired.
	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
	 * regardless of ENABLE bit for our implementation convenience.
	 */
	u32 ctl = timer_get_ctl(timer);

	/* A zero delta means the compare value has been reached. */
	if (!kvm_timer_compute_delta(timer))
		ctl |= ARCH_TIMER_CTRL_IT_STAT;

	return ctl;
}

/*
 * Read one of the guest-visible timer registers identified by @regid.
 * Returns (u64)-1 for an unknown @regid.
 */
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_TIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_TIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
	case KVM_REG_ARM_PTIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_PTIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_PTIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
	}
	return (u64)-1;
}

/* Compute the value of a single timer register from the in-memory state. */
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
{
	u64 val;

	switch (treg) {
	case TIMER_REG_TVAL:
		/* TVAL is the signed distance from the (offset) counter, truncated to 32 bits. */
		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
		val = lower_32_bits(val);
		break;

	case TIMER_REG_CTL:
		val = read_timer_ctl(timer);
		break;

	case TIMER_REG_CVAL:
		val = timer_get_cval(timer);
		break;

	case TIMER_REG_CNT:
		/* Guest view of the counter: host physical counter minus the offset. */
		val = kvm_phys_timer_read() - timer_get_offset(timer);
		break;

	case TIMER_REG_VOFF:
		val = *timer->offset.vcpu_offset;
		break;

	default:
		BUG();
	}

	return val;
}

/*
 * Read a timer register on behalf of a trapped guest access.  Emulated
 * timers can be read straight from memory; HW-backed timers have their
 * state saved first (and restored afterwards) so the in-memory copy is
 * up to date while the register is computed.
 */
u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
			      enum kvm_arch_timers tmr,
			      enum kvm_arch_timer_regs treg)
{
	struct arch_timer_context *timer;
	struct timer_map map;
	u64 val;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);

	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
		return kvm_arm_timer_read(vcpu, timer, treg);

	preempt_disable();
	timer_save_state(timer);

	val = kvm_arm_timer_read(vcpu, timer, treg);

	timer_restore_state(timer);
	preempt_enable();

	return val;
}

/* Update the in-memory state for a single timer register. */
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL:
		/* TVAL writes are sign-extended and converted to an absolute CVAL. */
		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
		break;

	case TIMER_REG_CTL:
		/* ISTATUS is a read-only status bit; never latch it from a write. */
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}

/*
 * Write a timer register on behalf of a trapped guest access.  Emulated
 * timers cancel the soft timer, apply the write and re-arm emulation;
 * HW-backed timers are saved, updated in memory, then restored so the
 * new value takes effect in hardware.
 */
void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *timer;
	struct timer_map map;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);
	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
		soft_timer_cancel(&timer->hrtimer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_emulate(timer);
	} else {
		preempt_disable();
		timer_save_state(timer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_restore_state(timer);
		preempt_enable();
	}
}

/* Record whether this timer IRQ is currently forwarded to a vcpu. */
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if
(vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	return 0;
}

/*
 * Only intercept ACTIVE state changes while the IRQ is forwarded to a
 * vcpu; anything else goes straight to the parent irqchip.  While
 * forwarded, "active" is modelled by masking the parent interrupt.
 */
static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
		return irq_chip_set_parent_state(d, which, val);

	if (val)
		irq_chip_mask_parent(d);
	else
		irq_chip_unmask_parent(d);

	return 0;
}

/* EOI reaches the parent chip only when the IRQ is not forwarded to a vcpu. */
static void timer_irq_eoi(struct irq_data *d)
{
	if (!irqd_is_forwarded_to_vcpu(d))
		irq_chip_eoi_parent(d);
}

/* Forward the ack to the parent chip, if it implements one. */
static void timer_irq_ack(struct irq_data *d)
{
	d = d->parent_data;
	if (d->chip->irq_ack)
		d->chip->irq_ack(d);
}

/* irqchip interposed between the timer PPIs and their parent chip. */
static struct irq_chip timer_chip = {
	.name			= "KVM",
	.irq_ack		= timer_irq_ack,
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= timer_irq_eoi,
	.irq_set_type		= irq_chip_set_type_parent,
	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
};

/* Bind a pushed timer IRQ to our interposing irqchip; hwirq comes via @arg. */
static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq = (uintptr_t)arg;

	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
					     &timer_chip, NULL);
}

/* Nothing to undo: alloc only attached a chip, no resources were taken. */
static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

static const struct irq_domain_ops timer_domain_ops = {
	.alloc	= timer_irq_domain_alloc,
	.free	= timer_irq_domain_free,
};

/* Validate the trigger type of a timer PPI, defaulting to level-low. */
static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	*flags = irq_get_trigger_type(virq);
	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		*flags = IRQF_TRIGGER_LOW;
	}
}

/*
 * Discover the host timer IRQs and, when the vgic cannot deactivate them
 * in HW, interpose our irqchip hierarchy so the IRQs can be software
 * resampled.
 */
static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

	if (kvm_vgic_global_state.no_hw_deactivation) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both vtimer and ptimer in the same parent */
		data = irq_get_irq_data(host_vtimer_irq);
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

static void kvm_timer_handle_errata(void)
{
	u64 mmfr0, mmfr1, mmfr4;

	/*
	 * CNTVOFF_EL2 is broken on some implementations. For those, we trap
	 * all virtual timer/counter accesses, requiring FEAT_ECV.
	 *
	 * However, a hypervisor supporting nesting is likely to mitigate the
	 * erratum at L0, and not require other levels to mitigate it (which
	 * would otherwise be a terrible performance sink due to trap
	 * amplification).
	 *
	 * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
	 * and that NV is likely not to (because of limitations of the
	 * architecture), only enable the workaround when FEAT_VHE and
	 * FEAT_E2H0 are both detected. Time will tell if this actually holds.
	 */
	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
	if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
	    !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
	    SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
	    (has_vhe() || has_hvhe()) &&
	    cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
		static_branch_enable(&broken_cntvoff_key);
		kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
	}
}

/*
 * One-time host-wide timer setup: grab the timecounter, request the
 * per-CPU timer IRQs, wire up vcpu affinity when a GIC is present, and
 * apply errata handling.
 */
int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, err);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			err = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (err) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	} else if (has_vhe()) {
		/* VHE hosts need the physical timer IRQ; fail hard without it. */
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_vtimer_irq;
	}

	kvm_timer_handle_errata();
	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

/* Stop the background hrtimer when tearing down a vcpu. */
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

/*
 * Check that the configured timer PPIs are distinct and claimable; on
 * success the configuration is frozen for the lifetime of the VM.
 */
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	u32 ppis = 0;
	bool valid;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;
		int irq;

		ctx = vcpu_get_timer(vcpu, i);
		irq = timer_irq(ctx);
		if (kvm_vgic_set_owner(vcpu, irq, ctx))
			break;

		/*
		 * We know by construction that we only have PPIs, so
		 * all values are less than 32.
		 */
		ppis |= BIT(irq);
	}

	/* Duplicate PPIs collapse in the bitmap, making the count come up short. */
	valid = hweight32(ppis) == nr_timers(vcpu);

	if (valid)
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);

	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return valid;
}

/*
 * vgic resampling callback: report whether the timer wired to @vintid
 * should currently be firing on the running vcpu.
 */
static bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (WARN(!vcpu, "No vcpu context!\n"))
		return false;

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;

		ctx = vcpu_get_timer(vcpu, i);
		if (timer_irq(ctx) == vintid)
			return kvm_timer_should_fire(ctx);
	}

	/* A timer IRQ has fired, but no matching timer was found? */
	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);

	return false;
}

/*
 * Finalize the timer setup for this vcpu: validate the PPI configuration
 * and map the direct timers' host IRQs into the vgic.  Idempotent once
 * the timer has been enabled.
 */
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	/*
	 * At this stage, we have the guarantee that the vgic is both
	 * available and initialized.
	 */
	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	get_timer_map(vcpu, &map);

	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    timer_irq(map.direct_vtimer),
				    &arch_timer_irq_ops);
	if (ret)
		return ret;

	if (map.direct_ptimer) {
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    timer_irq(map.direct_ptimer),
					    &arch_timer_irq_ops);
	}

	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}

/*
 * Userspace attribute write: assign a PPI to one of the four timers.
 * Rejected once any vcpu has run (the PPI layout is then immutable).
 */
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	int irq, idx, ret = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(irq)))
		return -EINVAL;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
		     &vcpu->kvm->arch.flags)) {
		ret = -EBUSY;
		goto out;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		idx = TIMER_VTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		idx = TIMER_PTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		idx = TIMER_HVTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		idx = TIMER_HPTIMER;
		break;
	default:
		ret = -ENXIO;
		goto out;
	}

	/*
	 * We cannot validate the IRQ unicity before we run, so take it at
	 * face value. The verdict will be given on first vcpu run, for each
	 * vcpu. Yes this is late.
Blame it on the stupid API.
	 */
	vcpu->kvm->arch.timer_data.ppi[idx] = irq;

out:
	mutex_unlock(&vcpu->kvm->arch.config_lock);
	return ret;
}

/* Userspace attribute read: report the PPI assigned to the requested timer. */
int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *timer;
	int irq;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		timer = vcpu_vtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		timer = vcpu_ptimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		timer = vcpu_hvtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		timer = vcpu_hptimer(vcpu);
		break;
	default:
		return -ENXIO;
	}

	irq = timer_irq(timer);
	return put_user(irq, uaddr);
}

/* All four per-timer IRQ attributes are supported. */
int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		return 0;
	}

	return -ENXIO;
}

/*
 * Apply a userspace-supplied, VM-wide counter offset.  Requires that no
 * vcpu is currently locked/running, and applies to both the virtual and
 * physical counter views.
 */
int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset)
{
	int ret = 0;

	if (offset->reserved)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (lock_all_vcpus(kvm)) {
		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);

		/*
		 * If userspace decides to set the offset using this
		 * API rather than merely restoring the counter
		 * values, the offset applies to both the virtual and
		 * physical views.
		 */
		kvm->arch.timer_data.voffset = offset->counter_offset;
		kvm->arch.timer_data.poffset = offset->counter_offset;

		unlock_all_vcpus(kvm);
	} else {
		ret = -EBUSY;
	}

	mutex_unlock(&kvm->lock);

	return ret;
}