// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static unsigned int host_ptimer_irq;
static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;

static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);

static const u8 default_ppi[] = {
	[TIMER_PTIMER]  = 30,
	[TIMER_VTIMER]  = 27,
	[TIMER_HPTIMER] = 26,
	[TIMER_HVTIMER] = 28,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);

static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

static int nr_timers(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_nv(vcpu))
		return NR_KVM_EL0_TIMERS;

	return NR_KVM_TIMERS;
}

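/*
 * Accessors for the per-context timer registers: the CTL/CVAL values of
 * each timer live in the vcpu sysreg file, and the context index picks
 * the EL0 or EL2 copy that backs this particular context.
 */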
u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
{
	if (!ctxt->offset.vm_offset) {
		WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
		return;
	}

	WRITE_ONCE(*ctxt->offset.vm_offset, offset);
}

u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

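/*
 * Decide which timers are backed by the HW and which ones are emulated.
 * With NV, the pair matching the current context (vEL2 or vEL1/0) gets
 * the HW timers and the other pair is emulated in software; without
 * VHE, only the virtual timer is ever backed by the HW.
 */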
void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	if (vcpu_has_nv(vcpu)) {
		if (is_hyp_ctxt(vcpu)) {
			map->direct_vtimer = vcpu_hvtimer(vcpu);
			map->direct_ptimer = vcpu_hptimer(vcpu);
			map->emul_vtimer = vcpu_vtimer(vcpu);
			map->emul_ptimer = vcpu_ptimer(vcpu);
		} else {
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_vtimer = vcpu_hvtimer(vcpu);
			map->emul_ptimer = vcpu_hptimer(vcpu);
		}
	} else if (has_vhe()) {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = NULL;
		map->emul_ptimer = NULL;
	} else {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = NULL;
		map->emul_vtimer = NULL;
		map->emul_ptimer = vcpu_ptimer(vcpu);
	}

	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}

static inline bool userspace_irqchip(struct kvm *kvm)
{
	return unlikely(!irqchip_in_kernel(kvm));
}

static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS_HARD);
}

static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *ctx;
	struct timer_map map;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	get_timer_map(vcpu, &map);

	if (irq == host_vtimer_irq)
		ctx = map.direct_vtimer;
	else
		ctx = map.direct_ptimer;

	if (kvm_timer_should_fire(ctx))
		kvm_timer_update_irq(vcpu, true, ctx);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (now < val) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 val - now,
					 timecounter->mask,
					 &timer_ctx->ns_frac);
		return ns;
	}

	return 0;
}

static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	WARN_ON(timer_ctx && timer_ctx->loaded);
	return timer_ctx &&
	       ((timer_get_ctl(timer_ctx) &
		 (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}

static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		vcpu_get_flag(vcpu, IN_WFIT));
}

static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, val);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_delta = ULLONG_MAX;
	int i;

	for (i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];

		WARN(ctx->loaded, "timer %d loaded\n", i);
		if (kvm_timer_irq_can_fire(ctx))
			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
	}

	if (vcpu_has_wfit_active(vcpu))
		min_delta = min(min_delta, wfit_delay_ns(vcpu));

	/* If none of the timers can fire, return 0 */
	if (min_delta == ULLONG_MAX)
		return 0;

	return min_delta;
}

static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = ctx->vcpu;

	trace_kvm_timer_hrtimer_expire(ctx);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
}

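/*
 * Compute the output level of a timer: from the HW control register if
 * the timer is currently loaded on the CPU, otherwise by comparing CVAL
 * against the offset-adjusted counter.
 */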
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	enum kvm_arch_timers index;
	u64 cval, now;

	if (!timer_ctx)
		return false;

	index = arch_timer_ctx_index(timer_ctx);

	if (timer_ctx->loaded) {
		u32 cnt_ctl = 0;

		switch (index) {
		case TIMER_VTIMER:
		case TIMER_HVTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
			break;
		case TIMER_PTIMER:
		case TIMER_HPTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
			break;
		case NR_KVM_TIMERS:
			/* GCC is braindead */
			cnt_ctl = 0;
			break;
		}

		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
			(cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_get_cval(timer_ctx);
	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	return cval <= now;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
{
	/*
	 * Paper over NV2 brokenness by publishing the interrupt status
	 * bit. This still results in a poor quality of emulation (guest
	 * writes will have no effect until the next exit).
	 *
	 * But hey, it's fast, right?
	 */
	if (is_hyp_ctxt(ctx->vcpu) &&
	    (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) {
		unsigned long val = timer_get_ctl(ctx);
		__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
		timer_set_ctl(ctx, val);
	}
}

static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	kvm_timer_update_status(timer_ctx, new_level);

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (userspace_irqchip(vcpu->kvm))
		return;

	kvm_vgic_inject_irq(vcpu->kvm, vcpu,
			    timer_irq(timer_ctx),
			    timer_ctx->irq.level,
			    timer_ctx);
}

/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);

	kvm_timer_update_status(ctx, should_fire);

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future. If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx))
		return;

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}

static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}

static void set_cntpoff(u64 cntpoff)
{
	if (has_cntpoff())
		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}

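/*
 * Sync the HW state of a directly assigned timer back into the vcpu
 * sysreg file, then disable the HW timer and clear the counter offsets
 * (see the CNTVOFF comment below).
 */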
static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (!ctx->loaded)
		goto out;

	switch (index) {
		u64 cval;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
		cval = read_sysreg_el0(SYS_CNTV_CVAL);

		if (has_broken_cntvoff())
			cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		/*
		 * The kernel may decide to run userspace after
		 * calling vcpu_put, so we reset cntvoff to 0 to
		 * ensure a consistent read between user accesses to
		 * the virtual counter and kernel access to the
		 * physical counter in the non-VHE case.
		 *
		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so no need to zero CNTVOFF_EL2
		 * register, but this is actually useful when switching
		 * between EL1/vEL2 with NV.
		 *
		 * Do it unconditionally, as this is either unavoidable
		 * or dirt cheap.
		 */
		set_cntvoff(0);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
		cval = read_sysreg_el0(SYS_CNTP_CVAL);

		cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		set_cntpoff(0);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_save_state(ctx);

	ctx->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_halt, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
	    !vcpu_has_wfit_active(vcpu))
		return;

	/*
	 * At least one guest timer will expire. Schedule a background timer.
	 * Set the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

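/*
 * Load a directly assigned timer from the vcpu sysreg file into the HW
 * registers, together with the relevant counter offset.
 */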
static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (ctx->loaded)
		goto out;

	switch (index) {
		u64 cval, offset;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		if (has_broken_cntvoff()) {
			set_cntvoff(0);
			cval += offset;
		} else {
			set_cntvoff(offset);
		}
		write_sysreg_el0(cval, SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		set_cntpoff(offset);
		cval += offset;
		write_sysreg_el0(cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_restore_state(ctx);

	ctx->loaded = true;
out:
	local_irq_restore(flags);
}

static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int r;
	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = ctx->vcpu;
	bool phys_active = false;

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

	phys_active |= ctx->irq.level;

	set_timer_irq_phys_active(ctx, phys_active);
}

static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress. Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

/* If _pred is true, set bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)

static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					      struct timer_map *map)
{
	int hw, ret;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	/*
	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have a valid emul_vtimer,
	 * emul_ptimer, direct_vtimer, and direct_ptimer.
	 *
	 * Since this is called from kvm_timer_vcpu_load(), a change between
	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
	 * represent this, and therefore we switch the emul/direct mappings
	 * below.
	 */
	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
	if (hw < 0) {
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_vtimer->host_timer_irq,
					    timer_irq(map->direct_vtimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_ptimer->host_timer_irq,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
	}
}

static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	bool tvt, tpt, tvc, tpc, tvt02, tpt02;
	u64 clr, set;

	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), which is where the stuff happens.
	 */
	if (!has_vhe())
		return;

	/*
	 * Our default policy is not to trap anything. As we progress
	 * within this function, reality kicks in and we start adding
	 * traps based on emulation requirements.
	 */
	tvt = tpt = tvc = tpc = false;
	tvt02 = tpt02 = false;

	/*
	 * NV2 badly breaks the timer semantics by redirecting accesses to
	 * the EL1 timer state to memory, so let's call ECV to the rescue if
	 * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
	 *
	 * The treatment slightly varies depending on whether we run a nVHE
	 * or VHE guest: nVHE will use the _EL0 registers directly, while VHE
	 * will use the _EL02 accessors. This translates into different trap
	 * bits.
	 *
	 * None of the trapping is required when running in non-HYP context,
	 * unless required by the L1 hypervisor settings once we advertise
	 * ECV+NV in the guest, or that we need trapping for other reasons.
	 */
	if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
		if (vcpu_el2_e2h_is_set(vcpu))
			tvt02 = tpt02 = true;
		else
			tvt = tpt = true;
	}

	/*
	 * We have two possibilities to deal with a physical offset:
	 *
	 * - Either we have CNTPOFF (yay!) or the offset is 0:
	 *   we let the guest freely access the HW
	 *
	 * - or neither of these conditions applies:
	 *   we trap accesses to the HW, but still use it
	 *   after correcting the physical offset
	 */
	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
		tpt = tpc = true;

	/*
	 * For the poor sods that could not correctly subtract one value
	 * from another, trap the full virtual timer and counter.
	 */
	if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
		tvt = tvc = true;

	/*
	 * Apply the enable bits that the guest hypervisor has requested for
	 * its own guest. We can only add traps that wouldn't have been set
	 * above.
	 * Implementation choices: we do not support NV when E2H=0 in the
	 * guest, and we don't support configuration where E2H is writable
	 * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
	 * not both). This simplifies the handling of the EL1NV* bits.
	 */
	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

		/* Use the VHE format for mental sanity */
		if (!vcpu_el2_e2h_is_set(vcpu))
			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));

		tpt02 |= (val & CNTHCTL_EL1NVPCT);
		tvt02 |= (val & CNTHCTL_EL1NVVCT);
	}

	/*
	 * Now that we have collected our requirements, compute the
	 * trap and enable bits.
	 */
	set = 0;
	clr = 0;

	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
	assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
	assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
	assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
	assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);

	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
	sysreg_clear_set(cnthctl_el2, clr, set);
}

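/*
 * Load the timer state onto the CPU on vcpu_load: wire up and restore
 * the HW-backed timers, kick the emulated ones, and set up the traps
 * required by the current context.
 */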
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	if (static_branch_likely(&has_gic_active_state)) {
		if (vcpu_has_nv(vcpu))
			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

		kvm_timer_vcpu_load_gic(map.direct_vtimer);
		if (map.direct_ptimer)
			kvm_timer_vcpu_load_gic(map.direct_ptimer);
	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
	}

	kvm_timer_unblocking(vcpu);

	timer_restore_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_restore_state(map.direct_ptimer);
	if (map.emul_vtimer)
		timer_emulate(map.emul_vtimer);
	if (map.emul_ptimer)
		timer_emulate(map.emul_ptimer);

	timer_set_traps(vcpu, &map);
}

bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

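/*
 * Counterpart of kvm_timer_vcpu_load(): save the HW-backed timers back
 * into the vcpu sysreg file, cancel the emulation hrtimers, and arm the
 * background timer if the vcpu is about to block.
 */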
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	timer_save_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_save_state(map.direct_ptimer);

	/*
	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	if (kvm_vcpu_is_blocking(vcpu))
		kvm_timer_blocking(vcpu);
}

void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
{
	/*
	 * When NV2 is on, guest hypervisors have their EL1 timer register
	 * accesses redirected to the VNCR page. Any guest action taken on
	 * the timer is postponed until the next exit, leading to a very
	 * poor quality of emulation.
	 *
	 * This is an unmitigated disaster, only papered over by FEAT_ECV,
	 * which allows trapping of the timer registers even with NV2.
	 * Even then, this is still worse than FEAT_NV on its own. Meh.
	 */
	if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
		/*
		 * For a VHE guest hypervisor, the EL2 state is directly
		 * stored in the host EL1 timers, while the emulated EL1
		 * state is stored in the VNCR page. The latter could have
		 * been updated behind our back, and we must reset the
		 * emulation of the timers.
		 *
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap despite being
		 * notionally direct (we use the EL1 HW, as for VHE), while
		 * the EL1 registers access memory.
		 *
		 * In both cases, process the emulated timers on each guest
		 * exit. Boo.
		 */
		struct timer_map map;
		get_timer_map(vcpu, &map);

		soft_timer_cancel(&map.emul_vtimer->hrtimer);
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
		timer_emulate(map.emul_vtimer);
		timer_emulate(map.emul_ptimer);
	}
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7. We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	for (int i = 0; i < nr_timers(vcpu); i++)
		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);

	/*
	 * A vcpu running at EL2 is in charge of the offset applied to
	 * the virtual timer, so use the physical VM offset, and point
	 * the vcpu offset to CNTVOFF_EL2.
	 */
	if (vcpu_has_nv(vcpu)) {
		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;

		offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
	}

	if (timer->enabled) {
		for (int i = 0; i < nr_timers(vcpu); i++)
			kvm_timer_update_irq(vcpu, false,
					     vcpu_get_timer(vcpu, i));

		if (irqchip_in_kernel(vcpu->kvm)) {
			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
			if (map.direct_ptimer)
				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
		}
	}

	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
}

static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->vcpu = vcpu;

	if (timerid == TIMER_VTIMER)
		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
	else
		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;

	hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	ctxt->hrtimer.function = kvm_hrtimer_expire;

	switch (timerid) {
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		ctxt->host_timer_irq = host_ptimer_irq;
		break;
	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		ctxt->host_timer_irq = host_vtimer_irq;
		break;
	}
}

void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	for (int i = 0; i < NR_KVM_TIMERS; i++)
		timer_context_init(vcpu, i);

	/* Synchronize offsets across timers of a VM if not already provided */
	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
		timer_set_offset(vcpu_ptimer(vcpu), 0);
	}

	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	timer->bg_timer.function = kvm_bg_timer_expire;
}

void kvm_timer_init_vm(struct kvm *kvm)
{
	for (int i = 0; i < NR_KVM_TIMERS; i++)
		kvm->arch.timer_data.ppi[i] = default_ppi[i];
}

void kvm_timer_cpu_up(void)
{
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	if (host_ptimer_irq)
		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
}

void kvm_timer_cpu_down(void)
{
	disable_percpu_irq(host_vtimer_irq);
	if (host_ptimer_irq)
		disable_percpu_irq(host_ptimer_irq);
}

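/*
 * Userspace accessors for the KVM_REG_ARM_TIMER_* and KVM_REG_ARM_PTIMER_*
 * registers. Writes to the counter registers are ignored once userspace
 * has set a VM-wide counter offset (see kvm_vm_ioctl_set_counter_offset()).
 */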
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
	struct arch_timer_context *timer;

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_TIMER_CNT:
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags)) {
			timer = vcpu_vtimer(vcpu);
			timer_set_offset(timer, kvm_phys_timer_read() - value);
		}
		break;
	case KVM_REG_ARM_TIMER_CVAL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;
	case KVM_REG_ARM_PTIMER_CTL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_PTIMER_CNT:
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags)) {
			timer = vcpu_ptimer(vcpu);
			timer_set_offset(timer, kvm_phys_timer_read() - value);
		}
		break;
	case KVM_REG_ARM_PTIMER_CVAL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;

	default:
		return -1;
	}

	return 0;
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set ISTATUS bit if it's expired.
	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
	 * regardless of ENABLE bit for our implementation convenience.
	 */
	u32 ctl = timer_get_ctl(timer);

	if (!kvm_timer_compute_delta(timer))
		ctl |= ARCH_TIMER_CTRL_IT_STAT;

	return ctl;
}

u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_TIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_TIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
	case KVM_REG_ARM_PTIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_PTIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_PTIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
	}
	return (u64)-1;
}

static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
{
	u64 val;

	switch (treg) {
	case TIMER_REG_TVAL:
		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
		val = lower_32_bits(val);
		break;

	case TIMER_REG_CTL:
		val = read_timer_ctl(timer);
		break;

	case TIMER_REG_CVAL:
		val = timer_get_cval(timer);
		break;

	case TIMER_REG_CNT:
		val = kvm_phys_timer_read() - timer_get_offset(timer);
		break;

	case TIMER_REG_VOFF:
		val = *timer->offset.vcpu_offset;
		break;

	default:
		BUG();
	}

	return val;
}

u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
			      enum kvm_arch_timers tmr,
			      enum kvm_arch_timer_regs treg)
{
	struct arch_timer_context *timer;
	struct timer_map map;
	u64 val;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);

	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
		return kvm_arm_timer_read(vcpu, timer, treg);

	preempt_disable();
	timer_save_state(timer);

	val = kvm_arm_timer_read(vcpu, timer, treg);

	timer_restore_state(timer);
	preempt_enable();

	return val;
}

static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL:
		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
		break;

	case TIMER_REG_CTL:
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}

void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *timer;
	struct timer_map map;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);
	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
		soft_timer_cancel(&timer->hrtimer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_emulate(timer);
	} else {
		preempt_disable();
		timer_save_state(timer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_restore_state(timer);
		preempt_enable();
	}
}

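/*
 * irq_chip callbacks for the stacked "KVM" irqchip that kvm_irq_init()
 * interposes when the vgic reports no_hw_deactivation: while a timer
 * interrupt is forwarded to a vcpu, its active state is emulated by
 * masking the parent interrupt.
 */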
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	return 0;
}

static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
		return irq_chip_set_parent_state(d, which, val);

	if (val)
		irq_chip_mask_parent(d);
	else
		irq_chip_unmask_parent(d);

	return 0;
}

static void timer_irq_eoi(struct irq_data *d)
{
	if (!irqd_is_forwarded_to_vcpu(d))
		irq_chip_eoi_parent(d);
}

static void timer_irq_ack(struct irq_data *d)
{
	d = d->parent_data;
	if (d->chip->irq_ack)
		d->chip->irq_ack(d);
}

static struct irq_chip timer_chip = {
	.name			= "KVM",
	.irq_ack		= timer_irq_ack,
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= timer_irq_eoi,
	.irq_set_type		= irq_chip_set_type_parent,
	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
};

static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq = (uintptr_t)arg;

	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
					     &timer_chip, NULL);
}

static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

static const struct irq_domain_ops timer_domain_ops = {
	.alloc	= timer_irq_domain_alloc,
	.free	= timer_irq_domain_free,
};

static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	*flags = irq_get_trigger_type(virq);
	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		*flags = IRQF_TRIGGER_LOW;
	}
}

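/*
 * Grab the host timer PPIs from the arch_timer driver, sanitize their
 * trigger flags, and interpose the "KVM" irqchip defined above when the
 * vgic cannot deactivate the HW interrupts itself.
 */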
static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

	if (kvm_vgic_global_state.no_hw_deactivation) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both vtimer and ptimer in the same parent */
		data = irq_get_irq_data(host_vtimer_irq);
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

static void kvm_timer_handle_errata(void)
{
	u64 mmfr0, mmfr1, mmfr4;

	/*
	 * CNTVOFF_EL2 is broken on some implementations. For those, we trap
	 * all virtual timer/counter accesses, requiring FEAT_ECV.
	 *
	 * However, a hypervisor supporting nesting is likely to mitigate the
	 * erratum at L0, and not require other levels to mitigate it (which
	 * would otherwise be a terrible performance sink due to trap
	 * amplification).
	 *
	 * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
	 * and that NV is likely not to (because of limitations of the
	 * architecture), only enable the workaround when FEAT_VHE and
	 * FEAT_E2H0 are both detected. Time will tell if this actually holds.
	 */
	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
	if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
	    !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
	    SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
	    (has_vhe() || has_hvhe()) &&
	    cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
		static_branch_enable(&broken_cntvoff_key);
		kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
	}
}

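/*
 * One-time host initialization: request the percpu EL1 timer interrupts,
 * route them to the running vcpu when a GIC is available, and check for
 * the broken CNTVOFF_EL2 erratum.
 */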
int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, err);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			err = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (err) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	} else if (has_vhe()) {
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_vtimer_irq;
	}

	kvm_timer_handle_errata();
	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

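/*
 * Check that each timer of the vcpu has been assigned a distinct PPI and
 * take ownership of those interrupts in the vgic; once this succeeds the
 * PPI assignment becomes immutable for the whole VM.
 */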
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	u32 ppis = 0;
	bool valid;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;
		int irq;

		ctx = vcpu_get_timer(vcpu, i);
		irq = timer_irq(ctx);
		if (kvm_vgic_set_owner(vcpu, irq, ctx))
			break;

		/*
		 * We know by construction that we only have PPIs, so
		 * all values are less than 32.
		 */
		ppis |= BIT(irq);
	}

	valid = hweight32(ppis) == nr_timers(vcpu);

	if (valid)
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);

	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return valid;
}

static bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (WARN(!vcpu, "No vcpu context!\n"))
		return false;

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;

		ctx = vcpu_get_timer(vcpu, i);
		if (timer_irq(ctx) == vintid)
			return kvm_timer_should_fire(ctx);
	}

	/* A timer IRQ has fired, but no matching timer was found? */
	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);

	return false;
}

int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	/*
	 * At this stage, we have the guarantee that the vgic is both
	 * available and initialized.
	 */
	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	get_timer_map(vcpu, &map);

	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    timer_irq(map.direct_vtimer),
				    &arch_timer_irq_ops);
	if (ret)
		return ret;

	if (map.direct_ptimer) {
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    timer_irq(map.direct_ptimer),
					    &arch_timer_irq_ops);
	}

	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}

int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	int irq, idx, ret = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(irq)))
		return -EINVAL;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
		     &vcpu->kvm->arch.flags)) {
		ret = -EBUSY;
		goto out;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		idx = TIMER_VTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		idx = TIMER_PTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		idx = TIMER_HVTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		idx = TIMER_HPTIMER;
		break;
	default:
		ret = -ENXIO;
		goto out;
	}

	/*
	 * We cannot validate the IRQ uniqueness before we run, so take it at
	 * face value. The verdict will be given on first vcpu run, for each
	 * vcpu. Yes this is late. Blame it on the stupid API.
	 */
	vcpu->kvm->arch.timer_data.ppi[idx] = irq;

out:
	mutex_unlock(&vcpu->kvm->arch.config_lock);
	return ret;
}

int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *timer;
	int irq;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		timer = vcpu_vtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		timer = vcpu_ptimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		timer = vcpu_hvtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		timer = vcpu_hptimer(vcpu);
		break;
	default:
		return -ENXIO;
	}

	irq = timer_irq(timer);
	return put_user(irq, uaddr);
}

int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		return 0;
	}

	return -ENXIO;
}

int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset)
{
	int ret = 0;

	if (offset->reserved)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (lock_all_vcpus(kvm)) {
		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);

		/*
		 * If userspace decides to set the offset using this
		 * API rather than merely restoring the counter
		 * values, the offset applies to both the virtual and
		 * physical views.
		 */
		kvm->arch.timer_data.voffset = offset->counter_offset;
		kvm->arch.timer_data.poffset = offset->counter_offset;

		unlock_all_vcpus(kvm);
	} else {
		ret = -EBUSY;
	}

	mutex_unlock(&kvm->lock);

	return ret;
}