// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static unsigned int host_ptimer_irq;
static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;

static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);

static const u8 default_ppi[] = {
	[TIMER_PTIMER]  = 30,
	[TIMER_VTIMER]  = 27,
	[TIMER_HPTIMER] = 26,
	[TIMER_HVTIMER] = 28,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);

static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

static int nr_timers(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_nv(vcpu))
		return NR_KVM_EL0_TIMERS;

	return NR_KVM_TIMERS;
}

u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl);
		break;
	case TIMER_PTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl);
		break;
	case TIMER_HVTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl);
		break;
	case TIMER_HPTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl);
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval);
		break;
	case TIMER_PTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval);
		break;
	case TIMER_HVTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval);
		break;
	case TIMER_HPTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval);
		break;
	default:
		WARN_ON(1);
	}
}

u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

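/*
 * Work out which of the vcpu's timer contexts are backed directly by
 * the hardware timers and which ones must be emulated in software,
 * based on the vcpu's current context (vEL2 vs vEL1/0 with NV) and on
 * whether the host runs VHE.
 */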
void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	if (vcpu_has_nv(vcpu)) {
		if (is_hyp_ctxt(vcpu)) {
			map->direct_vtimer = vcpu_hvtimer(vcpu);
			map->direct_ptimer = vcpu_hptimer(vcpu);
			map->emul_vtimer = vcpu_vtimer(vcpu);
			map->emul_ptimer = vcpu_ptimer(vcpu);
		} else {
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_vtimer = vcpu_hvtimer(vcpu);
			map->emul_ptimer = vcpu_hptimer(vcpu);
		}
	} else if (has_vhe()) {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = NULL;
		map->emul_ptimer = NULL;
	} else {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = NULL;
		map->emul_vtimer = NULL;
		map->emul_ptimer = vcpu_ptimer(vcpu);
	}

	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}

static inline bool userspace_irqchip(struct kvm *kvm)
{
	return unlikely(!irqchip_in_kernel(kvm));
}

static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS_HARD);
}

static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *ctx;
	struct timer_map map;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	get_timer_map(vcpu, &map);

	if (irq == host_vtimer_irq)
		ctx = map.direct_vtimer;
	else
		ctx = map.direct_ptimer;

	if (kvm_timer_should_fire(ctx))
		kvm_timer_update_irq(vcpu, true, ctx);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

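/*
 * Convert the distance between the current (offset-adjusted) counter
 * value and @val into nanoseconds, or return 0 if @val has already
 * been reached.
 */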
static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (now < val) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 val - now,
					 timecounter->mask,
					 &timer_ctx->ns_frac);
		return ns;
	}

	return 0;
}

static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	WARN_ON(timer_ctx && timer_ctx->loaded);
	return timer_ctx &&
	       ((timer_get_ctl(timer_ctx) &
		 (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}

static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		vcpu_get_flag(vcpu, IN_WFIT));
}

static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, val);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_delta = ULLONG_MAX;
	int i;

	for (i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];

		WARN(ctx->loaded, "timer %d loaded\n", i);
		if (kvm_timer_irq_can_fire(ctx))
			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
	}

	if (vcpu_has_wfit_active(vcpu))
		min_delta = min(min_delta, wfit_delay_ns(vcpu));

	/* If none of the timers can fire, then return 0 */
	if (min_delta == ULLONG_MAX)
		return 0;

	return min_delta;
}

static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = timer_context_to_vcpu(ctx);

	trace_kvm_timer_hrtimer_expire(ctx);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
}

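/*
 * Work out whether the timer output should currently be asserted: use
 * the hardware CTL register if the context is loaded on the CPU,
 * otherwise compare the saved CVAL against the offset-adjusted counter.
 */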
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	enum kvm_arch_timers index;
	u64 cval, now;

	if (!timer_ctx)
		return false;

	index = arch_timer_ctx_index(timer_ctx);

	if (timer_ctx->loaded) {
		u32 cnt_ctl = 0;

		switch (index) {
		case TIMER_VTIMER:
		case TIMER_HVTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
			break;
		case TIMER_PTIMER:
		case TIMER_HPTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
			break;
		case NR_KVM_TIMERS:
			/* GCC is braindead */
			cnt_ctl = 0;
			break;
		}

		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
			(cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_get_cval(timer_ctx);
	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	return cval <= now;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
{
	/*
	 * Paper over NV2 brokenness by publishing the interrupt status
	 * bit. This still results in a poor quality of emulation (guest
	 * writes will have no effect until the next exit).
	 *
	 * But hey, it's fast, right?
	 */
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);

	if (is_hyp_ctxt(vcpu) &&
	    (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) {
		unsigned long val = timer_get_ctl(ctx);
		__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
		timer_set_ctl(ctx, val);
	}
}

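/*
 * Latch a new output level for the timer (also publishing ISTATUS for
 * an NV2 guest hypervisor) and, unless userspace owns the irqchip,
 * propagate it to the vgic as a level interrupt.
 */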
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	kvm_timer_update_status(timer_ctx, new_level);

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (userspace_irqchip(vcpu->kvm))
		return;

	kvm_vgic_inject_irq(vcpu->kvm, vcpu,
			    timer_irq(timer_ctx),
			    timer_ctx->irq.level,
			    timer_ctx);
}

/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx);

	kvm_timer_update_status(ctx, should_fire);

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future. If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx))
		return;

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}

static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}

static void set_cntpoff(u64 cntpoff)
{
	if (has_cntpoff())
		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}

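/*
 * Sync the hardware state of a loaded timer back into its in-memory
 * context, disable the hardware timer, and zero the counter offset
 * (CNTVOFF, and CNTPOFF when present) before marking the context as
 * no longer loaded.
 */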
static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (!ctx->loaded)
		goto out;

	switch (index) {
		u64 cval;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
		cval = read_sysreg_el0(SYS_CNTV_CVAL);

		if (has_broken_cntvoff())
			cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		/*
		 * The kernel may decide to run userspace after
		 * calling vcpu_put, so we reset cntvoff to 0 to
		 * ensure a consistent read between user accesses to
		 * the virtual counter and kernel access to the
		 * physical counter in the non-VHE case.
		 *
		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so there is no need to zero the
		 * CNTVOFF_EL2 register, but this is actually useful when
		 * switching between EL1/vEL2 with NV.
		 *
		 * Do it unconditionally, as this is either unavoidable
		 * or dirt cheap.
		 */
		set_cntvoff(0);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
		cval = read_sysreg_el0(SYS_CNTP_CVAL);

		cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		set_cntpoff(0);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_save_state(ctx);

	ctx->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_halt, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
	    !vcpu_has_wfit_active(vcpu))
		return;

	/*
	 * At least one guest timer will expire. Schedule a background timer.
	 * Set the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

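/*
 * Counterpart to timer_save_state(): program the counter offset and the
 * hardware timer registers from the in-memory context, and mark the
 * context as loaded.
 */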
static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (ctx->loaded)
		goto out;

	switch (index) {
		u64 cval, offset;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		if (has_broken_cntvoff()) {
			set_cntvoff(0);
			cval += offset;
		} else {
			set_cntvoff(offset);
		}
		write_sysreg_el0(cval, SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		set_cntpoff(offset);
		cval += offset;
		write_sysreg_el0(cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_restore_state(ctx);

	ctx->loaded = true;
out:
	local_irq_restore(flags);
}

static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int r;

	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
	bool phys_active = false;

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

	phys_active |= ctx->irq.level;

	set_timer_irq_phys_active(ctx, phys_active);
}

static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress. Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

/* If _pred is true, set bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)

static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					      struct timer_map *map)
{
	int hw, ret;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	/*
	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have valid emul_vtimer,
	 * emul_ptimer, direct_vtimer, and direct_ptimer pointers.
	 *
	 * Since this is called from kvm_timer_vcpu_load(), a change between
	 * vEL2 and vEL1/0 will have just happened, the timer_map reflects
	 * this, and therefore we switch the emul/direct mappings below.
	 */
	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
	if (hw < 0) {
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_vtimer->host_timer_irq,
					    timer_irq(map->direct_vtimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_ptimer->host_timer_irq,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
	}
}

static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	bool tvt, tpt, tvc, tpc, tvt02, tpt02;
	u64 clr, set;

	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), which is where the stuff happens.
	 */
	if (!has_vhe())
		return;

	/*
	 * Our default policy is not to trap anything. As we progress
	 * within this function, reality kicks in and we start adding
	 * traps based on emulation requirements.
	 */
	tvt = tpt = tvc = tpc = false;
	tvt02 = tpt02 = false;

	/*
	 * NV2 badly breaks the timer semantics by redirecting accesses to
	 * the EL1 timer state to memory, so let's call ECV to the rescue if
	 * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
	 *
	 * The treatment slightly varies depending on whether we run a nVHE
	 * or VHE guest: nVHE will use the _EL0 registers directly, while
	 * VHE will use the _EL02 accessors. This translates into different
	 * trap bits.
	 *
	 * None of the trapping is required when running in non-HYP context,
	 * unless required by the L1 hypervisor settings once we advertise
	 * ECV+NV in the guest, or if we need trapping for other reasons.
	 */
	if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
		if (vcpu_el2_e2h_is_set(vcpu))
			tvt02 = tpt02 = true;
		else
			tvt = tpt = true;
	}

	/*
	 * We have two possibilities to deal with a physical offset:
	 *
	 * - Either we have CNTPOFF (yay!) or the offset is 0:
	 *   we let the guest freely access the HW
	 *
	 * - or neither of these conditions applies:
	 *   we trap accesses to the HW, but still use it
	 *   after correcting the physical offset
	 */
	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
		tpt = tpc = true;

	/*
	 * For the poor sods that could not correctly subtract one value
	 * from another, trap the full virtual timer and counter.
	 */
	if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
		tvt = tvc = true;

	/*
	 * Apply the enable bits that the guest hypervisor has requested for
	 * its own guest. We can only add traps that wouldn't have been set
	 * above.
	 * Implementation choices: we do not support NV when E2H=0 in the
	 * guest, and we don't support configuration where E2H is writable
	 * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
	 * not both). This simplifies the handling of the EL1NV* bits.
	 */
	if (is_nested_ctxt(vcpu)) {
		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

		/* Use the VHE format for mental sanity */
		if (!vcpu_el2_e2h_is_set(vcpu))
			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));

		tpt02 |= (val & CNTHCTL_EL1NVPCT);
		tvt02 |= (val & CNTHCTL_EL1NVVCT);
	}

	/*
	 * Now that we have collected our requirements, compute the
	 * trap and enable bits.
	 */
	set = 0;
	clr = 0;

	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
	assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
	assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
	assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
	assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);

	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
	sysreg_clear_set(cnthctl_el2, clr, set);
}

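/*
 * vcpu_load hook: resync the expected interrupt/active state, restore
 * the directly-backed timers to the hardware, kick the emulated ones,
 * and configure the EL2 timer traps for the current context.
 */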
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	if (static_branch_likely(&has_gic_active_state)) {
		if (vcpu_has_nv(vcpu))
			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

		kvm_timer_vcpu_load_gic(map.direct_vtimer);
		if (map.direct_ptimer)
			kvm_timer_vcpu_load_gic(map.direct_ptimer);
	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
	}

	kvm_timer_unblocking(vcpu);

	timer_restore_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_restore_state(map.direct_ptimer);
	if (map.emul_vtimer)
		timer_emulate(map.emul_vtimer);
	if (map.emul_ptimer)
		timer_emulate(map.emul_ptimer);

	timer_set_traps(vcpu, &map);
}

bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

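/*
 * vcpu_put hook: save the directly-backed timers back to memory, cancel
 * the emulation hrtimers, and arm the background timer if the vcpu is
 * about to block.
 */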
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	timer_save_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_save_state(map.direct_ptimer);

	/*
	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	if (kvm_vcpu_is_blocking(vcpu))
		kvm_timer_blocking(vcpu);
}

void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
{
	/*
	 * When NV2 is on, guest hypervisors have their EL1 timer register
	 * accesses redirected to the VNCR page. Any guest action taken on
	 * the timer is postponed until the next exit, leading to a very
	 * poor quality of emulation.
	 *
	 * This is an unmitigated disaster, only papered over by FEAT_ECV,
	 * which allows trapping of the timer registers even with NV2.
	 * Even so, this is still worse than FEAT_NV on its own. Meh.
	 */
	if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
		/*
		 * For a VHE guest hypervisor, the EL2 state is directly
		 * stored in the host EL1 timers, while the emulated EL1
		 * state is stored in the VNCR page. The latter could have
		 * been updated behind our back, and we must reset the
		 * emulation of the timers.
		 *
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap despite being
		 * notionally direct (we use the EL1 HW, as for VHE), while
		 * the EL1 registers access memory.
		 *
		 * In both cases, process the emulated timers on each guest
		 * exit. Boo.
		 */
		struct timer_map map;

		get_timer_map(vcpu, &map);

		soft_timer_cancel(&map.emul_vtimer->hrtimer);
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
		timer_emulate(map.emul_vtimer);
		timer_emulate(map.emul_ptimer);
	}
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7. We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	for (int i = 0; i < nr_timers(vcpu); i++)
		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);

	/*
	 * A vcpu running at EL2 is in charge of the offset applied to
	 * the virtual timer, so use the physical VM offset, and point
	 * the vcpu offset to CNTVOFF_EL2.
	 */
	if (vcpu_has_nv(vcpu)) {
		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;

		offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2);
		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
	}

	if (timer->enabled) {
		for (int i = 0; i < nr_timers(vcpu); i++)
			kvm_timer_update_irq(vcpu, false,
					     vcpu_get_timer(vcpu, i));

		if (irqchip_in_kernel(vcpu->kvm)) {
			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
			if (map.direct_ptimer)
				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
		}
	}

	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
}

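/*
 * Per-context initialisation: record the timer id, wire up the VM-wide
 * counter offset and the backing host PPI, and set up the emulation
 * hrtimer.
 */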
static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->timer_id = timerid;

	if (timerid == TIMER_VTIMER)
		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
	else
		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;

	hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);

	switch (timerid) {
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		ctxt->host_timer_irq = host_ptimer_irq;
		break;
	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		ctxt->host_timer_irq = host_vtimer_irq;
		break;
	}
}

void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	for (int i = 0; i < NR_KVM_TIMERS; i++)
		timer_context_init(vcpu, i);

	/* Synchronize offsets across timers of a VM if not already provided */
	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
		timer_set_offset(vcpu_ptimer(vcpu), 0);
	}

	hrtimer_setup(&timer->bg_timer, kvm_bg_timer_expire, CLOCK_MONOTONIC,
		      HRTIMER_MODE_ABS_HARD);
}

void kvm_timer_init_vm(struct kvm *kvm)
{
	for (int i = 0; i < NR_KVM_TIMERS; i++)
		kvm->arch.timer_data.ppi[i] = default_ppi[i];
}

void kvm_timer_cpu_up(void)
{
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	if (host_ptimer_irq)
		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
}

void kvm_timer_cpu_down(void)
{
	disable_percpu_irq(host_vtimer_irq);
	if (host_ptimer_irq)
		disable_percpu_irq(host_ptimer_irq);
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set ISTATUS bit if it's expired.
	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
	 * regardless of ENABLE bit for our implementation convenience.
	 */
	u32 ctl = timer_get_ctl(timer);

	if (!kvm_timer_compute_delta(timer))
		ctl |= ARCH_TIMER_CTRL_IT_STAT;

	return ctl;
}

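/*
 * Read a timer register from the saved (in-memory) state; callers save
 * the hardware state first if the timer is currently loaded.
 */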
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
{
	u64 val;

	switch (treg) {
	case TIMER_REG_TVAL:
		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
		val = lower_32_bits(val);
		break;

	case TIMER_REG_CTL:
		val = read_timer_ctl(timer);
		break;

	case TIMER_REG_CVAL:
		val = timer_get_cval(timer);
		break;

	case TIMER_REG_CNT:
		val = kvm_phys_timer_read() - timer_get_offset(timer);
		break;

	case TIMER_REG_VOFF:
		val = *timer->offset.vcpu_offset;
		break;

	default:
		BUG();
	}

	return val;
}

u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
			      enum kvm_arch_timers tmr,
			      enum kvm_arch_timer_regs treg)
{
	struct arch_timer_context *timer;
	struct timer_map map;
	u64 val;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);

	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
		return kvm_arm_timer_read(vcpu, timer, treg);

	preempt_disable();
	timer_save_state(timer);

	val = kvm_arm_timer_read(vcpu, timer, treg);

	timer_restore_state(timer);
	preempt_enable();

	return val;
}

static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL:
		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
		break;

	case TIMER_REG_CTL:
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}

void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *timer;
	struct timer_map map;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);
	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
		soft_timer_cancel(&timer->hrtimer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_emulate(timer);
	} else {
		preempt_disable();
		timer_save_state(timer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_restore_state(timer);
		preempt_enable();
	}
}

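/*
 * Irqchip glue used when the GIC cannot deactivate the timer interrupts
 * on the guest's behalf (no_hw_deactivation): kvm_irq_init() stacks this
 * "KVM" irq_chip on top of the timer PPIs so that, while an interrupt is
 * forwarded to a vcpu, its active state is emulated by masking and
 * unmasking the parent interrupt.
 */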
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	return 0;
}

static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
		return irq_chip_set_parent_state(d, which, val);

	if (val)
		irq_chip_mask_parent(d);
	else
		irq_chip_unmask_parent(d);

	return 0;
}

static void timer_irq_eoi(struct irq_data *d)
{
	if (!irqd_is_forwarded_to_vcpu(d))
		irq_chip_eoi_parent(d);
}

static void timer_irq_ack(struct irq_data *d)
{
	d = d->parent_data;
	if (d->chip->irq_ack)
		d->chip->irq_ack(d);
}

static struct irq_chip timer_chip = {
	.name			= "KVM",
	.irq_ack		= timer_irq_ack,
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= timer_irq_eoi,
	.irq_set_type		= irq_chip_set_type_parent,
	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
};

static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq = (uintptr_t)arg;

	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
					     &timer_chip, NULL);
}

static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

static const struct irq_domain_ops timer_domain_ops = {
	.alloc	= timer_irq_domain_alloc,
	.free	= timer_irq_domain_free,
};

static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	*flags = irq_get_trigger_type(virq);
	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		*flags = IRQF_TRIGGER_LOW;
	}
}

static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

	if (kvm_vgic_global_state.no_hw_deactivation) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both vtimer and ptimer in the same parent */
		data = irq_get_irq_data(host_vtimer_irq);
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

static void kvm_timer_handle_errata(void)
{
	u64 mmfr0, mmfr1, mmfr4;

	/*
	 * CNTVOFF_EL2 is broken on some implementations. For those, we trap
	 * all virtual timer/counter accesses, requiring FEAT_ECV.
	 *
	 * However, a hypervisor supporting nesting is likely to mitigate the
	 * erratum at L0, and not require other levels to mitigate it (which
	 * would otherwise be a terrible performance sink due to trap
	 * amplification).
	 *
	 * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
	 * and that NV is likely not to (because of limitations of the
	 * architecture), only enable the workaround when FEAT_VHE and
	 * FEAT_E2H0 are both detected. Time will tell if this actually holds.
	 */
	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
	if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
	    !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
	    SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
	    (has_vhe() || has_hvhe()) &&
	    cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
		static_branch_enable(&broken_cntvoff_key);
		kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
	}
}

int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, err);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			err = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (err) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	} else if (has_vhe()) {
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_vtimer_irq;
	}

	kvm_timer_handle_errata();
	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	u32 ppis = 0;
	bool valid;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;
		int irq;

		ctx = vcpu_get_timer(vcpu, i);
		irq = timer_irq(ctx);
		if (kvm_vgic_set_owner(vcpu, irq, ctx))
			break;

		/*
		 * We know by construction that we only have PPIs, so
		 * all values are less than 32.
		 */
		ppis |= BIT(irq);
	}

	valid = hweight32(ppis) == nr_timers(vcpu);

	if (valid)
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);

	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return valid;
}

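/*
 * vgic resampling callback (see arch_timer_irq_ops): report whether the
 * timer mapped to @vintid should currently have its output asserted.
 */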
static bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (WARN(!vcpu, "No vcpu context!\n"))
		return false;

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;

		ctx = vcpu_get_timer(vcpu, i);
		if (timer_irq(ctx) == vintid)
			return kvm_timer_should_fire(ctx);
	}

	/* A timer IRQ has fired, but no matching timer was found? */
	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);

	return false;
}

int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	/*
	 * At this stage, we have the guarantee that the vgic is both
	 * available and initialized.
	 */
	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	get_timer_map(vcpu, &map);

	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    timer_irq(map.direct_vtimer),
				    &arch_timer_irq_ops);
	if (ret)
		return ret;

	if (map.direct_ptimer) {
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    timer_irq(map.direct_ptimer),
					    &arch_timer_irq_ops);
	}

	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}

int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	int irq, idx, ret = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(irq)))
		return -EINVAL;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
		     &vcpu->kvm->arch.flags)) {
		ret = -EBUSY;
		goto out;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		idx = TIMER_VTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		idx = TIMER_PTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		idx = TIMER_HVTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		idx = TIMER_HPTIMER;
		break;
	default:
		ret = -ENXIO;
		goto out;
	}

	/*
	 * We cannot validate the IRQ unicity before we run, so take it at
	 * face value. The verdict will be given on first vcpu run, for each
	 * vcpu. Yes this is late. Blame it on the stupid API.
1634 */ 1635 vcpu->kvm->arch.timer_data.ppi[idx] = irq; 1636 1637 out: 1638 mutex_unlock(&vcpu->kvm->arch.config_lock); 1639 return ret; 1640 } 1641 1642 int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 1643 { 1644 int __user *uaddr = (int __user *)(long)attr->addr; 1645 struct arch_timer_context *timer; 1646 int irq; 1647 1648 switch (attr->attr) { 1649 case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: 1650 timer = vcpu_vtimer(vcpu); 1651 break; 1652 case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: 1653 timer = vcpu_ptimer(vcpu); 1654 break; 1655 case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: 1656 timer = vcpu_hvtimer(vcpu); 1657 break; 1658 case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: 1659 timer = vcpu_hptimer(vcpu); 1660 break; 1661 default: 1662 return -ENXIO; 1663 } 1664 1665 irq = timer_irq(timer); 1666 return put_user(irq, uaddr); 1667 } 1668 1669 int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 1670 { 1671 switch (attr->attr) { 1672 case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: 1673 case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: 1674 case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: 1675 case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: 1676 return 0; 1677 } 1678 1679 return -ENXIO; 1680 } 1681 1682 int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, 1683 struct kvm_arm_counter_offset *offset) 1684 { 1685 int ret = 0; 1686 1687 if (offset->reserved) 1688 return -EINVAL; 1689 1690 mutex_lock(&kvm->lock); 1691 1692 if (!kvm_trylock_all_vcpus(kvm)) { 1693 set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags); 1694 1695 /* 1696 * If userspace decides to set the offset using this 1697 * API rather than merely restoring the counter 1698 * values, the offset applies to both the virtual and 1699 * physical views. 1700 */ 1701 kvm->arch.timer_data.voffset = offset->counter_offset; 1702 kvm->arch.timer_data.poffset = offset->counter_offset; 1703 1704 kvm_unlock_all_vcpus(kvm); 1705 } else { 1706 ret = -EBUSY; 1707 } 1708 1709 mutex_unlock(&kvm->lock); 1710 1711 return ret; 1712 } 1713