1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2012 ARM Ltd. 4 * Author: Marc Zyngier <marc.zyngier@arm.com> 5 */ 6 7 #include <linux/cpu.h> 8 #include <linux/kvm.h> 9 #include <linux/kvm_host.h> 10 #include <linux/interrupt.h> 11 #include <linux/irq.h> 12 #include <linux/irqdomain.h> 13 #include <linux/uaccess.h> 14 15 #include <clocksource/arm_arch_timer.h> 16 #include <asm/arch_timer.h> 17 #include <asm/kvm_emulate.h> 18 #include <asm/kvm_hyp.h> 19 #include <asm/kvm_nested.h> 20 21 #include <kvm/arm_vgic.h> 22 #include <kvm/arm_arch_timer.h> 23 24 #include "trace.h" 25 26 static struct timecounter *timecounter; 27 static unsigned int host_vtimer_irq; 28 static unsigned int host_ptimer_irq; 29 static u32 host_vtimer_irq_flags; 30 static u32 host_ptimer_irq_flags; 31 32 static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); 33 DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key); 34 35 static const u8 default_ppi[] = { 36 [TIMER_PTIMER] = 30, 37 [TIMER_VTIMER] = 27, 38 [TIMER_HPTIMER] = 26, 39 [TIMER_HVTIMER] = 28, 40 }; 41 42 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, 43 struct arch_timer_context *timer_ctx); 44 static bool kvm_timer_pending(struct arch_timer_context *timer_ctx); 45 static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, 46 struct arch_timer_context *timer, 47 enum kvm_arch_timer_regs treg, 48 u64 val); 49 static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, 50 struct arch_timer_context *timer, 51 enum kvm_arch_timer_regs treg); 52 static bool kvm_arch_timer_get_input_level(int vintid); 53 54 static unsigned long kvm_arch_timer_get_irq_flags(void) 55 { 56 return kvm_vgic_global_state.no_hw_deactivation ? VGIC_IRQ_SW_RESAMPLE : 0; 57 } 58 59 static const struct irq_ops arch_timer_irq_ops = { 60 .get_flags = kvm_arch_timer_get_irq_flags, 61 .get_input_level = kvm_arch_timer_get_input_level, 62 }; 63 64 static const struct irq_ops arch_timer_irq_ops_vgic_v5 = { 65 .get_input_level = kvm_arch_timer_get_input_level, 66 .queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock, 67 .set_direct_injection = vgic_v5_set_ppi_dvi, 68 }; 69 70 static int nr_timers(struct kvm_vcpu *vcpu) 71 { 72 if (!vcpu_has_nv(vcpu)) 73 return NR_KVM_EL0_TIMERS; 74 75 return NR_KVM_TIMERS; 76 } 77 78 u32 timer_get_ctl(struct arch_timer_context *ctxt) 79 { 80 struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); 81 82 switch(arch_timer_ctx_index(ctxt)) { 83 case TIMER_VTIMER: 84 return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0); 85 case TIMER_PTIMER: 86 return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0); 87 case TIMER_HVTIMER: 88 return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2); 89 case TIMER_HPTIMER: 90 return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2); 91 default: 92 WARN_ON(1); 93 return 0; 94 } 95 } 96 97 u64 timer_get_cval(struct arch_timer_context *ctxt) 98 { 99 struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); 100 101 switch(arch_timer_ctx_index(ctxt)) { 102 case TIMER_VTIMER: 103 return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); 104 case TIMER_PTIMER: 105 return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); 106 case TIMER_HVTIMER: 107 return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2); 108 case TIMER_HPTIMER: 109 return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2); 110 default: 111 WARN_ON(1); 112 return 0; 113 } 114 } 115 116 static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) 117 { 118 struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); 119 120 switch(arch_timer_ctx_index(ctxt)) { 121 case TIMER_VTIMER: 122 __vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl); 123 break; 124 case TIMER_PTIMER: 125 __vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl); 126 break; 127 case TIMER_HVTIMER: 128 __vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl); 129 break; 130 case TIMER_HPTIMER: 131 __vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl); 132 break; 133 default: 134 WARN_ON(1); 135 } 136 } 137 138 static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) 139 { 140 struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); 141 142 switch(arch_timer_ctx_index(ctxt)) { 143 case TIMER_VTIMER: 144 __vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval); 145 break; 146 case TIMER_PTIMER: 147 __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval); 148 break; 149 case TIMER_HVTIMER: 150 __vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval); 151 break; 152 case TIMER_HPTIMER: 153 __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval); 154 break; 155 default: 156 WARN_ON(1); 157 } 158 } 159 160 u64 kvm_phys_timer_read(void) 161 { 162 return timecounter->cc->read(timecounter->cc); 163 } 164 165 void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) 166 { 167 if (vcpu_has_nv(vcpu)) { 168 if (is_hyp_ctxt(vcpu)) { 169 map->direct_vtimer = vcpu_hvtimer(vcpu); 170 map->direct_ptimer = vcpu_hptimer(vcpu); 171 map->emul_vtimer = vcpu_vtimer(vcpu); 172 map->emul_ptimer = vcpu_ptimer(vcpu); 173 } else { 174 map->direct_vtimer = vcpu_vtimer(vcpu); 175 map->direct_ptimer = vcpu_ptimer(vcpu); 176 map->emul_vtimer = vcpu_hvtimer(vcpu); 177 map->emul_ptimer = vcpu_hptimer(vcpu); 178 } 179 } else if (has_vhe()) { 180 map->direct_vtimer = vcpu_vtimer(vcpu); 181 map->direct_ptimer = vcpu_ptimer(vcpu); 182 map->emul_vtimer = NULL; 183 map->emul_ptimer = NULL; 184 } else { 185 map->direct_vtimer = vcpu_vtimer(vcpu); 186 map->direct_ptimer = NULL; 187 map->emul_vtimer = NULL; 188 map->emul_ptimer = vcpu_ptimer(vcpu); 189 } 190 191 trace_kvm_get_timer_map(vcpu->vcpu_id, map); 192 } 193 194 static inline bool userspace_irqchip(struct kvm *kvm) 195 { 196 return unlikely(!irqchip_in_kernel(kvm)); 197 } 198 199 static void soft_timer_start(struct hrtimer *hrt, u64 ns) 200 { 201 hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), 202 HRTIMER_MODE_ABS_HARD); 203 } 204 205 static void soft_timer_cancel(struct hrtimer *hrt) 206 { 207 hrtimer_cancel(hrt); 208 } 209 210 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) 211 { 212 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; 213 struct arch_timer_context *ctx; 214 struct timer_map map; 215 216 /* 217 * We may see a timer interrupt after vcpu_put() has been called which 218 * sets the CPU's vcpu pointer to NULL, because even though the timer 219 * has been disabled in timer_save_state(), the hardware interrupt 220 * signal may not have been retired from the interrupt controller yet. 221 */ 222 if (!vcpu) 223 return IRQ_HANDLED; 224 225 get_timer_map(vcpu, &map); 226 227 if (irq == host_vtimer_irq) 228 ctx = map.direct_vtimer; 229 else 230 ctx = map.direct_ptimer; 231 232 if (kvm_timer_pending(ctx)) 233 kvm_timer_update_irq(vcpu, true, ctx); 234 235 if (userspace_irqchip(vcpu->kvm) && 236 !static_branch_unlikely(&has_gic_active_state)) 237 disable_percpu_irq(host_vtimer_irq); 238 239 return IRQ_HANDLED; 240 } 241 242 static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx, 243 u64 val) 244 { 245 u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); 246 247 if (now < val) { 248 u64 ns; 249 250 ns = cyclecounter_cyc2ns(timecounter->cc, 251 val - now, 252 timecounter->mask, 253 &timer_ctx->ns_frac); 254 return ns; 255 } 256 257 return 0; 258 } 259 260 static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) 261 { 262 return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx)); 263 } 264 265 static bool kvm_timer_enabled(struct arch_timer_context *timer_ctx) 266 { 267 WARN_ON(timer_ctx && timer_ctx->loaded); 268 return timer_ctx && 269 ((timer_get_ctl(timer_ctx) & 270 (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE); 271 } 272 273 static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu) 274 { 275 return (cpus_have_final_cap(ARM64_HAS_WFXT) && 276 vcpu_get_flag(vcpu, IN_WFIT)); 277 } 278 279 static u64 wfit_delay_ns(struct kvm_vcpu *vcpu) 280 { 281 u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); 282 struct arch_timer_context *ctx; 283 284 ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu); 285 286 return kvm_counter_compute_delta(ctx, val); 287 } 288 289 /* 290 * Returns the earliest expiration time in ns among guest timers. 291 * Note that it will return 0 if none of timers can fire. 292 */ 293 static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) 294 { 295 u64 min_delta = ULLONG_MAX; 296 int i; 297 298 for (i = 0; i < nr_timers(vcpu); i++) { 299 struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i]; 300 301 WARN(ctx->loaded, "timer %d loaded\n", i); 302 if (kvm_timer_enabled(ctx)) 303 min_delta = min(min_delta, kvm_timer_compute_delta(ctx)); 304 } 305 306 if (vcpu_has_wfit_active(vcpu)) 307 min_delta = min(min_delta, wfit_delay_ns(vcpu)); 308 309 /* If none of timers can fire, then return 0 */ 310 if (min_delta == ULLONG_MAX) 311 return 0; 312 313 return min_delta; 314 } 315 316 static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) 317 { 318 struct arch_timer_cpu *timer; 319 struct kvm_vcpu *vcpu; 320 u64 ns; 321 322 timer = container_of(hrt, struct arch_timer_cpu, bg_timer); 323 vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); 324 325 /* 326 * Check that the timer has really expired from the guest's 327 * PoV (NTP on the host may have forced it to expire 328 * early). If we should have slept longer, restart it. 329 */ 330 ns = kvm_timer_earliest_exp(vcpu); 331 if (unlikely(ns)) { 332 hrtimer_forward_now(hrt, ns_to_ktime(ns)); 333 return HRTIMER_RESTART; 334 } 335 336 kvm_vcpu_wake_up(vcpu); 337 return HRTIMER_NORESTART; 338 } 339 340 static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) 341 { 342 struct arch_timer_context *ctx; 343 struct kvm_vcpu *vcpu; 344 u64 ns; 345 346 ctx = container_of(hrt, struct arch_timer_context, hrtimer); 347 vcpu = timer_context_to_vcpu(ctx); 348 349 trace_kvm_timer_hrtimer_expire(ctx); 350 351 /* 352 * Check that the timer has really expired from the guest's 353 * PoV (NTP on the host may have forced it to expire 354 * early). If not ready, schedule for a later time. 355 */ 356 ns = kvm_timer_compute_delta(ctx); 357 if (unlikely(ns)) { 358 hrtimer_forward_now(hrt, ns_to_ktime(ns)); 359 return HRTIMER_RESTART; 360 } 361 362 kvm_timer_update_irq(vcpu, true, ctx); 363 return HRTIMER_NORESTART; 364 } 365 366 static bool kvm_timer_pending(struct arch_timer_context *timer_ctx) 367 { 368 enum kvm_arch_timers index; 369 u64 cval, now; 370 371 if (!timer_ctx) 372 return false; 373 374 index = arch_timer_ctx_index(timer_ctx); 375 376 if (timer_ctx->loaded) { 377 u32 cnt_ctl = 0; 378 379 switch (index) { 380 case TIMER_VTIMER: 381 case TIMER_HVTIMER: 382 cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); 383 break; 384 case TIMER_PTIMER: 385 case TIMER_HPTIMER: 386 cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); 387 break; 388 case NR_KVM_TIMERS: 389 /* GCC is braindead */ 390 cnt_ctl = 0; 391 break; 392 } 393 394 return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && 395 (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && 396 !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); 397 } 398 399 if (!kvm_timer_enabled(timer_ctx)) 400 return false; 401 402 cval = timer_get_cval(timer_ctx); 403 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); 404 405 return cval <= now; 406 } 407 408 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 409 { 410 return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0; 411 } 412 413 static u64 kvm_timer_needs_notify(struct kvm_vcpu *vcpu) 414 { 415 u64 v = vcpu->run->s.regs.device_irq_level; 416 417 v ^= kvm_timer_pending(vcpu_vtimer(vcpu)) ? KVM_ARM_DEV_EL1_VTIMER : 0; 418 v ^= kvm_timer_pending(vcpu_ptimer(vcpu)) ? KVM_ARM_DEV_EL1_PTIMER : 0; 419 420 return v & (KVM_ARM_DEV_EL1_VTIMER | KVM_ARM_DEV_EL1_PTIMER); 421 } 422 423 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) 424 { 425 return !!kvm_timer_needs_notify(vcpu); 426 } 427 428 /* 429 * Reflect the timer output level into the kvm_run structure 430 */ 431 bool kvm_timer_update_run(struct kvm_vcpu *vcpu) 432 { 433 u64 mask = kvm_timer_needs_notify(vcpu); 434 if (mask) 435 vcpu->run->s.regs.device_irq_level ^= mask; 436 return !!mask; 437 } 438 439 static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level) 440 { 441 /* 442 * Paper over NV2 brokenness by publishing the interrupt status 443 * bit. This still results in a poor quality of emulation (guest 444 * writes will have no effect until the next exit). 445 * 446 * But hey, it's fast, right? 447 */ 448 struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); 449 if (is_hyp_ctxt(vcpu) && 450 (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) { 451 unsigned long val = timer_get_ctl(ctx); 452 __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level); 453 timer_set_ctl(ctx, val); 454 } 455 } 456 457 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, 458 struct arch_timer_context *timer_ctx) 459 { 460 kvm_timer_update_status(timer_ctx, new_level); 461 462 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx), 463 new_level); 464 465 if (userspace_irqchip(vcpu->kvm)) 466 return; 467 468 /* Skip injecting on GICv5 for directly injected (DVI'd) timers */ 469 if (vgic_is_v5(vcpu->kvm)) { 470 struct timer_map map; 471 472 get_timer_map(vcpu, &map); 473 474 if (map.direct_ptimer == timer_ctx || 475 map.direct_vtimer == timer_ctx) 476 return; 477 } 478 479 kvm_vgic_inject_irq(vcpu->kvm, vcpu, 480 timer_irq(timer_ctx), 481 new_level, 482 timer_ctx); 483 } 484 485 /* Only called for a fully emulated timer */ 486 static void timer_emulate(struct arch_timer_context *ctx) 487 { 488 bool pending = kvm_timer_pending(ctx); 489 490 trace_kvm_timer_emulate(ctx, pending); 491 492 kvm_timer_update_irq(timer_context_to_vcpu(ctx), pending, ctx); 493 494 /* 495 * If the timer is pending, we don't need to have a soft timer 496 * scheduled for the future. If the timer is disabled, then 497 * we don't need a soft timer either. 498 */ 499 if (pending || !kvm_timer_enabled(ctx)) 500 return; 501 502 soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx)); 503 } 504 505 static void set_cntvoff(u64 cntvoff) 506 { 507 kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff); 508 } 509 510 static void set_cntpoff(u64 cntpoff) 511 { 512 if (has_cntpoff()) 513 write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2); 514 } 515 516 static void timer_save_state(struct arch_timer_context *ctx) 517 { 518 struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); 519 enum kvm_arch_timers index = arch_timer_ctx_index(ctx); 520 unsigned long flags; 521 522 if (!timer->enabled) 523 return; 524 525 local_irq_save(flags); 526 527 if (!ctx->loaded) 528 goto out; 529 530 switch (index) { 531 u64 cval; 532 533 case TIMER_VTIMER: 534 case TIMER_HVTIMER: 535 timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); 536 cval = read_sysreg_el0(SYS_CNTV_CVAL); 537 538 if (has_broken_cntvoff()) 539 cval -= timer_get_offset(ctx); 540 541 timer_set_cval(ctx, cval); 542 543 /* Disable the timer */ 544 write_sysreg_el0(0, SYS_CNTV_CTL); 545 isb(); 546 547 /* 548 * The kernel may decide to run userspace after 549 * calling vcpu_put, so we reset cntvoff to 0 to 550 * ensure a consistent read between user accesses to 551 * the virtual counter and kernel access to the 552 * physical counter of non-VHE case. 553 * 554 * For VHE, the virtual counter uses a fixed virtual 555 * offset of zero, so no need to zero CNTVOFF_EL2 556 * register, but this is actually useful when switching 557 * between EL1/vEL2 with NV. 558 * 559 * Do it unconditionally, as this is either unavoidable 560 * or dirt cheap. 561 */ 562 set_cntvoff(0); 563 break; 564 case TIMER_PTIMER: 565 case TIMER_HPTIMER: 566 timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); 567 cval = read_sysreg_el0(SYS_CNTP_CVAL); 568 569 cval -= timer_get_offset(ctx); 570 571 timer_set_cval(ctx, cval); 572 573 /* Disable the timer */ 574 write_sysreg_el0(0, SYS_CNTP_CTL); 575 isb(); 576 577 set_cntpoff(0); 578 break; 579 case NR_KVM_TIMERS: 580 BUG(); 581 } 582 583 trace_kvm_timer_save_state(ctx); 584 585 ctx->loaded = false; 586 out: 587 local_irq_restore(flags); 588 } 589 590 /* 591 * Schedule the background timer before calling kvm_vcpu_halt, so that this 592 * thread is removed from its waitqueue and made runnable when there's a timer 593 * interrupt to handle. 594 */ 595 static void kvm_timer_blocking(struct kvm_vcpu *vcpu) 596 { 597 struct arch_timer_cpu *timer = vcpu_timer(vcpu); 598 struct timer_map map; 599 600 get_timer_map(vcpu, &map); 601 602 /* 603 * If no timers are capable of raising interrupts (disabled or 604 * masked), then there's no more work for us to do. 605 */ 606 if (!kvm_timer_enabled(map.direct_vtimer) && 607 !kvm_timer_enabled(map.direct_ptimer) && 608 !kvm_timer_enabled(map.emul_vtimer) && 609 !kvm_timer_enabled(map.emul_ptimer) && 610 !vcpu_has_wfit_active(vcpu)) 611 return; 612 613 /* 614 * At least one guest time will expire. Schedule a background timer. 615 * Set the earliest expiration time among the guest timers. 616 */ 617 soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); 618 } 619 620 static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) 621 { 622 struct arch_timer_cpu *timer = vcpu_timer(vcpu); 623 624 soft_timer_cancel(&timer->bg_timer); 625 } 626 627 static void timer_restore_state(struct arch_timer_context *ctx) 628 { 629 struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); 630 enum kvm_arch_timers index = arch_timer_ctx_index(ctx); 631 unsigned long flags; 632 633 if (!timer->enabled) 634 return; 635 636 local_irq_save(flags); 637 638 if (ctx->loaded) 639 goto out; 640 641 switch (index) { 642 u64 cval, offset; 643 644 case TIMER_VTIMER: 645 case TIMER_HVTIMER: 646 cval = timer_get_cval(ctx); 647 offset = timer_get_offset(ctx); 648 if (has_broken_cntvoff()) { 649 set_cntvoff(0); 650 cval += offset; 651 } else { 652 set_cntvoff(offset); 653 } 654 write_sysreg_el0(cval, SYS_CNTV_CVAL); 655 isb(); 656 write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); 657 break; 658 case TIMER_PTIMER: 659 case TIMER_HPTIMER: 660 cval = timer_get_cval(ctx); 661 offset = timer_get_offset(ctx); 662 set_cntpoff(offset); 663 cval += offset; 664 write_sysreg_el0(cval, SYS_CNTP_CVAL); 665 isb(); 666 write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); 667 break; 668 case NR_KVM_TIMERS: 669 BUG(); 670 } 671 672 trace_kvm_timer_restore_state(ctx); 673 674 ctx->loaded = true; 675 out: 676 local_irq_restore(flags); 677 } 678 679 static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active) 680 { 681 int r; 682 r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active); 683 WARN_ON(r); 684 } 685 686 static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) 687 { 688 struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); 689 bool pending = kvm_timer_pending(ctx); 690 bool phys_active = false; 691 692 /* 693 * Update the timer output so that it is likely to match the 694 * state we're about to restore. If the timer expires between 695 * this point and the register restoration, we'll take the 696 * interrupt anyway. 697 */ 698 kvm_timer_update_irq(vcpu, pending, ctx); 699 700 if (irqchip_in_kernel(vcpu->kvm)) 701 phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); 702 703 phys_active |= pending; 704 phys_active |= vgic_is_v5(vcpu->kvm); 705 706 set_timer_irq_phys_active(ctx, phys_active); 707 } 708 709 static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) 710 { 711 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 712 bool pending = kvm_timer_pending(vtimer); 713 714 /* 715 * Update the timer output so that it is likely to match the 716 * state we're about to restore. If the timer expires between 717 * this point and the register restoration, we'll take the 718 * interrupt anyway. 719 */ 720 kvm_timer_update_irq(vcpu, pending, vtimer); 721 722 /* 723 * When using a userspace irqchip with the architected timers and a 724 * host interrupt controller that doesn't support an active state, we 725 * must still prevent continuously exiting from the guest, and 726 * therefore mask the physical interrupt by disabling it on the host 727 * interrupt controller when the virtual level is high, such that the 728 * guest can make forward progress. Once we detect the output level 729 * being de-asserted, we unmask the interrupt again so that we exit 730 * from the guest when the timer fires. 731 */ 732 if (pending) 733 disable_percpu_irq(host_vtimer_irq); 734 else 735 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 736 } 737 738 /* If _pred is true, set bit in _set, otherwise set it in _clr */ 739 #define assign_clear_set_bit(_pred, _bit, _clr, _set) \ 740 do { \ 741 if (_pred) \ 742 (_set) |= (_bit); \ 743 else \ 744 (_clr) |= (_bit); \ 745 } while (0) 746 747 static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu, 748 struct timer_map *map) 749 { 750 int hw, ret; 751 752 if (!irqchip_in_kernel(vcpu->kvm)) 753 return; 754 755 /* 756 * We only ever unmap the vtimer irq on a VHE system that runs nested 757 * virtualization, in which case we have both a valid emul_vtimer, 758 * emul_ptimer, direct_vtimer, and direct_ptimer. 759 * 760 * Since this is called from kvm_timer_vcpu_load(), a change between 761 * vEL2 and vEL1/0 will have just happened, and the timer_map will 762 * represent this, and therefore we switch the emul/direct mappings 763 * below. 764 */ 765 hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer)); 766 if (hw < 0) { 767 kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer)); 768 kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer)); 769 770 ret = kvm_vgic_map_phys_irq(vcpu, 771 map->direct_vtimer->host_timer_irq, 772 timer_irq(map->direct_vtimer)); 773 WARN_ON_ONCE(ret); 774 ret = kvm_vgic_map_phys_irq(vcpu, 775 map->direct_ptimer->host_timer_irq, 776 timer_irq(map->direct_ptimer)); 777 WARN_ON_ONCE(ret); 778 } 779 } 780 781 static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) 782 { 783 bool tvt, tpt, tvc, tpc, tvt02, tpt02; 784 u64 clr, set; 785 786 /* 787 * No trapping gets configured here with nVHE. See 788 * __timer_enable_traps(), which is where the stuff happens. 789 */ 790 if (!has_vhe()) 791 return; 792 793 /* 794 * Our default policy is not to trap anything. As we progress 795 * within this function, reality kicks in and we start adding 796 * traps based on emulation requirements. 797 */ 798 tvt = tpt = tvc = tpc = false; 799 tvt02 = tpt02 = false; 800 801 /* 802 * NV2 badly breaks the timer semantics by redirecting accesses to 803 * the EL1 timer state to memory, so let's call ECV to the rescue if 804 * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses. 805 * 806 * The treatment slightly varies depending whether we run a nVHE or 807 * VHE guest: nVHE will use the _EL0 registers directly, while VHE 808 * will use the _EL02 accessors. This translates in different trap 809 * bits. 810 * 811 * None of the trapping is required when running in non-HYP context, 812 * unless required by the L1 hypervisor settings once we advertise 813 * ECV+NV in the guest, or that we need trapping for other reasons. 814 */ 815 if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) { 816 if (vcpu_el2_e2h_is_set(vcpu)) 817 tvt02 = tpt02 = true; 818 else 819 tvt = tpt = true; 820 } 821 822 /* 823 * We have two possibility to deal with a physical offset: 824 * 825 * - Either we have CNTPOFF (yay!) or the offset is 0: 826 * we let the guest freely access the HW 827 * 828 * - or neither of these condition apply: 829 * we trap accesses to the HW, but still use it 830 * after correcting the physical offset 831 */ 832 if (!has_cntpoff() && timer_get_offset(map->direct_ptimer)) 833 tpt = tpc = true; 834 835 /* 836 * For the poor sods that could not correctly subtract one value 837 * from another, trap the full virtual timer and counter. 838 */ 839 if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer)) 840 tvt = tvc = true; 841 842 /* 843 * Apply the enable bits that the guest hypervisor has requested for 844 * its own guest. We can only add traps that wouldn't have been set 845 * above. 846 * Implementation choices: we do not support NV when E2H=0 in the 847 * guest, and we don't support configuration where E2H is writable 848 * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but 849 * not both). This simplifies the handling of the EL1NV* bits. 850 */ 851 if (is_nested_ctxt(vcpu)) { 852 u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); 853 854 /* Use the VHE format for mental sanity */ 855 if (!vcpu_el2_e2h_is_set(vcpu)) 856 val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10; 857 858 tpt |= !(val & (CNTHCTL_EL1PCEN << 10)); 859 tpc |= !(val & (CNTHCTL_EL1PCTEN << 10)); 860 861 tpt02 |= (val & CNTHCTL_EL1NVPCT); 862 tvt02 |= (val & CNTHCTL_EL1NVVCT); 863 } 864 865 /* 866 * Now that we have collected our requirements, compute the 867 * trap and enable bits. 868 */ 869 set = 0; 870 clr = 0; 871 872 assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr); 873 assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr); 874 assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set); 875 assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set); 876 assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set); 877 assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set); 878 879 /* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */ 880 sysreg_clear_set(cnthctl_el2, clr, set); 881 } 882 883 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) 884 { 885 struct arch_timer_cpu *timer = vcpu_timer(vcpu); 886 struct timer_map map; 887 888 if (unlikely(!timer->enabled)) 889 return; 890 891 get_timer_map(vcpu, &map); 892 893 if (static_branch_likely(&has_gic_active_state)) { 894 /* We don't do NV on GICv5, yet */ 895 if (vcpu_has_nv(vcpu) && !vgic_is_v5(vcpu->kvm)) 896 kvm_timer_vcpu_load_nested_switch(vcpu, &map); 897 898 kvm_timer_vcpu_load_gic(map.direct_vtimer); 899 if (map.direct_ptimer) 900 kvm_timer_vcpu_load_gic(map.direct_ptimer); 901 } else { 902 kvm_timer_vcpu_load_nogic(vcpu); 903 } 904 905 kvm_timer_unblocking(vcpu); 906 907 timer_restore_state(map.direct_vtimer); 908 if (map.direct_ptimer) 909 timer_restore_state(map.direct_ptimer); 910 if (map.emul_vtimer) 911 timer_emulate(map.emul_vtimer); 912 if (map.emul_ptimer) 913 timer_emulate(map.emul_ptimer); 914 915 timer_set_traps(vcpu, &map); 916 } 917 918 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) 919 { 920 struct arch_timer_cpu *timer = vcpu_timer(vcpu); 921 struct timer_map map; 922 923 if (unlikely(!timer->enabled)) 924 return; 925 926 get_timer_map(vcpu, &map); 927 928 timer_save_state(map.direct_vtimer); 929 if (map.direct_ptimer) 930 timer_save_state(map.direct_ptimer); 931 932 /* 933 * Cancel soft timer emulation, because the only case where we 934 * need it after a vcpu_put is in the context of a sleeping VCPU, and 935 * in that case we already factor in the deadline for the physical 936 * timer when scheduling the bg_timer. 937 * 938 * In any case, we re-schedule the hrtimer for the physical timer when 939 * coming back to the VCPU thread in kvm_timer_vcpu_load(). 940 */ 941 if (map.emul_vtimer) 942 soft_timer_cancel(&map.emul_vtimer->hrtimer); 943 if (map.emul_ptimer) 944 soft_timer_cancel(&map.emul_ptimer->hrtimer); 945 946 if (kvm_vcpu_is_blocking(vcpu)) 947 kvm_timer_blocking(vcpu); 948 949 if (vgic_is_v5(vcpu->kvm)) { 950 set_timer_irq_phys_active(map.direct_vtimer, false); 951 if (map.direct_ptimer) 952 set_timer_irq_phys_active(map.direct_ptimer, false); 953 } 954 } 955 956 void kvm_timer_sync_nested(struct kvm_vcpu *vcpu) 957 { 958 /* 959 * When NV2 is on, guest hypervisors have their EL1 timer register 960 * accesses redirected to the VNCR page. Any guest action taken on 961 * the timer is postponed until the next exit, leading to a very 962 * poor quality of emulation. 963 * 964 * This is an unmitigated disaster, only papered over by FEAT_ECV, 965 * which allows trapping of the timer registers even with NV2. 966 * Still, this is still worse than FEAT_NV on its own. Meh. 967 */ 968 if (!cpus_have_final_cap(ARM64_HAS_ECV)) { 969 /* 970 * For a VHE guest hypervisor, the EL2 state is directly 971 * stored in the host EL1 timers, while the emulated EL1 972 * state is stored in the VNCR page. The latter could have 973 * been updated behind our back, and we must reset the 974 * emulation of the timers. 975 * 976 * A non-VHE guest hypervisor doesn't have any direct access 977 * to its timers: the EL2 registers trap despite being 978 * notionally direct (we use the EL1 HW, as for VHE), while 979 * the EL1 registers access memory. 980 * 981 * In both cases, process the emulated timers on each guest 982 * exit. Boo. 983 */ 984 struct timer_map map; 985 get_timer_map(vcpu, &map); 986 987 soft_timer_cancel(&map.emul_vtimer->hrtimer); 988 soft_timer_cancel(&map.emul_ptimer->hrtimer); 989 timer_emulate(map.emul_vtimer); 990 timer_emulate(map.emul_ptimer); 991 } 992 } 993 994 /* 995 * With a userspace irqchip we have to check if the guest de-asserted the 996 * timer and if so, unmask the timer irq signal on the host interrupt 997 * controller to ensure that we see future timer signals. 998 */ 999 static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) 1000 { 1001 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 1002 1003 if (!kvm_timer_pending(vtimer)) { 1004 kvm_timer_update_irq(vcpu, false, vtimer); 1005 if (static_branch_likely(&has_gic_active_state)) 1006 set_timer_irq_phys_active(vtimer, false); 1007 else 1008 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 1009 } 1010 } 1011 1012 void kvm_timer_sync_user(struct kvm_vcpu *vcpu) 1013 { 1014 struct arch_timer_cpu *timer = vcpu_timer(vcpu); 1015 1016 if (unlikely(!timer->enabled)) 1017 return; 1018 1019 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 1020 unmask_vtimer_irq_user(vcpu); 1021 } 1022 1023 void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) 1024 { 1025 struct arch_timer_cpu *timer = vcpu_timer(vcpu); 1026 struct timer_map map; 1027 1028 get_timer_map(vcpu, &map); 1029 1030 /* 1031 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 1032 * and to 0 for ARMv7. We provide an implementation that always 1033 * resets the timer to be disabled and unmasked and is compliant with 1034 * the ARMv7 architecture. 1035 */ 1036 for (int i = 0; i < nr_timers(vcpu); i++) 1037 timer_set_ctl(vcpu_get_timer(vcpu, i), 0); 1038 1039 /* 1040 * A vcpu running at EL2 is in charge of the offset applied to 1041 * the virtual timer, so use the physical VM offset, and point 1042 * the vcpu offset to CNTVOFF_EL2. 1043 */ 1044 if (vcpu_has_nv(vcpu)) { 1045 struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset; 1046 1047 offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2); 1048 offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset; 1049 } 1050 1051 if (timer->enabled) { 1052 for (int i = 0; i < nr_timers(vcpu); i++) 1053 kvm_timer_update_irq(vcpu, false, 1054 vcpu_get_timer(vcpu, i)); 1055 1056 if (irqchip_in_kernel(vcpu->kvm)) { 1057 kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer)); 1058 if (map.direct_ptimer) 1059 kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer)); 1060 } 1061 } 1062 1063 if (map.emul_vtimer) 1064 soft_timer_cancel(&map.emul_vtimer->hrtimer); 1065 if (map.emul_ptimer) 1066 soft_timer_cancel(&map.emul_ptimer->hrtimer); 1067 } 1068 1069 static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) 1070 { 1071 struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid); 1072 struct kvm *kvm = vcpu->kvm; 1073 1074 ctxt->timer_id = timerid; 1075 1076 if (!kvm_vm_is_protected(vcpu->kvm)) { 1077 if (timerid == TIMER_VTIMER) 1078 ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; 1079 else 1080 ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; 1081 } else { 1082 ctxt->offset.vm_offset = NULL; 1083 } 1084 1085 hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); 1086 1087 switch (timerid) { 1088 case TIMER_PTIMER: 1089 case TIMER_HPTIMER: 1090 ctxt->host_timer_irq = host_ptimer_irq; 1091 break; 1092 case TIMER_VTIMER: 1093 case TIMER_HVTIMER: 1094 ctxt->host_timer_irq = host_vtimer_irq; 1095 break; 1096 } 1097 } 1098 1099 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) 1100 { 1101 struct arch_timer_cpu *timer = vcpu_timer(vcpu); 1102 1103 for (int i = 0; i < NR_KVM_TIMERS; i++) 1104 timer_context_init(vcpu, i); 1105 1106 /* Synchronize offsets across timers of a VM if not already provided */ 1107 if (!vcpu_is_protected(vcpu) && 1108 !test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { 1109 timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read()); 1110 timer_set_offset(vcpu_ptimer(vcpu), 0); 1111 } 1112 1113 hrtimer_setup(&timer->bg_timer, kvm_bg_timer_expire, CLOCK_MONOTONIC, 1114 HRTIMER_MODE_ABS_HARD); 1115 } 1116 1117 /* 1118 * This is always called during kvm_arch_init_vm, but will also be 1119 * called from kvm_vgic_create if we have a vGICv5. 1120 */ 1121 void kvm_timer_init_vm(struct kvm *kvm) 1122 { 1123 /* 1124 * Set up the default PPIs - note that we adjust them based on 1125 * the model of the GIC as GICv5 uses a different way to 1126 * describing interrupts. 1127 */ 1128 for (int i = 0; i < NR_KVM_TIMERS; i++) 1129 kvm->arch.timer_data.ppi[i] = get_vgic_ppi(kvm, default_ppi[i]); 1130 } 1131 1132 void kvm_timer_cpu_up(void) 1133 { 1134 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 1135 if (host_ptimer_irq) 1136 enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); 1137 } 1138 1139 void kvm_timer_cpu_down(void) 1140 { 1141 disable_percpu_irq(host_vtimer_irq); 1142 if (host_ptimer_irq) 1143 disable_percpu_irq(host_ptimer_irq); 1144 } 1145 1146 static u64 read_timer_ctl(struct arch_timer_context *timer) 1147 { 1148 /* 1149 * Set ISTATUS bit if it's expired. 1150 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is 1151 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit 1152 * regardless of ENABLE bit for our implementation convenience. 1153 */ 1154 u32 ctl = timer_get_ctl(timer); 1155 1156 if (!kvm_timer_compute_delta(timer)) 1157 ctl |= ARCH_TIMER_CTRL_IT_STAT; 1158 1159 return ctl; 1160 } 1161 1162 static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, 1163 struct arch_timer_context *timer, 1164 enum kvm_arch_timer_regs treg) 1165 { 1166 u64 val; 1167 1168 switch (treg) { 1169 case TIMER_REG_TVAL: 1170 val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer); 1171 val = lower_32_bits(val); 1172 break; 1173 1174 case TIMER_REG_CTL: 1175 val = read_timer_ctl(timer); 1176 break; 1177 1178 case TIMER_REG_CVAL: 1179 val = timer_get_cval(timer); 1180 break; 1181 1182 case TIMER_REG_CNT: 1183 val = kvm_phys_timer_read() - timer_get_offset(timer); 1184 break; 1185 1186 case TIMER_REG_VOFF: 1187 val = *timer->offset.vcpu_offset; 1188 break; 1189 1190 default: 1191 BUG(); 1192 } 1193 1194 return val; 1195 } 1196 1197 u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, 1198 enum kvm_arch_timers tmr, 1199 enum kvm_arch_timer_regs treg) 1200 { 1201 struct arch_timer_context *timer; 1202 struct timer_map map; 1203 u64 val; 1204 1205 get_timer_map(vcpu, &map); 1206 timer = vcpu_get_timer(vcpu, tmr); 1207 1208 if (timer == map.emul_vtimer || timer == map.emul_ptimer) 1209 return kvm_arm_timer_read(vcpu, timer, treg); 1210 1211 preempt_disable(); 1212 timer_save_state(timer); 1213 1214 val = kvm_arm_timer_read(vcpu, timer, treg); 1215 1216 timer_restore_state(timer); 1217 preempt_enable(); 1218 1219 return val; 1220 } 1221 1222 static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, 1223 struct arch_timer_context *timer, 1224 enum kvm_arch_timer_regs treg, 1225 u64 val) 1226 { 1227 switch (treg) { 1228 case TIMER_REG_TVAL: 1229 timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val); 1230 break; 1231 1232 case TIMER_REG_CTL: 1233 timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT); 1234 break; 1235 1236 case TIMER_REG_CVAL: 1237 timer_set_cval(timer, val); 1238 break; 1239 1240 case TIMER_REG_VOFF: 1241 *timer->offset.vcpu_offset = val; 1242 break; 1243 1244 default: 1245 BUG(); 1246 } 1247 } 1248 1249 void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, 1250 enum kvm_arch_timers tmr, 1251 enum kvm_arch_timer_regs treg, 1252 u64 val) 1253 { 1254 struct arch_timer_context *timer; 1255 struct timer_map map; 1256 1257 get_timer_map(vcpu, &map); 1258 timer = vcpu_get_timer(vcpu, tmr); 1259 if (timer == map.emul_vtimer || timer == map.emul_ptimer) { 1260 soft_timer_cancel(&timer->hrtimer); 1261 kvm_arm_timer_write(vcpu, timer, treg, val); 1262 timer_emulate(timer); 1263 } else { 1264 preempt_disable(); 1265 timer_save_state(timer); 1266 kvm_arm_timer_write(vcpu, timer, treg, val); 1267 timer_restore_state(timer); 1268 preempt_enable(); 1269 } 1270 } 1271 1272 static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) 1273 { 1274 if (vcpu) 1275 irqd_set_forwarded_to_vcpu(d); 1276 else 1277 irqd_clr_forwarded_to_vcpu(d); 1278 1279 return 0; 1280 } 1281 1282 static int timer_irq_set_irqchip_state(struct irq_data *d, 1283 enum irqchip_irq_state which, bool val) 1284 { 1285 bool passthrough = which != IRQCHIP_STATE_ACTIVE || 1286 !irqd_is_forwarded_to_vcpu(d) || 1287 (kvm_vgic_global_state.type == VGIC_V5 && 1288 vgic_is_v3(kvm_get_running_vcpu()->kvm)); 1289 1290 if (passthrough) 1291 return irq_chip_set_parent_state(d, which, val); 1292 1293 if (val) 1294 irq_chip_mask_parent(d); 1295 else 1296 irq_chip_unmask_parent(d); 1297 1298 return 0; 1299 } 1300 1301 static void timer_irq_eoi(struct irq_data *d) 1302 { 1303 if (!irqd_is_forwarded_to_vcpu(d)) 1304 irq_chip_eoi_parent(d); 1305 } 1306 1307 static void timer_irq_ack(struct irq_data *d) 1308 { 1309 d = d->parent_data; 1310 if (d->chip->irq_ack) 1311 d->chip->irq_ack(d); 1312 } 1313 1314 static struct irq_chip timer_chip = { 1315 .name = "KVM", 1316 .irq_ack = timer_irq_ack, 1317 .irq_mask = irq_chip_mask_parent, 1318 .irq_unmask = irq_chip_unmask_parent, 1319 .irq_eoi = timer_irq_eoi, 1320 .irq_set_type = irq_chip_set_type_parent, 1321 .irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity, 1322 .irq_set_irqchip_state = timer_irq_set_irqchip_state, 1323 }; 1324 1325 static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, 1326 unsigned int nr_irqs, void *arg) 1327 { 1328 irq_hw_number_t hwirq = (uintptr_t)arg; 1329 1330 return irq_domain_set_hwirq_and_chip(domain, virq, hwirq, 1331 &timer_chip, NULL); 1332 } 1333 1334 static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq, 1335 unsigned int nr_irqs) 1336 { 1337 } 1338 1339 static const struct irq_domain_ops timer_domain_ops = { 1340 .alloc = timer_irq_domain_alloc, 1341 .free = timer_irq_domain_free, 1342 }; 1343 1344 static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) 1345 { 1346 *flags = irq_get_trigger_type(virq); 1347 if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) { 1348 kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n", 1349 virq); 1350 *flags = IRQF_TRIGGER_LOW; 1351 } 1352 } 1353 1354 static int kvm_irq_init(struct arch_timer_kvm_info *info) 1355 { 1356 struct irq_domain *domain = NULL; 1357 1358 if (info->virtual_irq <= 0) { 1359 kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", 1360 info->virtual_irq); 1361 return -ENODEV; 1362 } 1363 1364 host_vtimer_irq = info->virtual_irq; 1365 kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags); 1366 1367 if (kvm_vgic_global_state.no_hw_deactivation || 1368 kvm_vgic_global_state.type == VGIC_V5) { 1369 struct fwnode_handle *fwnode; 1370 struct irq_data *data; 1371 1372 fwnode = irq_domain_alloc_named_fwnode("kvm-timer"); 1373 if (!fwnode) 1374 return -ENOMEM; 1375 1376 /* Assume both vtimer and ptimer in the same parent */ 1377 data = irq_get_irq_data(host_vtimer_irq); 1378 domain = irq_domain_create_hierarchy(data->domain, 0, 1379 NR_KVM_TIMERS, fwnode, 1380 &timer_domain_ops, NULL); 1381 if (!domain) { 1382 irq_domain_free_fwnode(fwnode); 1383 return -ENOMEM; 1384 } 1385 1386 WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq, 1387 (void *)TIMER_VTIMER)); 1388 } 1389 1390 if (info->physical_irq > 0) { 1391 host_ptimer_irq = info->physical_irq; 1392 kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags); 1393 1394 if (domain) 1395 WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq, 1396 (void *)TIMER_PTIMER)); 1397 } 1398 1399 return 0; 1400 } 1401 1402 static void kvm_timer_handle_errata(void) 1403 { 1404 u64 mmfr0, mmfr1, mmfr4; 1405 1406 /* 1407 * CNTVOFF_EL2 is broken on some implementations. For those, we trap 1408 * all virtual timer/counter accesses, requiring FEAT_ECV. 1409 * 1410 * However, a hypervisor supporting nesting is likely to mitigate the 1411 * erratum at L0, and not require other levels to mitigate it (which 1412 * would otherwise be a terrible performance sink due to trap 1413 * amplification). 1414 * 1415 * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0, 1416 * and that NV is likely not to (because of limitations of the 1417 * architecture), only enable the workaround when FEAT_VHE and 1418 * FEAT_E2H0 are both detected. Time will tell if this actually holds. 1419 */ 1420 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); 1421 mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); 1422 mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1); 1423 if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) && 1424 !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) && 1425 SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) && 1426 (has_vhe() || has_hvhe()) && 1427 cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) { 1428 static_branch_enable(&broken_cntvoff_key); 1429 kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n"); 1430 } 1431 } 1432 1433 int __init kvm_timer_hyp_init(bool has_gic) 1434 { 1435 struct arch_timer_kvm_info *info; 1436 int err; 1437 1438 info = arch_timer_get_kvm_info(); 1439 timecounter = &info->timecounter; 1440 1441 if (!timecounter->cc) { 1442 kvm_err("kvm_arch_timer: uninitialized timecounter\n"); 1443 return -ENODEV; 1444 } 1445 1446 err = kvm_irq_init(info); 1447 if (err) 1448 return err; 1449 1450 /* First, do the virtual EL1 timer irq */ 1451 1452 err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, 1453 "kvm guest vtimer", kvm_get_running_vcpus()); 1454 if (err) { 1455 kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n", 1456 host_vtimer_irq, err); 1457 return err; 1458 } 1459 1460 if (has_gic) { 1461 err = irq_set_vcpu_affinity(host_vtimer_irq, 1462 kvm_get_running_vcpus()); 1463 if (err) { 1464 kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); 1465 goto out_free_vtimer_irq; 1466 } 1467 1468 static_branch_enable(&has_gic_active_state); 1469 } 1470 1471 kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); 1472 1473 /* Now let's do the physical EL1 timer irq */ 1474 1475 if (info->physical_irq > 0) { 1476 err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler, 1477 "kvm guest ptimer", kvm_get_running_vcpus()); 1478 if (err) { 1479 kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n", 1480 host_ptimer_irq, err); 1481 goto out_free_vtimer_irq; 1482 } 1483 1484 if (has_gic) { 1485 err = irq_set_vcpu_affinity(host_ptimer_irq, 1486 kvm_get_running_vcpus()); 1487 if (err) { 1488 kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); 1489 goto out_free_ptimer_irq; 1490 } 1491 } 1492 1493 kvm_debug("physical timer IRQ%d\n", host_ptimer_irq); 1494 } else if (has_vhe()) { 1495 kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n", 1496 info->physical_irq); 1497 err = -ENODEV; 1498 goto out_free_vtimer_irq; 1499 } 1500 1501 kvm_timer_handle_errata(); 1502 return 0; 1503 1504 out_free_ptimer_irq: 1505 if (info->physical_irq > 0) 1506 free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus()); 1507 out_free_vtimer_irq: 1508 free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus()); 1509 return err; 1510 } 1511 1512 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) 1513 { 1514 struct arch_timer_cpu *timer = vcpu_timer(vcpu); 1515 1516 soft_timer_cancel(&timer->bg_timer); 1517 } 1518 1519 static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) 1520 { 1521 u32 ppis = 0; 1522 bool valid; 1523 1524 mutex_lock(&vcpu->kvm->arch.config_lock); 1525 1526 for (int i = 0; i < nr_timers(vcpu); i++) { 1527 struct arch_timer_context *ctx; 1528 int irq; 1529 1530 ctx = vcpu_get_timer(vcpu, i); 1531 irq = timer_irq(ctx); 1532 if (kvm_vgic_set_owner(vcpu, irq, ctx)) 1533 break; 1534 1535 /* With GICv5, the default PPI is what you get -- nothing else */ 1536 if (vgic_is_v5(vcpu->kvm) && irq != get_vgic_ppi(vcpu->kvm, default_ppi[i])) 1537 break; 1538 1539 /* 1540 * We know by construction that we only have PPIs, so all values 1541 * are less than 32 for non-GICv5 VGICs. On GICv5, they are 1542 * architecturally defined to be under 32 too. However, we mask 1543 * off most of the bits as we might be presented with a GICv5 1544 * style PPI where the type is encoded in the top-bits. 1545 */ 1546 ppis |= BIT(irq & 0x1f); 1547 } 1548 1549 valid = hweight32(ppis) == nr_timers(vcpu); 1550 1551 if (valid) 1552 set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags); 1553 1554 mutex_unlock(&vcpu->kvm->arch.config_lock); 1555 1556 return valid; 1557 } 1558 1559 static bool kvm_arch_timer_get_input_level(int vintid) 1560 { 1561 struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); 1562 1563 if (WARN(!vcpu, "No vcpu context!\n")) 1564 return false; 1565 1566 for (int i = 0; i < nr_timers(vcpu); i++) { 1567 struct arch_timer_context *ctx; 1568 1569 ctx = vcpu_get_timer(vcpu, i); 1570 if (timer_irq(ctx) == vintid) 1571 return kvm_timer_pending(ctx); 1572 } 1573 1574 /* A timer IRQ has fired, but no matching timer was found? */ 1575 WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid); 1576 1577 return false; 1578 } 1579 1580 int kvm_timer_enable(struct kvm_vcpu *vcpu) 1581 { 1582 struct arch_timer_cpu *timer = vcpu_timer(vcpu); 1583 const struct irq_ops *ops; 1584 struct timer_map map; 1585 int ret; 1586 1587 if (timer->enabled) 1588 return 0; 1589 1590 /* Without a VGIC we do not map virtual IRQs to physical IRQs */ 1591 if (!irqchip_in_kernel(vcpu->kvm)) 1592 goto no_vgic; 1593 1594 /* 1595 * At this stage, we have the guarantee that the vgic is both 1596 * available and initialized. 1597 */ 1598 if (!timer_irqs_are_valid(vcpu)) { 1599 kvm_debug("incorrectly configured timer irqs\n"); 1600 return -EINVAL; 1601 } 1602 1603 get_timer_map(vcpu, &map); 1604 1605 ops = vgic_is_v5(vcpu->kvm) ? &arch_timer_irq_ops_vgic_v5 : 1606 &arch_timer_irq_ops; 1607 1608 for (int i = 0; i < nr_timers(vcpu); i++) 1609 kvm_vgic_set_irq_ops(vcpu, timer_irq(vcpu_get_timer(vcpu, i)), ops); 1610 1611 ret = kvm_vgic_map_phys_irq(vcpu, 1612 map.direct_vtimer->host_timer_irq, 1613 timer_irq(map.direct_vtimer)); 1614 if (ret) 1615 return ret; 1616 1617 if (map.direct_ptimer) 1618 ret = kvm_vgic_map_phys_irq(vcpu, 1619 map.direct_ptimer->host_timer_irq, 1620 timer_irq(map.direct_ptimer)); 1621 if (ret) 1622 return ret; 1623 1624 no_vgic: 1625 timer->enabled = 1; 1626 return 0; 1627 } 1628 1629 /* If we have CNTPOFF, permanently set ECV to enable it */ 1630 void kvm_timer_init_vhe(void) 1631 { 1632 if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)) 1633 sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV); 1634 } 1635 1636 int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 1637 { 1638 int __user *uaddr = (int __user *)(long)attr->addr; 1639 int irq, idx, ret = 0; 1640 1641 if (!irqchip_in_kernel(vcpu->kvm)) 1642 return -EINVAL; 1643 1644 if (get_user(irq, uaddr)) 1645 return -EFAULT; 1646 1647 if (!(irq_is_ppi(vcpu->kvm, irq))) 1648 return -EINVAL; 1649 1650 guard(mutex)(&vcpu->kvm->arch.config_lock); 1651 1652 if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, 1653 &vcpu->kvm->arch.flags)) { 1654 return -EBUSY; 1655 } 1656 1657 switch (attr->attr) { 1658 case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: 1659 idx = TIMER_VTIMER; 1660 break; 1661 case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: 1662 idx = TIMER_PTIMER; 1663 break; 1664 case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: 1665 idx = TIMER_HVTIMER; 1666 break; 1667 case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: 1668 idx = TIMER_HPTIMER; 1669 break; 1670 default: 1671 return -ENXIO; 1672 } 1673 1674 /* 1675 * We cannot validate the IRQ unicity before we run, so take it at 1676 * face value. The verdict will be given on first vcpu run, for each 1677 * vcpu. Yes this is late. Blame it on the stupid API. 1678 */ 1679 vcpu->kvm->arch.timer_data.ppi[idx] = irq; 1680 1681 return ret; 1682 } 1683 1684 int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 1685 { 1686 int __user *uaddr = (int __user *)(long)attr->addr; 1687 struct arch_timer_context *timer; 1688 int irq; 1689 1690 switch (attr->attr) { 1691 case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: 1692 timer = vcpu_vtimer(vcpu); 1693 break; 1694 case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: 1695 timer = vcpu_ptimer(vcpu); 1696 break; 1697 case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: 1698 timer = vcpu_hvtimer(vcpu); 1699 break; 1700 case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: 1701 timer = vcpu_hptimer(vcpu); 1702 break; 1703 default: 1704 return -ENXIO; 1705 } 1706 1707 irq = timer_irq(timer); 1708 return put_user(irq, uaddr); 1709 } 1710 1711 int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 1712 { 1713 switch (attr->attr) { 1714 case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: 1715 case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: 1716 case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: 1717 case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: 1718 return 0; 1719 } 1720 1721 return -ENXIO; 1722 } 1723 1724 int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, 1725 struct kvm_arm_counter_offset *offset) 1726 { 1727 int ret = 0; 1728 1729 if (offset->reserved) 1730 return -EINVAL; 1731 1732 if (kvm_vm_is_protected(kvm)) 1733 return -EINVAL; 1734 1735 mutex_lock(&kvm->lock); 1736 1737 if (!kvm_trylock_all_vcpus(kvm)) { 1738 set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags); 1739 1740 /* 1741 * If userspace decides to set the offset using this 1742 * API rather than merely restoring the counter 1743 * values, the offset applies to both the virtual and 1744 * physical views. 1745 */ 1746 kvm->arch.timer_data.voffset = offset->counter_offset; 1747 kvm->arch.timer_data.poffset = offset->counter_offset; 1748 1749 kvm_unlock_all_vcpus(kvm); 1750 } else { 1751 ret = -EBUSY; 1752 } 1753 1754 mutex_unlock(&kvm->lock); 1755 1756 return ret; 1757 } 1758